From 84f295f9d7f5e1d294833e3b689d425af672388a Mon Sep 17 00:00:00 2001 From: Jonathan Sauder Date: Wed, 6 May 2026 14:39:43 -0400 Subject: [PATCH 1/8] Try Intrinsics Refinement from LoGer --- camera_profiles/guilhems_camera.json | 55 +++++++++++ deepreefmap/cli/main.py | 8 ++ deepreefmap/mapping/base.py | 9 ++ deepreefmap/mapping/loger_backend.py | 86 +++++++++++++++++ deepreefmap/pipeline/orchestrator.py | 93 +++++++++++++++++-- tests/test_cli_reconstruct_scsfmlearner.py | 27 ++++++ ...test_intrinsics_refinement_orchestrator.py | 62 +++++++++++++ tests/test_loger_adapter_contract.py | 38 ++++++++ 8 files changed, 372 insertions(+), 6 deletions(-) create mode 100644 camera_profiles/guilhems_camera.json create mode 100644 tests/test_intrinsics_refinement_orchestrator.py diff --git a/camera_profiles/guilhems_camera.json b/camera_profiles/guilhems_camera.json new file mode 100644 index 0000000..465469f --- /dev/null +++ b/camera_profiles/guilhems_camera.json @@ -0,0 +1,55 @@ +{ + "name": "guilhems_camera", + "source": "colmap_radial_v1", + "distorted": { + "model": "SIMPLE_RADIAL", + "params": { + "fx": 1307.4492693375018, + "fy": 1307.4492693375018, + "cx": 1920.0, + "cy": 1080.0, + "k1": -0.012404733627158448, + "k2": 0.0 + } + }, + "rectified_pinhole": { + "image_size": [ + 3840, + 2160 + ], + "K": [ + [ + 1296.8115234375, + 0.0, + 1919.5 + ], + [ + 0.0, + 1296.8115234375, + 1079.5 + ], + [ + 0.0, + 0.0, + 1.0 + ] + ] + }, + "diagnostics": { + "n_input_frames": 88, + "n_registered_images": 88, + "mean_reprojection_error_px": 0.9166741137226962, + "camera_model": "SIMPLE_RADIAL", + "rectification_mode": "undistort", + "source_video": "210903_MAGARSAM_ISLAND_Day_5_Dive_1_1.MP4", + "sampling_fps": 10, + "begin_s": 10.0, + "end_s": 17.0, + "valid_roi_xywh": [ + 0, + 0, + 3840, + 2159 + ] + } +} \ No newline at end of file diff --git a/deepreefmap/cli/main.py b/deepreefmap/cli/main.py index e8053d4..dc50cb5 100644 --- a/deepreefmap/cli/main.py +++ 
b/deepreefmap/cli/main.py @@ -73,6 +73,13 @@ def reconstruct( loger_model_path: Optional[Path] = typer.Option(None, help="LoGeR checkpoint path (defaults to vendored)."), loger_window_size: int = typer.Option(32, help="LoGeR window size."), loger_overlap_size: int = typer.Option(3, help="LoGeR overlap size."), + refine_intrinsics_from_mapper: bool = typer.Option( + False, + help=( + "Allow mapping backend to refine camera intrinsics and override camera profile K for " + "downstream 3D reconstruction." + ), + ), scsfmlearner_checkpoint_path: Optional[Path] = typer.Option( None, help="Optional SC-SfMLearner checkpoint path (.pt containing disp_state_dict and pose_state_dict). Defaults to EPFL-ECEO/deepreefmap-sfm-net/scsfmlearner.pt on Hugging Face Hub.", @@ -169,6 +176,7 @@ def reconstruct( processing_width=processing_width, processing_height=processing_height, skip_segmentation=skip_segmentation, + refine_intrinsics_from_mapper=refine_intrinsics_from_mapper, ) diff --git a/deepreefmap/mapping/base.py b/deepreefmap/mapping/base.py index 9605a17..aa7d2df 100644 --- a/deepreefmap/mapping/base.py +++ b/deepreefmap/mapping/base.py @@ -76,3 +76,12 @@ def process_sequence( scale_type=estimates[0].scale_type, gravity_vectors=None if gravity_vectors is None else gravity_vectors.astype(np.float32), ) + + def refine_intrinsics(self, mapping_result) -> np.ndarray | None: + """Optionally return refined 3x3 intrinsics for this sequence. + + Backends can override this hook when they can estimate intrinsics from + sequence outputs. Returning ``None`` keeps the caller-provided intrinsics. 
+ """ + del mapping_result + return None diff --git a/deepreefmap/mapping/loger_backend.py b/deepreefmap/mapping/loger_backend.py index 1f6b0d5..70b5615 100644 --- a/deepreefmap/mapping/loger_backend.py +++ b/deepreefmap/mapping/loger_backend.py @@ -239,6 +239,24 @@ def process_sequence( except Exception as exc: raise RuntimeError("LoGeR sequence inference failed") from exc + def refine_intrinsics(self, mapping_result: MappingSequenceResult) -> np.ndarray | None: + local_points = mapping_result.local_points + if local_points is None or local_points.ndim != 4 or local_points.shape[0] == 0: + logger.info("LoGeR intrinsics refinement skipped: local_points unavailable.") + return None + try: + refined = _estimate_intrinsics_from_local_points( + local_points=local_points.astype(np.float32), + seed_intrinsics=mapping_result.intrinsics.astype(np.float32), + ) + except Exception as exc: + logger.warning("LoGeR intrinsics refinement failed: %s", exc) + return None + if refined is None: + logger.info("LoGeR intrinsics refinement produced no valid estimate; keeping camera profile K.") + return None + return refined.astype(np.float32) + def _reanchor_to_first_camera( poses: np.ndarray, @@ -315,3 +333,71 @@ def _tensor_to_numpy(value) -> np.ndarray | None: if hasattr(value, "detach"): value = value.squeeze(0).detach().cpu().float().numpy() return np.asarray(value) + + +def _estimate_intrinsics_from_local_points( + *, + local_points: np.ndarray, + seed_intrinsics: np.ndarray, +) -> np.ndarray | None: + """Estimate pinhole intrinsics from per-pixel local 3D points. + + LoGeR predicts local points as camera-frame XYZ. We estimate focal lengths + by robustly solving x/z=(u-cx)/fx and y/z=(v-cy)/fy over all frames. + Principal point is kept from the seed intrinsics. 
+ """ + if local_points.shape[-1] != 3: + return None + n, h, w, _ = local_points.shape + if h <= 0 or w <= 0 or n <= 0: + return None + k = seed_intrinsics.astype(np.float32).copy() + cx = float(k[0, 2]) + cy = float(k[1, 2]) + + z = local_points[..., 2] + x = local_points[..., 0] + y = local_points[..., 1] + valid = np.isfinite(x) & np.isfinite(y) & np.isfinite(z) & (np.abs(z) > 1e-6) + if not np.any(valid): + return None + + rx = x / z + ry = y / z + u = np.broadcast_to(np.arange(w, dtype=np.float32)[None, None, :], (n, h, w)) + v = np.broadcast_to(np.arange(h, dtype=np.float32)[None, :, None], (n, h, w)) + + fx_samples = (u - cx) / np.where(np.abs(rx) > 1e-6, rx, np.nan) + fy_samples = (v - cy) / np.where(np.abs(ry) > 1e-6, ry, np.nan) + fx = _robust_positive_median(fx_samples[valid]) + fy = _robust_positive_median(fy_samples[valid]) + if fx is None and fy is None: + return None + if fx is None: + fx = fy + if fy is None: + fy = fx + assert fx is not None and fy is not None + + max_dim = float(max(h, w)) + fx = float(np.clip(fx, 0.1 * max_dim, 20.0 * max_dim)) + fy = float(np.clip(fy, 0.1 * max_dim, 20.0 * max_dim)) + k[0, 0] = fx + k[1, 1] = fy + k[0, 1] = 0.0 + k[1, 0] = 0.0 + k[2] = np.array([0.0, 0.0, 1.0], dtype=np.float32) + return k + + +def _robust_positive_median(values: np.ndarray) -> float | None: + if values.size == 0: + return None + finite = values[np.isfinite(values)] + if finite.size == 0: + return None + positive = np.abs(finite) + positive = positive[positive > 1e-6] + if positive.size == 0: + return None + return float(np.median(positive)) diff --git a/deepreefmap/pipeline/orchestrator.py b/deepreefmap/pipeline/orchestrator.py index 04324cd..9762b10 100644 --- a/deepreefmap/pipeline/orchestrator.py +++ b/deepreefmap/pipeline/orchestrator.py @@ -59,6 +59,7 @@ def run_reconstruction( processing_width: int | None = None, processing_height: int | None = None, skip_segmentation: bool = False, + refine_intrinsics_from_mapper: bool = False, ) -> 
None: logging.basicConfig( level=logging.INFO, @@ -230,10 +231,12 @@ def progress_cb(current: int, total: int | None, frame_idx: int, elapsed_s: floa logger.info("Prepared %d sampled frames in %.1fs", frame_count, time.monotonic() - t_start) + map_key_options = dict(mapping_options or {}) + map_key_options["refine_intrinsics_from_mapper"] = bool(refine_intrinsics_from_mapper) map_key = resume_mod.mapping_key( preprocess_key_str=prep_key, mapping_name=mapping_name, - mapping_options=mapping_options, + mapping_options=map_key_options, gravity_available=frame_batch.gravity_vectors is not None, ) map_sidecar = resume_mod.read_sidecar(output_dir, resume_mod.STAGE_MAPPING) @@ -263,6 +266,13 @@ def progress_cb(current: int, total: int | None, frame_idx: int, elapsed_s: floa frame_batch.images, gravity_vectors=frame_batch.gravity_vectors, ) + mapping_result = _maybe_refine_intrinsics( + mapping_name=mapping_name, + mapping=mapping, + mapping_result=mapping_result, + camera_profile_intrinsics=processing_intrinsics, + refine_intrinsics_from_mapper=refine_intrinsics_from_mapper, + ) if viewer is not None: viewer.update_progress("mapping", current=frame_count, total=frame_count, message="Mapping complete") viewer.set_stage("mapping", "completed", "3D mapping complete") @@ -279,6 +289,20 @@ def progress_cb(current: int, total: int | None, frame_idx: int, elapsed_s: floa scale_type=np.asarray(mapping_result.scale_type), ) resume_mod.write_sidecar(output_dir, resume_mod.STAGE_MAPPING, map_key) + elif refine_intrinsics_from_mapper: + logger.info( + "Intrinsics refinement flag is enabled with cached mapping output." + " camera_profile_K=%s effective_mapping_K=%s", + _format_matrix(processing_intrinsics), + _format_matrix(mapping_result.intrinsics), + ) + + mapping_result_for_cloud = mapping_result + if refine_intrinsics_from_mapper and mapping_name in {"loger", "loger_star"}: + logger.info( + "LoGeR intrinsics refinement mode: forcing depth+pose unprojection for cloud assembly." 
+ ) + mapping_result_for_cloud = _mapping_without_world_points(mapping_result) if skip_segmentation: logger.info("Skip segmentation: building geometry-only point cloud...") @@ -287,7 +311,7 @@ def progress_cb(current: int, total: int | None, frame_idx: int, elapsed_s: floa active_stage = "outputs" geometry_xyz, geometry_rgb = _build_geometry_cloud( frame_batch=frame_batch, - mapping_result=mapping_result, + mapping_result=mapping_result_for_cloud, voxel_size=0.003, ) output_files = [ @@ -329,7 +353,7 @@ def progress_cb(current: int, total: int | None, frame_idx: int, elapsed_s: floa logger.info("Building filtered semantic reference cloud...") reference_cloud = build_semantic_reference_cloud( frame_batch, - mapping_result, + mapping_result_for_cloud, classes_config, PointFilterConfig( replacement_radius_factor=1.0 @@ -357,9 +381,9 @@ def progress_cb(current: int, total: int | None, frame_idx: int, elapsed_s: floa masks_for_depth = [_resize_mask(frame.keep_mask, depth_shape) for frame in frame_batch.frames] tsdf_xyz, tsdf_rgb = integrate_tsdf( rgb_for_depth, - [d for d in mapping_result.depth_maps], - [p for p in mapping_result.poses_w_c], - mapping_result.intrinsics, + [d for d in mapping_result_for_cloud.depth_maps], + [p for p in mapping_result_for_cloud.poses_w_c], + mapping_result_for_cloud.intrinsics, masks=masks_for_depth, ) semantic_tsdf = align_tsdf_to_reference(tsdf_xyz, tsdf_rgb, reference_cloud) @@ -538,6 +562,63 @@ def flush_pending() -> None: ) +def _maybe_refine_intrinsics( + *, + mapping_name: str, + mapping, + mapping_result: MappingSequenceResult, + camera_profile_intrinsics: np.ndarray, + refine_intrinsics_from_mapper: bool, +) -> MappingSequenceResult: + if not refine_intrinsics_from_mapper: + return mapping_result + refined_intrinsics = mapping.refine_intrinsics(mapping_result) + if refined_intrinsics is None: + logger.info( + "Intrinsics refinement requested but no refined intrinsics returned for backend '%s'. 
" + "camera_profile_K=%s effective_mapping_K=%s", + mapping_name, + _format_matrix(camera_profile_intrinsics), + _format_matrix(mapping_result.intrinsics), + ) + return mapping_result + logger.info( + "Intrinsics refinement applied for backend '%s'. camera_profile_K=%s refined_K=%s", + mapping_name, + _format_matrix(camera_profile_intrinsics), + _format_matrix(refined_intrinsics), + ) + return MappingSequenceResult( + frame_indices=mapping_result.frame_indices, + depth_maps=mapping_result.depth_maps, + poses_w_c=mapping_result.poses_w_c, + intrinsics=refined_intrinsics.astype(np.float32), + world_points=mapping_result.world_points, + local_points=mapping_result.local_points, + confidence=mapping_result.confidence, + scale_type=mapping_result.scale_type, + gravity_vectors=mapping_result.gravity_vectors, + ) + + +def _mapping_without_world_points(mapping_result: MappingSequenceResult) -> MappingSequenceResult: + return MappingSequenceResult( + frame_indices=mapping_result.frame_indices, + depth_maps=mapping_result.depth_maps, + poses_w_c=mapping_result.poses_w_c, + intrinsics=mapping_result.intrinsics, + world_points=None, + local_points=mapping_result.local_points, + confidence=mapping_result.confidence, + scale_type=mapping_result.scale_type, + gravity_vectors=mapping_result.gravity_vectors, + ) + + +def _format_matrix(matrix: np.ndarray) -> str: + return np.array2string(np.asarray(matrix), precision=4, suppress_small=False) + + def _build_geometry_cloud( frame_batch: FrameBatch, mapping_result: MappingSequenceResult, diff --git a/tests/test_cli_reconstruct_scsfmlearner.py b/tests/test_cli_reconstruct_scsfmlearner.py index e08b8dd..fb770a9 100644 --- a/tests/test_cli_reconstruct_scsfmlearner.py +++ b/tests/test_cli_reconstruct_scsfmlearner.py @@ -68,3 +68,30 @@ def _fake_run_reconstruction(**kwargs): assert isinstance(mapping_options, dict) assert mapping_options["target_width"] == 320 assert mapping_options["target_height"] == 192 + + +def 
test_reconstruct_threads_intrinsics_refinement_flag(tmp_path: Path): + profile_dir = tmp_path / "profiles" + profile_dir.mkdir() + (profile_dir / "reefcam.json").write_text("{}", encoding="utf-8") + + captured: dict[str, object] = {} + + def _fake_run_reconstruction(**kwargs): + captured.update(kwargs) + + with patch.object(cli_main, "CAMERA_PROFILE_DIR", profile_dir), patch.object( + intrinsics, "CAMERA_PROFILE_DIR", profile_dir + ), patch.object( + cli_main, + "run_reconstruction", + _fake_run_reconstruction, + ): + cli_main.reconstruct( + videos="clip.mp4", + camera_profile="reefcam", + mapping="loger", + refine_intrinsics_from_mapper=True, + ) + + assert captured["refine_intrinsics_from_mapper"] is True diff --git a/tests/test_intrinsics_refinement_orchestrator.py b/tests/test_intrinsics_refinement_orchestrator.py new file mode 100644 index 0000000..faf9d92 --- /dev/null +++ b/tests/test_intrinsics_refinement_orchestrator.py @@ -0,0 +1,62 @@ +import numpy as np + +from deepreefmap.pipeline.artifacts import MappingSequenceResult +from deepreefmap.pipeline.orchestrator import _mapping_without_world_points, _maybe_refine_intrinsics + + +class _RefiningMapper: + def __init__(self, refined: np.ndarray | None): + self._refined = refined + + def refine_intrinsics(self, mapping_result: MappingSequenceResult) -> np.ndarray | None: + return self._refined + + +def _mapping_result() -> MappingSequenceResult: + return MappingSequenceResult( + frame_indices=np.array([0], dtype=np.int32), + depth_maps=np.ones((1, 2, 2), dtype=np.float32), + poses_w_c=np.eye(4, dtype=np.float32)[None], + intrinsics=np.eye(3, dtype=np.float32), + world_points=np.ones((1, 2, 2, 3), dtype=np.float32), + ) + + +def test_maybe_refine_intrinsics_applies_mapper_override(): + mapping_result = _mapping_result() + refined = np.array([[400.0, 0.0, 10.0], [0.0, 420.0, 11.0], [0.0, 0.0, 1.0]], dtype=np.float32) + + out = _maybe_refine_intrinsics( + mapping_name="loger", + mapping=_RefiningMapper(refined), 
+ mapping_result=mapping_result, + camera_profile_intrinsics=np.eye(3, dtype=np.float32), + refine_intrinsics_from_mapper=True, + ) + + assert out.intrinsics[0, 0] == 400.0 + assert out.intrinsics[1, 1] == 420.0 + + +def test_maybe_refine_intrinsics_keeps_original_when_mapper_returns_none(): + mapping_result = _mapping_result() + + out = _maybe_refine_intrinsics( + mapping_name="scsfmlearner", + mapping=_RefiningMapper(None), + mapping_result=mapping_result, + camera_profile_intrinsics=np.eye(3, dtype=np.float32), + refine_intrinsics_from_mapper=True, + ) + + assert np.allclose(out.intrinsics, mapping_result.intrinsics) + + +def test_mapping_without_world_points_forces_unprojection_path(): + mapping_result = _mapping_result() + + out = _mapping_without_world_points(mapping_result) + + assert out.world_points is None + assert np.allclose(out.depth_maps, mapping_result.depth_maps) + assert np.allclose(out.intrinsics, mapping_result.intrinsics) diff --git a/tests/test_loger_adapter_contract.py b/tests/test_loger_adapter_contract.py index 222095e..f702794 100644 --- a/tests/test_loger_adapter_contract.py +++ b/tests/test_loger_adapter_contract.py @@ -4,10 +4,12 @@ from deepreefmap.camera.intrinsics import scale_intrinsics from deepreefmap.mapping.loger_backend import ( LoGeRBackend, + _estimate_intrinsics_from_local_points, _assert_pose_convention, _nearest_multiple, _reanchor_to_first_camera, ) +from deepreefmap.pipeline.artifacts import MappingSequenceResult def test_loger_disables_per_frame_proxy_path(): @@ -100,3 +102,39 @@ def _se3(rotation: np.ndarray, translation: tuple[float, float, float]) -> np.nd matrix[:3, :3] = rotation matrix[:3, 3] = translation return matrix + + +def test_estimate_intrinsics_from_local_points_recovers_focal(): + h, w = 6, 8 + fx, fy = 120.0, 140.0 + cx, cy = 3.0, 2.0 + z = np.full((1, h, w, 1), 2.0, dtype=np.float32) + u = np.arange(w, dtype=np.float32)[None, None, :, None] + v = np.arange(h, dtype=np.float32)[None, :, None, None] + x = 
(u - cx) / fx * z + y = (v - cy) / fy * z + local_points = np.concatenate([x, y, z], axis=-1) + seed_k = np.array([[100.0, 0.0, cx], [0.0, 100.0, cy], [0.0, 0.0, 1.0]], dtype=np.float32) + + refined = _estimate_intrinsics_from_local_points(local_points=local_points, seed_intrinsics=seed_k) + + assert refined is not None + assert refined[0, 0] == pytest.approx(fx, rel=1e-3) + assert refined[1, 1] == pytest.approx(fy, rel=1e-3) + assert refined[0, 2] == pytest.approx(cx) + assert refined[1, 2] == pytest.approx(cy) + + +def test_loger_refine_intrinsics_returns_none_without_local_points(): + backend = LoGeRBackend.__new__(LoGeRBackend) + mapping_result = MappingSequenceResult( + frame_indices=np.array([0], dtype=np.int32), + depth_maps=np.ones((1, 2, 2), dtype=np.float32), + poses_w_c=np.eye(4, dtype=np.float32)[None], + intrinsics=np.eye(3, dtype=np.float32), + local_points=None, + ) + + refined = backend.refine_intrinsics(mapping_result) + + assert refined is None From 11b8a10a66d8810c8512de21408e8229973ef497 Mon Sep 17 00:00:00 2001 From: Jonathan Sauder Date: Wed, 6 May 2026 14:58:26 -0400 Subject: [PATCH 2/8] Fix intrinsics scaling --- deepreefmap/pipeline/orchestrator.py | 54 ++++++++++++++++++- ...test_intrinsics_refinement_orchestrator.py | 25 +++++++++ 2 files changed, 77 insertions(+), 2 deletions(-) diff --git a/deepreefmap/pipeline/orchestrator.py b/deepreefmap/pipeline/orchestrator.py index 9762b10..12bdd72 100644 --- a/deepreefmap/pipeline/orchestrator.py +++ b/deepreefmap/pipeline/orchestrator.py @@ -271,6 +271,7 @@ def progress_cb(current: int, total: int | None, frame_idx: int, elapsed_s: floa mapping=mapping, mapping_result=mapping_result, camera_profile_intrinsics=processing_intrinsics, + processing_image_size=processing_image_size, refine_intrinsics_from_mapper=refine_intrinsics_from_mapper, ) if viewer is not None: @@ -568,8 +569,31 @@ def _maybe_refine_intrinsics( mapping, mapping_result: MappingSequenceResult, camera_profile_intrinsics: 
np.ndarray, + processing_image_size: tuple[int, int], refine_intrinsics_from_mapper: bool, ) -> MappingSequenceResult: + depth_h, depth_w = mapping_result.depth_maps[0].shape + camera_profile_intrinsics_depth = scale_intrinsics( + camera_profile_intrinsics, + processing_image_size, + (depth_w, depth_h), + ) + effective_intrinsics = _coerce_intrinsics_to_depth_scale( + intrinsics=mapping_result.intrinsics, + depth_size=(depth_w, depth_h), + processing_image_size=processing_image_size, + ) + mapping_result = MappingSequenceResult( + frame_indices=mapping_result.frame_indices, + depth_maps=mapping_result.depth_maps, + poses_w_c=mapping_result.poses_w_c, + intrinsics=effective_intrinsics, + world_points=mapping_result.world_points, + local_points=mapping_result.local_points, + confidence=mapping_result.confidence, + scale_type=mapping_result.scale_type, + gravity_vectors=mapping_result.gravity_vectors, + ) if not refine_intrinsics_from_mapper: return mapping_result refined_intrinsics = mapping.refine_intrinsics(mapping_result) @@ -578,14 +602,19 @@ def _maybe_refine_intrinsics( "Intrinsics refinement requested but no refined intrinsics returned for backend '%s'. " "camera_profile_K=%s effective_mapping_K=%s", mapping_name, - _format_matrix(camera_profile_intrinsics), + _format_matrix(camera_profile_intrinsics_depth), _format_matrix(mapping_result.intrinsics), ) return mapping_result + refined_intrinsics = _coerce_intrinsics_to_depth_scale( + intrinsics=refined_intrinsics.astype(np.float32), + depth_size=(depth_w, depth_h), + processing_image_size=processing_image_size, + ) logger.info( "Intrinsics refinement applied for backend '%s'. 
camera_profile_K=%s refined_K=%s", mapping_name, - _format_matrix(camera_profile_intrinsics), + _format_matrix(camera_profile_intrinsics_depth), _format_matrix(refined_intrinsics), ) return MappingSequenceResult( @@ -619,6 +648,27 @@ def _format_matrix(matrix: np.ndarray) -> str: return np.array2string(np.asarray(matrix), precision=4, suppress_small=False) +def _coerce_intrinsics_to_depth_scale( + *, + intrinsics: np.ndarray, + depth_size: tuple[int, int], + processing_image_size: tuple[int, int], +) -> np.ndarray: + """Ensure intrinsics are in the same pixel frame as depth maps. + + Backends are expected to return depth-scale intrinsics already. If principal + point is clearly out of depth bounds, treat it as processing-scale K and + rescale to depth size. + """ + depth_w, depth_h = depth_size + k = np.asarray(intrinsics, dtype=np.float32).copy() + cx = float(k[0, 2]) + cy = float(k[1, 2]) + if cx > depth_w * 1.25 or cy > depth_h * 1.25: + return scale_intrinsics(k, processing_image_size, (depth_w, depth_h)) + return k + + def _build_geometry_cloud( frame_batch: FrameBatch, mapping_result: MappingSequenceResult, diff --git a/tests/test_intrinsics_refinement_orchestrator.py b/tests/test_intrinsics_refinement_orchestrator.py index faf9d92..a5caa26 100644 --- a/tests/test_intrinsics_refinement_orchestrator.py +++ b/tests/test_intrinsics_refinement_orchestrator.py @@ -31,6 +31,7 @@ def test_maybe_refine_intrinsics_applies_mapper_override(): mapping=_RefiningMapper(refined), mapping_result=mapping_result, camera_profile_intrinsics=np.eye(3, dtype=np.float32), + processing_image_size=(2, 2), refine_intrinsics_from_mapper=True, ) @@ -46,6 +47,7 @@ def test_maybe_refine_intrinsics_keeps_original_when_mapper_returns_none(): mapping=_RefiningMapper(None), mapping_result=mapping_result, camera_profile_intrinsics=np.eye(3, dtype=np.float32), + processing_image_size=(2, 2), refine_intrinsics_from_mapper=True, ) @@ -60,3 +62,26 @@ def 
test_mapping_without_world_points_forces_unprojection_path(): assert out.world_points is None assert np.allclose(out.depth_maps, mapping_result.depth_maps) assert np.allclose(out.intrinsics, mapping_result.intrinsics) + + +def test_maybe_refine_intrinsics_rescales_processing_frame_intrinsics(): + mapping_result = _mapping_result() + mapping_result = MappingSequenceResult( + frame_indices=mapping_result.frame_indices, + depth_maps=np.ones((1, 10, 20), dtype=np.float32), + poses_w_c=mapping_result.poses_w_c, + intrinsics=np.array([[1000.0, 0.0, 688.0], [0.0, 1000.0, 384.0], [0.0, 0.0, 1.0]], dtype=np.float32), + world_points=mapping_result.world_points, + ) + + out = _maybe_refine_intrinsics( + mapping_name="loger", + mapping=_RefiningMapper(None), + mapping_result=mapping_result, + camera_profile_intrinsics=np.array([[1000.0, 0.0, 688.0], [0.0, 1000.0, 384.0], [0.0, 0.0, 1.0]], dtype=np.float32), + processing_image_size=(1376, 768), + refine_intrinsics_from_mapper=True, + ) + + assert out.intrinsics[0, 2] == np.float32(10.0) + assert out.intrinsics[1, 2] == np.float32(5.0) From 3a02c6e275f1055bac98a9700513f5a1750464d8 Mon Sep 17 00:00:00 2001 From: Jonathan Sauder Date: Wed, 6 May 2026 18:04:35 -0400 Subject: [PATCH 3/8] Update readme, update LoGeR resolution --- README.md | 43 ++++++++++++++++++++++++---- deepreefmap/cli/main.py | 2 +- deepreefmap/mapping/loger_backend.py | 2 +- 3 files changed, 40 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 9f18f29..09b6a03 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ -# DeepReefMap v2 +# DeepReefMap -DeepReefMap turns reef videos into 3D reconstructions and semantic maps (for example, coral classes overlaid on geometry). It is designed so you can swap segmentation and reconstruction backends while keeping the same command-line workflow. 
+[DeepReefMap](https://besjournals.onlinelibrary.wiley.com/doi/full/10.1111/2041-210X.14307) turns reef videos into 3D reconstructions and semantic maps (for example, coral classes overlaid on geometry). It is designed so you can swap segmentation and reconstruction backends while keeping the same command-line workflow. + +Repository maintained by [Hugues Sibille](https://github.com/HuguesSib) (EPFL) and [Jonathan Sauder](https://josauder.github.io/) (MIT/EPFL). ## Quick overview @@ -45,15 +47,15 @@ Important performance note: ### SC-SfMLearner path (simplest) -Use `--mapping scsfmlearner`. By default, the checkpoint is downloaded from Hugging Face (`EPFL-ECEO/deepreefmap-sfm-net/scsfmlearner.pt`). You can also provide a local checkpoint path: +Use `--mapping scsfmlearner`. By default, the checkpoint is downloaded from Hugging Face (`EPFL-ECEO/deepreefmap-sfm-net/scsfmlearner.pt`). ```bash uv run deepreefmap reconstruct \ --videos GX010001.MP4 \ --mapping scsfmlearner \ - --scsfmlearner-checkpoint-path /path/to/scsfmlearner.pt \ --camera-profile gopro_hero_10 \ - --out out_local_ckpt + --tsdf \ + --out out_scsfm ``` ### LoGeR path (higher quality, more setup) @@ -76,6 +78,18 @@ curl -L -C - "https://huggingface.co/Junyi42/LoGeR/resolve/main/LoGeR_star/lates -o third_party/LoGeR/ckpts/LoGeR_star/latest.pt ``` +And then you can run: + +```bash +uv run deepreefmap reconstruct \ + --videos GX010001.MP4 \ + --mapping loger_star \ + --camera-profile gopro_hero_10 \ + --out out_loger \ +``` + + + ### DINOv3 segmentation models (access + authentication) The DINOv3-based segmentation models are higher quality than SegFormer models, but you need access/authentication on Hugging Face. @@ -229,8 +243,27 @@ Viewer highlights: - Toggle class visibility and switch color mode (RGB vs semantic colors). - Use `Accumulate` to overlay filtered points up to current timeline index. 
+ +## Citation + +If you use this repository or build on it, please cite: + +```bibtex +@article{sauder2024scalable, + title={Scalable semantic 3D mapping of coral reefs with deep learning}, + author={Sauder, Jonathan and Banc-Prandi, Guilhem and Meibom, Anders and Tuia, Devis}, + journal={Methods in Ecology and Evolution}, + volume={15}, + number={5}, + pages={916--934}, + year={2024}, + publisher={Wiley Online Library} +} +``` + ## License DeepReefMap is licensed under the [Apache License 2.0](LICENSE). Vendored or optional third-party components (notably `third_party/LoGeR` and downloaded checkpoints) carry their own terms; see `THIRD_PARTY_NOTICES.md` before redistribution. + diff --git a/deepreefmap/cli/main.py b/deepreefmap/cli/main.py index dc50cb5..ab51d1f 100644 --- a/deepreefmap/cli/main.py +++ b/deepreefmap/cli/main.py @@ -74,7 +74,7 @@ def reconstruct( loger_window_size: int = typer.Option(32, help="LoGeR window size."), loger_overlap_size: int = typer.Option(3, help="LoGeR overlap size."), refine_intrinsics_from_mapper: bool = typer.Option( - False, + True, help=( "Allow mapping backend to refine camera intrinsics and override camera profile K for " "downstream 3D reconstruction." 
diff --git a/deepreefmap/mapping/loger_backend.py index 70b5615..a9c495e 100644 --- a/deepreefmap/mapping/loger_backend.py +++ b/deepreefmap/mapping/loger_backend.py @@ -49,7 +49,7 @@ def __init__( overlap_size: int = 3, model_path: str | None = None, config_path: str | None = None, - target_resolution: tuple[int, int] = (448, 252), + target_resolution: tuple[int, int] = (504, 280), se3: bool = False, sim3: bool = False, turn_off_ttt: bool = False, From 3be9e64095dcbece1dd29280c20437a6374e25f8 Mon Sep 17 00:00:00 2001 From: Jonathan Sauder Date: Wed, 6 May 2026 20:15:37 -0400 Subject: [PATCH 4/8] Add VIT-S --- deepreefmap/segmentation/registry.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/deepreefmap/segmentation/registry.py b/deepreefmap/segmentation/registry.py index 3134603..ef7db61 100644 --- a/deepreefmap/segmentation/registry.py +++ b/deepreefmap/segmentation/registry.py @@ -21,6 +21,7 @@ def predict_batch(self, images_rgb: Sequence[np.ndarray]) -> list[SegmentationOu _MODELS: dict[str, tuple[int, int]] = { + "coralscapes-vit-s-dpt": (384, 688), "coralscapes-vit-l-dpt": (768, 1376), "coralscapes-vit-b-dpt": (768, 1376), "segformer-b2": (1024, 1024), @@ -39,6 +40,8 @@ def create_segmentation_model(name: str) -> SegmentationModel: return DinoV3DPTWrapper("EPFL-ECEO/coralscapes-vit-l-dpt", _MODELS[name]) if name == "coralscapes-vit-b-dpt": return DinoV3DPTWrapper("EPFL-ECEO/coralscapes-vit-b-dpt", _MODELS[name]) + if name == "coralscapes-vit-s-dpt": + return DinoV3DPTWrapper("EPFL-ECEO/coralscapes-vit-s-dpt", _MODELS[name]) return _DummySegmentation(name=name, resolution=_MODELS[name]) From 21d5e278ff0bf7a218ec61f221a25d54508aab30 Mon Sep 17 00:00:00 2001 From: Jonathan Sauder Date: Wed, 6 May 2026 21:14:31 -0400 Subject: [PATCH 5/8] Set refine intrinsics to false --- README.md | 5 +-- camera_profiles/gopro_hero_10.json | 54 --------------------------- camera_profiles/guilhems_camera.json | 55
---------------------------- deepreefmap/cli/main.py | 14 +++++-- 4 files changed, 12 insertions(+), 116 deletions(-) delete mode 100644 camera_profiles/gopro_hero_10.json delete mode 100644 camera_profiles/guilhems_camera.json diff --git a/README.md b/README.md index 09b6a03..822d80b 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,6 @@ # DeepReefMap -[DeepReefMap](https://besjournals.onlinelibrary.wiley.com/doi/full/10.1111/2041-210X.14307) turns reef videos into 3D reconstructions and semantic maps (for example, coral classes overlaid on geometry). It is designed so you can swap segmentation and reconstruction backends while keeping the same command-line workflow. - +[DeepReefMap](https://besjournals.onlinelibrary.wiley.com/doi/full/10.1111/2041-210X.14307) is a software for rapid 3D semantic mapping of coral reefs from handheld cameras. Repository maintained by [Hugues Sibille](https://github.com/HuguesSib) (EPFL) and [Jonathan Sauder](https://josauder.github.io/) (MIT/EPFL). ## Quick overview @@ -113,7 +112,7 @@ uv run huggingface-cli login If your footage is from a GoPro Hero 10 in Linear mode with the GoPro casing setup used by this project, use the built-in profile: -- Camera profile: `gopro_hero_10` (file: `camera_profiles/gopro_hero_10.json`) +- Camera profile: `gopro_hero_10` (bundled JSON: `deepreefmap/resources/camera_profiles/gopro_hero_10.json`). You can also override or add profiles with `./camera_profiles/.json` in the current working directory. 
Example: diff --git a/camera_profiles/gopro_hero_10.json b/camera_profiles/gopro_hero_10.json deleted file mode 100644 index dc9ed2f..0000000 --- a/camera_profiles/gopro_hero_10.json +++ /dev/null @@ -1,54 +0,0 @@ -{ - "name": "gopro_hero_10", - "source": "colmap_radial_v1", - "distorted": { - "model": "RADIAL", - "params": { - "fx": 1243.6276334472113, - "fy": 1243.6276334472113, - "cx": 960.0, - "cy": 540.0, - "k1": 0.36223110184368823, - "k2": 0.2476961799393366 - } - }, - "rectified_pinhole": { - "image_size": [ - 1920, - 1080 - ], - "K": [ - [ - 1562.98876953125, - 0.0, - 959.5 - ], - [ - 0.0, - 1562.98876953125, - 539.5 - ], - [ - 0.0, - 0.0, - 1.0 - ] - ] - }, - "diagnostics": { - "n_input_frames": 100, - "n_registered_images": 100, - "mean_reprojection_error_px": 0.783395585447909, - "camera_model": "RADIAL", - "source_video": "redacted-example-source-video", - "sampling_fps": 10, - "begin_s": 12.0, - "end_s": null, - "valid_roi_xywh": [ - 0, - 0, - 1919, - 1079 - ] - } -} \ No newline at end of file diff --git a/camera_profiles/guilhems_camera.json b/camera_profiles/guilhems_camera.json deleted file mode 100644 index 465469f..0000000 --- a/camera_profiles/guilhems_camera.json +++ /dev/null @@ -1,55 +0,0 @@ -{ - "name": "guilhems_camera", - "source": "colmap_radial_v1", - "distorted": { - "model": "SIMPLE_RADIAL", - "params": { - "fx": 1307.4492693375018, - "fy": 1307.4492693375018, - "cx": 1920.0, - "cy": 1080.0, - "k1": -0.012404733627158448, - "k2": 0.0 - } - }, - "rectified_pinhole": { - "image_size": [ - 3840, - 2160 - ], - "K": [ - [ - 1296.8115234375, - 0.0, - 1919.5 - ], - [ - 0.0, - 1296.8115234375, - 1079.5 - ], - [ - 0.0, - 0.0, - 1.0 - ] - ] - }, - "diagnostics": { - "n_input_frames": 88, - "n_registered_images": 88, - "mean_reprojection_error_px": 0.9166741137226962, - "camera_model": "SIMPLE_RADIAL", - "rectification_mode": "undistort", - "source_video": "210903_MAGARSAM_ISLAND_Day_5_Dive_1_1.MP4", - "sampling_fps": 10, - "begin_s": 10.0, - 
"end_s": 17.0, - "valid_roi_xywh": [ - 0, - 0, - 3840, - 2159 - ] - } -} \ No newline at end of file diff --git a/deepreefmap/cli/main.py b/deepreefmap/cli/main.py index ab51d1f..8654d4e 100644 --- a/deepreefmap/cli/main.py +++ b/deepreefmap/cli/main.py @@ -48,7 +48,10 @@ def reconstruct( fps: int = typer.Option(10, help="Target processing framerate."), segmentation: str = typer.Option("coralscapes-vit-b-dpt", help="Segmentation model name."), mapping: str = typer.Option("scsfmlearner", help="3D mapping backend name."), - camera_profile: str = typer.Option(..., help="Camera profile name (in camera_profiles)."), + camera_profile: str = typer.Option( + ..., + help="Camera profile name: bundled under deepreefmap or `./camera_profiles/.json` in CWD.", + ), out: Path = typer.Option(Path("out"), help="Output directory."), begin: Optional[float] = typer.Option(None, help="Start timestamp in the concatenated stream (seconds)."), end: Optional[float] = typer.Option(None, help="End timestamp in the concatenated stream (seconds)."), @@ -74,7 +77,7 @@ def reconstruct( loger_window_size: int = typer.Option(32, help="LoGeR window size."), loger_overlap_size: int = typer.Option(3, help="LoGeR overlap size."), refine_intrinsics_from_mapper: bool = typer.Option( - True, + False, help=( "Allow mapping backend to refine camera intrinsics and override camera profile K for " "downstream 3D reconstruction." 
@@ -183,7 +186,7 @@ def reconstruct( @app.command("calibrate") def calibrate( video: Path = typer.Argument(..., exists=True), - name: str = typer.Option(..., help="Profile name for camera_profiles/.json"), + name: str = typer.Option(..., help="Profile name; writes `./camera_profiles/.json`."), n_frames: int = typer.Option(100), fps: int = typer.Option(10), begin: Optional[float] = typer.Option(None, help="Optional begin timestamp (seconds) for calibration window."), @@ -202,7 +205,10 @@ def calibrate( @app.command("verify-calibration") def verify_calibration( - name: str = typer.Argument(..., help="Camera profile name in camera_profiles."), + name: str = typer.Argument( + ..., + help="Camera profile name (bundled or `./camera_profiles/.json` in CWD).", + ), ) -> None: report = verify_camera_profile(name) typer.echo(json.dumps(report, indent=2)) From e6634bfc6f64edd94ed0d3e1dfa64b7343d42ce8 Mon Sep 17 00:00:00 2001 From: Hugues Sibille Date: Thu, 7 May 2026 11:30:37 +0200 Subject: [PATCH 6/8] fix: wrong number of frames sampled and clear cache if no hit --- deepreefmap/io/video.py | 52 +++++++++++++++++++++++++--- deepreefmap/pipeline/orchestrator.py | 28 ++++++++++----- deepreefmap/telemetry/gopro.py | 21 +++++------ tests/test_prepare_frames_batch.py | 13 +++++++ tests/test_video_frames.py | 47 +++++++++++++++++++++++++ 5 files changed, 138 insertions(+), 23 deletions(-) diff --git a/deepreefmap/io/video.py b/deepreefmap/io/video.py index c847ae2..43bbe70 100644 --- a/deepreefmap/io/video.py +++ b/deepreefmap/io/video.py @@ -21,23 +21,65 @@ def iter_video_frames( """ global_idx = 0 cumulative_time = 0.0 + next_sample_time = _first_sample_time(begin_s, target_fps) + interval_end = float("inf") if end_s is None else max(0.0, float(end_s)) for path in video_paths: meta = iio.immeta(path) src_fps = float(meta.get("fps", target_fps)) - stride = max(1, int(round(src_fps / max(1, target_fps)))) + src_fps = src_fps if src_fps > 0 else float(max(1, target_fps)) local_count 
= 0 for local_idx, frame in enumerate(iio.imiter(path)): local_count = local_idx + 1 - if local_idx % stride != 0: - continue + if next_sample_time >= interval_end: + break t = cumulative_time + local_idx / src_fps - if begin_s is not None and t < begin_s: + if t + 1e-9 < next_sample_time: continue - if end_s is not None and t >= end_s: + if t >= interval_end: break yield global_idx, frame global_idx += 1 + next_sample_time += 1.0 / max(1, target_fps) + while next_sample_time <= t + 1e-9: + next_sample_time += 1.0 / max(1, target_fps) cumulative_time += local_count / src_fps + if next_sample_time >= interval_end: + break + + +def selected_local_indices_for_clip( + *, + nframes: int, + src_fps: float, + target_fps: int, + cumulative_time: float, + next_sample_time: float, + end_s: float | None, +) -> tuple[list[int], float]: + """Return local frame indices selected by the timestamp sampler for one clip.""" + src_fps = src_fps if src_fps > 0 else float(max(1, target_fps)) + interval_end = float("inf") if end_s is None else max(0.0, float(end_s)) + selected: list[int] = [] + sample_time = next_sample_time + for local_idx in range(max(0, int(nframes))): + if sample_time >= interval_end: + break + t = cumulative_time + local_idx / src_fps + if t + 1e-9 < sample_time: + continue + if t >= interval_end: + break + selected.append(local_idx) + sample_time += 1.0 / max(1, target_fps) + while sample_time <= t + 1e-9: + sample_time += 1.0 / max(1, target_fps) + return selected, sample_time + + +def _first_sample_time(begin_s: float | None, target_fps: int) -> float: + start = 0.0 if begin_s is None else max(0.0, float(begin_s)) + fps = float(max(1, target_fps)) + return float(np.ceil(start * fps - 1e-9) / fps) diff --git a/deepreefmap/pipeline/orchestrator.py b/deepreefmap/pipeline/orchestrator.py index 12bdd72..f3213c3 100644 --- a/deepreefmap/pipeline/orchestrator.py +++ b/deepreefmap/pipeline/orchestrator.py @@ -2,6 +2,7 @@ import gc import logging +import shutil import 
time from pathlib import Path from typing import Callable @@ -15,7 +16,7 @@ from deepreefmap.camera.rectification import Rectifier from deepreefmap.config.classes import ClassConfig, DEFAULT_CLASSES_PATH, load_classes from deepreefmap.io.exports import save_geometry_cloud, save_ortho_grid, save_semantic_cloud -from deepreefmap.io.video import iter_video_frames +from deepreefmap.io.video import _first_sample_time, iter_video_frames, selected_local_indices_for_clip from deepreefmap.mapping.registry import create_mapping_backend from deepreefmap.pipeline import resume as resume_mod from deepreefmap.pipeline.artifacts import FrameBatch, MappingSequenceResult, PreparedFrame @@ -114,6 +115,7 @@ def run_reconstruction( # Preprocess invalidated → mapping cache also invalid (depends on prep key). resume_mod.clear_sidecar(output_dir, resume_mod.STAGE_PREPROCESS) resume_mod.clear_sidecar(output_dir, resume_mod.STAGE_MAPPING) + _clear_preprocess_artifacts(output_dir) if skip_segmentation: segmentation = None @@ -158,6 +160,7 @@ def progress_cb(current: int, total: int | None, frame_idx: int, elapsed_s: floa logger.warning("Resume: preprocess artifacts incomplete, recomputing.") resume_mod.clear_sidecar(output_dir, resume_mod.STAGE_PREPROCESS) resume_mod.clear_sidecar(output_dir, resume_mod.STAGE_MAPPING) + _clear_preprocess_artifacts(output_dir) prep_hit = False if not skip_segmentation: logger.info("Loading segmentation model '%s'", segmentation_name) @@ -668,6 +671,12 @@ def _coerce_intrinsics_to_depth_scale( return scale_intrinsics(k, processing_image_size, (depth_w, depth_h)) return k +def _clear_preprocess_artifacts(output_dir: Path) -> None: + for dirname in ("frames", "labels", "masks"): + path = output_dir / dirname + if path.exists(): + shutil.rmtree(path) + def _build_geometry_cloud( frame_batch: FrameBatch, @@ -750,12 +759,12 @@ def _estimate_selected_frame_count( interval_end = float("inf") if end_s is None else max(0.0, end_s) total = 0 cumulative_time = 0.0 + 
next_sample_time = _first_sample_time(begin_s, fps) for path in video_paths: meta = iio.immeta(path) src_fps = float(meta.get("fps", fps)) src_fps = src_fps if src_fps > 0 else float(max(1, fps)) - stride = max(1, int(round(src_fps / max(1, fps)))) nframes_raw = meta.get("nframes") nframes: int | None = None @@ -785,12 +794,15 @@ def _estimate_selected_frame_count( sel_start = max(interval_start, clip_start) sel_end = min(interval_end, clip_end) if sel_end > sel_start and nframes > 0: - local_start_idx = max(0, int(np.ceil((sel_start - clip_start) * src_fps))) - local_end_idx_exclusive = min(nframes, int(np.ceil((sel_end - clip_start) * src_fps))) - if local_end_idx_exclusive > local_start_idx: - first = ((local_start_idx + stride - 1) // stride) * stride - if first < local_end_idx_exclusive: - total += ((local_end_idx_exclusive - 1 - first) // stride) + 1 + selected, next_sample_time = selected_local_indices_for_clip( + nframes=nframes, + src_fps=src_fps, + target_fps=fps, + cumulative_time=cumulative_time, + next_sample_time=next_sample_time, + end_s=end_s, + ) + total += len(selected) cumulative_time = clip_end if interval_end <= cumulative_time: diff --git a/deepreefmap/telemetry/gopro.py b/deepreefmap/telemetry/gopro.py index e93c8e2..db3e0e8 100644 --- a/deepreefmap/telemetry/gopro.py +++ b/deepreefmap/telemetry/gopro.py @@ -6,6 +6,8 @@ import imageio.v3 as iio import numpy as np +from deepreefmap.io.video import _first_sample_time, selected_local_indices_for_clip + GravityStream = tuple[np.ndarray, np.ndarray] logger = logging.getLogger(__name__) @@ -33,8 +35,8 @@ def extract_gravity_vectors_for_video_selection( """Return one normalized gravity vector per frame selected by `iter_video_frames`.""" parts: list[np.ndarray] = [] cumulative_time = 0.0 - interval_start = 0.0 if begin_s is None else max(0.0, float(begin_s)) interval_end = float("inf") if end_s is None else max(0.0, float(end_s)) + next_sample_time = _first_sample_time(begin_s, target_fps) for path 
in video_paths: meta = iio.immeta(path) @@ -46,15 +48,14 @@ def extract_gravity_vectors_for_video_selection( return None clip_duration = nframes / src_fps - stride = max(1, int(round(src_fps / max(1, target_fps)))) - selected_local_indices = [] - for local_idx in range(0, nframes, stride): - t = cumulative_time + local_idx / src_fps - if t < interval_start: - continue - if t >= interval_end: - break - selected_local_indices.append(local_idx) + selected_local_indices, next_sample_time = selected_local_indices_for_clip( + nframes=nframes, + src_fps=src_fps, + target_fps=target_fps, + cumulative_time=cumulative_time, + next_sample_time=next_sample_time, + end_s=end_s, + ) if selected_local_indices: stream = _extract_gravity_stream(path) diff --git a/tests/test_prepare_frames_batch.py b/tests/test_prepare_frames_batch.py index e1b2642..1163a67 100644 --- a/tests/test_prepare_frames_batch.py +++ b/tests/test_prepare_frames_batch.py @@ -57,3 +57,16 @@ def test_prepare_frames_segments_rectified_frames_in_batches(tmp_path, monkeypat assert all(frame.image_path is not None and frame.image_path.exists() for frame in batch.frames) assert all(frame.labels_path is not None and frame.labels_path.exists() for frame in batch.frames) assert all(frame.mask_path is not None and frame.mask_path.exists() for frame in batch.frames) + + +def test_clear_preprocess_artifacts_removes_stale_frame_outputs(tmp_path) -> None: + for dirname in ("frames", "labels", "masks"): + artifact_dir = tmp_path / dirname + artifact_dir.mkdir() + (artifact_dir / "00000099.stale").write_text("old") + + orchestrator._clear_preprocess_artifacts(tmp_path) + + assert not (tmp_path / "frames").exists() + assert not (tmp_path / "labels").exists() + assert not (tmp_path / "masks").exists() diff --git a/tests/test_video_frames.py b/tests/test_video_frames.py index 4863fd0..89f0e21 100644 --- a/tests/test_video_frames.py +++ b/tests/test_video_frames.py @@ -3,6 +3,7 @@ import numpy as np from deepreefmap.io import 
video +from deepreefmap.pipeline import orchestrator def test_iter_video_frames_applies_concatenated_time_window(monkeypatch): @@ -18,3 +19,49 @@ def test_iter_video_frames_applies_concatenated_time_window(monkeypatch): assert [idx for idx, _ in result] == [0, 1] assert [int(frame[0, 0, 0]) for _, frame in result] == [2, 4] + + +def test_estimated_frame_count_matches_iterator_for_end_exclusive_window(monkeypatch): + path = Path("clip.mp4") + frames = [np.full((1, 1, 3), i % 256, dtype=np.uint8) for i in range(300)] + + monkeypatch.setattr(video.iio, "immeta", lambda p: {"fps": 30, "nframes": len(frames)}) + monkeypatch.setattr(video.iio, "imiter", lambda p: iter(frames)) + monkeypatch.setattr(orchestrator.iio, "immeta", lambda p: {"fps": 30, "nframes": len(frames)}) + + result = list(video.iter_video_frames([path], target_fps=10, begin_s=1.0, end_s=2.0)) + estimated = orchestrator._estimate_selected_frame_count([path], fps=10, begin_s=1.0, end_s=2.0) + + assert estimated == 10 + assert len(result) == estimated + assert [int(frame[0, 0, 0]) for _, frame in result] == list(range(30, 60, 3)) + + +def test_estimated_frame_count_matches_iterator_for_non_integer_source_fps(monkeypatch): + path = Path("clip.mp4") + frames = [np.full((1, 1, 3), i, dtype=np.uint8) for i in range(120)] + + monkeypatch.setattr(video.iio, "immeta", lambda p: {"fps": 29.97, "nframes": len(frames)}) + monkeypatch.setattr(video.iio, "imiter", lambda p: iter(frames)) + monkeypatch.setattr(orchestrator.iio, "immeta", lambda p: {"fps": 29.97, "nframes": len(frames)}) + + result = list(video.iter_video_frames([path], target_fps=10, begin_s=0.0, end_s=1.0)) + estimated = orchestrator._estimate_selected_frame_count([path], fps=10, begin_s=0.0, end_s=1.0) + + assert len(result) == estimated + assert len(result) == 10 + + +def test_timestamp_sampler_honors_target_fps_when_source_is_not_multiple(monkeypatch): + path = Path("clip.mp4") + frames = [np.full((1, 1, 3), i % 256, dtype=np.uint8) for i in 
range(750)] + + monkeypatch.setattr(video.iio, "immeta", lambda p: {"fps": 25, "nframes": len(frames)}) + monkeypatch.setattr(video.iio, "imiter", lambda p: iter(frames)) + monkeypatch.setattr(orchestrator.iio, "immeta", lambda p: {"fps": 25, "nframes": len(frames)}) + + result = list(video.iter_video_frames([path], target_fps=10, begin_s=10.0, end_s=20.0)) + estimated = orchestrator._estimate_selected_frame_count([path], fps=10, begin_s=10.0, end_s=20.0) + + assert estimated == 100 + assert len(result) == estimated From 1ee7bffe9d75db5b227750d43210ab65e1e593ff Mon Sep 17 00:00:00 2001 From: Hugues Sibille Date: Thu, 7 May 2026 23:37:09 +0200 Subject: [PATCH 7/8] Apply viser transect crop to render-video Render-video now crops the ortho panel using the same transect-line geometry as the viser app. Crop parameters can be passed via --transect-length-m / --crop-width-m, or read from a `transect_crop` block in run_manifest.json that the viser app writes when "Save current ortho + cover" is clicked. Also tightens the ortho panel to the bbox of mapped pixels and always stacks the RGB ortho above the class-colored ortho (no more side-by-side flip), letterboxes into the cell so the rendered ortho doesn't get squished, and adds tqdm + logger output so the terminal shows progress. 
--- deepreefmap/cli/main.py | 22 +++- deepreefmap/postproc/reports.py | 142 +++++++++++++++++++++++-- deepreefmap/visualization/viser_app.py | 23 ++++ 3 files changed, 175 insertions(+), 12 deletions(-) diff --git a/deepreefmap/cli/main.py b/deepreefmap/cli/main.py index 8654d4e..00d707a 100644 --- a/deepreefmap/cli/main.py +++ b/deepreefmap/cli/main.py @@ -1,6 +1,7 @@ from pathlib import Path from typing import Optional import json +import logging import typer @@ -217,8 +218,27 @@ def verify_calibration( @app.command("render-video") def render_video( run_dir: Path = typer.Option(..., exists=True, help="Run output directory from reconstruct."), + transect_length_m: Optional[float] = typer.Option( + None, + "--transect-length-m", + help="Transect length in meters; enables ortho crop (matches viser). Falls back to manifest.", + ), + crop_width_m: Optional[float] = typer.Option( + None, + "--crop-width-m", + help="Crop width in meters around the transect line. Falls back to manifest.", + ), ) -> None: - render_offline_video_placeholder(run_dir) + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)s %(name)s: %(message)s", + datefmt="%H:%M:%S", + ) + render_offline_video_placeholder( + run_dir, + transect_length_m=transect_length_m, + crop_width_m=crop_width_m, + ) typer.echo(f"Offline render completed in {run_dir}") diff --git a/deepreefmap/postproc/reports.py b/deepreefmap/postproc/reports.py index 18ec3f1..e8e578a 100644 --- a/deepreefmap/postproc/reports.py +++ b/deepreefmap/postproc/reports.py @@ -1,10 +1,18 @@ from pathlib import Path import json +import logging import cv2 import numpy as np +from tqdm.auto import tqdm from deepreefmap.config.classes import load_classes +from deepreefmap.pointcloud.transect_crop import ( + build_transect_crop_geometry, + build_transect_crop_selection, +) + +logger = logging.getLogger(__name__) def save_cover_report(path: Path, cover: dict[str, object]) -> None: @@ -15,7 +23,11 @@ def 
save_run_manifest(path: Path, manifest: dict[str, object]) -> None: path.write_text(json.dumps(manifest, indent=2)) -def render_offline_video_placeholder(run_dir: Path) -> None: +def render_offline_video_placeholder( + run_dir: Path, + transect_length_m: float | None = None, + crop_width_m: float | None = None, +) -> None: """Render a DRM-style 4-panel QC video from manifest artifacts. Layout per frame (matching the original mee-deepreefmap render): @@ -60,6 +72,24 @@ def render_offline_video_placeholder(run_dir: Path) -> None: ortho = _load_ortho(run_dir / "ortho.npz", class_colors) + crop_block = manifest.get("transect_crop") if isinstance(manifest.get("transect_crop"), dict) else None + if transect_length_m is None and crop_block and crop_block.get("enabled"): + transect_length_m = float(crop_block.get("transect_length_m")) + if crop_width_m is None and crop_block and crop_block.get("enabled"): + crop_width_m = float(crop_block.get("crop_width_m")) + if ortho is not None and transect_length_m is not None and crop_width_m is not None: + ortho = _apply_transect_crop_to_ortho( + ortho, + transect_label=classes_config.single_id_for_role("transect_line"), + transect_tools_label=classes_config.single_id_for_role("transect_tools"), + transect_length_m=transect_length_m, + crop_width_m=crop_width_m, + ) + if ortho is not None: + # Always tighten to the bbox of mapped pixels so the ortho panel does + # not include vast empty borders when no explicit transect crop was set. 
+ ortho = _tighten_ortho_to_valid_bbox(ortho) + out_path = run_dir / "videos" / "qc_render.mp4" out_path.parent.mkdir(parents=True, exist_ok=True) first = cv2.imread(str(frame_paths[0])) @@ -69,7 +99,11 @@ def render_offline_video_placeholder(run_dir: Path) -> None: writer = cv2.VideoWriter(str(out_path), cv2.VideoWriter_fourcc(*"mp4v"), 10, (w * 2, h * 2)) legend_cache: dict[tuple[int, ...], np.ndarray] = {} - for idx, frame_path in enumerate(frame_paths[: len(depths)]): + n_frames = len(frame_paths[: len(depths)]) + logger.info("Rendering QC video → %s (%d frames)", out_path, n_frames) + iterable = list(enumerate(frame_paths[: len(depths)])) + progress = tqdm(iterable, desc="render-video", unit="frame", total=n_frames) + for idx, frame_path in progress: bgr = cv2.imread(str(frame_path)) if bgr is None: continue @@ -100,7 +134,9 @@ def render_offline_video_placeholder(run_dir: Path) -> None: top = np.concatenate([bgr, seg_panel], axis=1) bottom = np.concatenate([depth_panel, ortho_panel], axis=1) writer.write(np.concatenate([top, bottom], axis=0)) + progress.close() writer.release() + logger.info("Render-video complete: %s", out_path) def _colorize_depth(depth: np.ndarray, size_wh: tuple[int, int]) -> np.ndarray: @@ -149,6 +185,72 @@ def _load_ortho(path: Path, class_colors: dict[int, tuple[int, int, int]]) -> di return {"rgb": rgb, "class_rgb": class_rgb, "labels": labels, "frame_index": frame_index} +def _tighten_ortho_to_valid_bbox(ortho: dict) -> dict: + """Crop the ortho dict to the bbox of pixels that received any frame data. + + Pixels with `frame_index < 0` are treated as empty. When all pixels are + valid, the ortho is returned unchanged. This avoids the ortho panel + rendering a mostly-empty rectangle (which then has to be stretched to fit + the panel cell, squishing the actual content). 
+ """ + fi = ortho.get("frame_index") + if fi is None or fi.size == 0: + return ortho + valid = fi >= 0 + if not np.any(valid) or np.all(valid): + return ortho + rows = np.any(valid, axis=1) + cols = np.any(valid, axis=0) + y0 = int(np.argmax(rows)) + y1 = int(rows.size - np.argmax(rows[::-1])) + x0 = int(np.argmax(cols)) + x1 = int(cols.size - np.argmax(cols[::-1])) + if y1 <= y0 or x1 <= x0: + return ortho + return { + "rgb": ortho["rgb"][y0:y1, x0:x1], + "class_rgb": ortho["class_rgb"][y0:y1, x0:x1], + "labels": ortho["labels"][y0:y1, x0:x1], + "frame_index": ortho["frame_index"][y0:y1, x0:x1], + } + + +def _apply_transect_crop_to_ortho( + ortho: dict, + transect_label: int | None, + transect_tools_label: int | None, + transect_length_m: float, + crop_width_m: float, +) -> dict: + geometry = build_transect_crop_geometry( + labels=ortho["labels"], + transect_label=transect_label, + transect_tools_label=transect_tools_label, + ) + selection = build_transect_crop_selection( + geometry=geometry, + transect_length_m=transect_length_m, + crop_width_m=crop_width_m, + ) + if selection is None: + return ortho + y0, y1, x0, x1 = selection.y0, selection.y1, selection.x0, selection.x1 + mask = selection.mask + + def _slice(arr: np.ndarray, fill: int) -> np.ndarray: + sub = arr[y0:y1, x0:x1] + if sub.ndim == 3: + return np.where(mask[..., None], sub, fill).astype(sub.dtype) + return np.where(mask, sub, fill).astype(sub.dtype) + + return { + "rgb": _slice(ortho["rgb"], 0), + "class_rgb": _slice(ortho["class_rgb"], 0), + "labels": _slice(ortho["labels"], 0), + "frame_index": _slice(ortho["frame_index"], -1), + } + + def _ortho_panel( ortho: dict | None, class_colors: dict[int, tuple[int, int, int]], @@ -167,16 +269,12 @@ def _ortho_panel( rgb_cum = (ortho["rgb"] * mask3).astype(np.uint8) class_cum = (ortho["class_rgb"] * mask3).astype(np.uint8) - h_o, w_o = rgb_cum.shape[:2] - if h_o < w_o: - stacked = np.concatenate([rgb_cum, class_cum], axis=0) - else: - stacked = 
np.concatenate([rgb_cum, class_cum], axis=1) + stacked = np.concatenate([rgb_cum, class_cum], axis=0) stacked_bgr = cv2.cvtColor(stacked, cv2.COLOR_RGB2BGR) present = _present_class_ids(ortho.get("labels"), valid, class_colors) if not present: - return cv2.resize(stacked_bgr, (target_w, target_h), interpolation=cv2.INTER_AREA) + return _letterbox_into(stacked_bgr, target_w, target_h) legend_bgr = legend_cache.get(present) if legend_bgr is None: @@ -190,9 +288,31 @@ def _ortho_panel( legend_cap = max(80, target_w // 3) legend_w = max(1, min(legend_bgr.shape[1], legend_cap, target_w - 1)) legend_resized = cv2.resize(legend_bgr, (legend_w, target_h), interpolation=cv2.INTER_AREA) - stacked_w = target_w - legend_w - stacked_resized = cv2.resize(stacked_bgr, (stacked_w, target_h), interpolation=cv2.INTER_AREA) - return np.concatenate([stacked_resized, legend_resized], axis=1) + stacked_cell_w = target_w - legend_w + stacked_letterboxed = _letterbox_into(stacked_bgr, stacked_cell_w, target_h) + return np.concatenate([stacked_letterboxed, legend_resized], axis=1) + + +def _letterbox_into(image: np.ndarray, cell_w: int, cell_h: int) -> np.ndarray: + """Resize `image` into a `cell_w x cell_h` BGR canvas preserving aspect. + + Uses INTER_AREA on shrink and INTER_LINEAR on enlarge; pads with black. 
+ """ + cell_w = max(1, int(cell_w)) + cell_h = max(1, int(cell_h)) + h, w = image.shape[:2] + if h <= 0 or w <= 0: + return np.zeros((cell_h, cell_w, 3), dtype=np.uint8) + scale = min(cell_w / w, cell_h / h) + new_w = max(1, int(round(w * scale))) + new_h = max(1, int(round(h * scale))) + interp = cv2.INTER_AREA if scale < 1.0 else cv2.INTER_LINEAR + resized = cv2.resize(image, (new_w, new_h), interpolation=interp) + canvas = np.zeros((cell_h, cell_w, 3), dtype=np.uint8) + x0 = (cell_w - new_w) // 2 + y0 = (cell_h - new_h) // 2 + canvas[y0 : y0 + new_h, x0 : x0 + new_w] = resized + return canvas def _present_class_ids( diff --git a/deepreefmap/visualization/viser_app.py b/deepreefmap/visualization/viser_app.py index c2eb47c..4ae3da4 100644 --- a/deepreefmap/visualization/viser_app.py +++ b/deepreefmap/visualization/viser_app.py @@ -715,6 +715,7 @@ def _save_current_ortho_outputs(self) -> None: cv2.imwrite(str(output_dir / "ortho.png"), cv2.cvtColor(grid.rgb, cv2.COLOR_RGB2BGR)) save_ortho_grid(output_dir / "ortho.npz", grid) save_cover_report(output_dir / "benthic_cover.json", self._current_ortho_outputs.cover) + self._persist_crop_to_manifest(output_dir, self._active_crop_params) self._set_markdown_content( self._crop_summary_markdown_handle, self._cover_summary_markdown( @@ -725,6 +726,28 @@ def _save_current_ortho_outputs(self) -> None: + f"\n- Saved: `{output_dir}`", ) + @staticmethod + def _persist_crop_to_manifest(output_dir: Path, crop: TransectCropParams | None) -> None: + import json + + manifest_path = output_dir / "run_manifest.json" + if not manifest_path.exists(): + return + try: + manifest = json.loads(manifest_path.read_text()) + except (OSError, ValueError): + return + if crop is None: + manifest["transect_crop"] = {"enabled": False} + else: + manifest["transect_crop"] = { + "enabled": True, + "transect_length_m": float(crop.transect_length_m), + "crop_width_m": float(crop.crop_width_m), + } + with suppress(OSError): + 
manifest_path.write_text(json.dumps(manifest, indent=2)) + def _current_crop_params(self) -> TransectCropParams | None: if not bool(self._crop_enabled_toggle is not None and self._crop_enabled_toggle.value): return None From 7e104d8e07567d2d918fe481d22db7524401bc00 Mon Sep 17 00:00:00 2001 From: Hugues Sibille Date: Thu, 7 May 2026 23:59:47 +0200 Subject: [PATCH 8/8] Guard _active_crop_params for tests bypassing __init__ --- deepreefmap/visualization/viser_app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepreefmap/visualization/viser_app.py b/deepreefmap/visualization/viser_app.py index 4ae3da4..b3c3ee0 100644 --- a/deepreefmap/visualization/viser_app.py +++ b/deepreefmap/visualization/viser_app.py @@ -715,7 +715,7 @@ def _save_current_ortho_outputs(self) -> None: cv2.imwrite(str(output_dir / "ortho.png"), cv2.cvtColor(grid.rgb, cv2.COLOR_RGB2BGR)) save_ortho_grid(output_dir / "ortho.npz", grid) save_cover_report(output_dir / "benthic_cover.json", self._current_ortho_outputs.cover) - self._persist_crop_to_manifest(output_dir, self._active_crop_params) + self._persist_crop_to_manifest(output_dir, getattr(self, "_active_crop_params", None)) self._set_markdown_content( self._crop_summary_markdown_handle, self._cover_summary_markdown(