From 7ffe106bb10e8fd0f05917e1f7f71ada1a3b551e Mon Sep 17 00:00:00 2001 From: alexlin2 Date: Wed, 21 Jan 2026 18:34:23 -0500 Subject: [PATCH 1/6] initial working detection node refactor --- README.md | 3 + detect_anything/CMakeLists.txt | 47 ++++ detect_anything/README.md | 28 ++ detect_anything/config/objects.yaml | 46 ++++ detect_anything/detect_anything/__init__.py | 1 + detect_anything/detect_anything/detection.py | 256 ++++++++++++++++++ .../detect_anything/detection_node.py | 169 ++++++++++++ detect_anything/detect_anything/yoloe.py | 139 ++++++++++ .../env-hooks/venv_pythonpath.sh.in | 1 + detect_anything/msg/DetectionResult.msg | 10 + detect_anything/package.xml | 26 ++ detect_anything/resource/detect_anything | 1 + detect_anything/scripts/detection_node | 5 + requirements.txt | 7 +- semantic_mapping/package.xml | 27 ++ semantic_mapping/resource/semantic_mapping | 1 + semantic_mapping/semantic_mapping/__init__.py | 1 + semantic_mapping/setup.cfg | 4 + semantic_mapping/setup.py | 27 ++ vector_perception_utils/setup.cfg | 5 - 20 files changed, 796 insertions(+), 8 deletions(-) create mode 100644 detect_anything/CMakeLists.txt create mode 100644 detect_anything/README.md create mode 100644 detect_anything/config/objects.yaml create mode 100644 detect_anything/detect_anything/__init__.py create mode 100644 detect_anything/detect_anything/detection.py create mode 100644 detect_anything/detect_anything/detection_node.py create mode 100644 detect_anything/detect_anything/yoloe.py create mode 100644 detect_anything/env-hooks/venv_pythonpath.sh.in create mode 100644 detect_anything/msg/DetectionResult.msg create mode 100644 detect_anything/package.xml create mode 100644 detect_anything/resource/detect_anything create mode 100755 detect_anything/scripts/detection_node create mode 100644 semantic_mapping/package.xml create mode 100644 semantic_mapping/resource/semantic_mapping create mode 100644 semantic_mapping/semantic_mapping/__init__.py create mode 100644 semantic_mapping/setup.cfg create mode 100644 semantic_mapping/setup.py delete mode 100644 vector_perception_utils/setup.cfg diff --git a/README.md b/README.md index 49392b8..21352a8 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,8 @@ ROS2 perception stack for generalist robotics. 
## Packages - **track_anything** - EdgeTAM tracking + 3D segmentation with RGBD +- **detect_anything** - YOLO-E detection node and utilities +- **semantic_mapping** - placeholder for future mapping components (currently empty) - **vector_perception_utils** - Image and point cloud utilities ## Installation @@ -21,6 +23,7 @@ pip install -r requirements.txt # Build source /opt/ros/jazzy/setup.bash colcon build +``` diff --git a/detect_anything/CMakeLists.txt b/detect_anything/CMakeLists.txt new file mode 100644 index 0000000..c8e9188 --- /dev/null +++ b/detect_anything/CMakeLists.txt @@ -0,0 +1,47 @@ +cmake_minimum_required(VERSION 3.10) +project(detect_anything) + +find_package(ament_cmake REQUIRED) +find_package(rclpy REQUIRED) +find_package(sensor_msgs REQUIRED) +find_package(std_msgs REQUIRED) +find_package(vision_msgs REQUIRED) +find_package(cv_bridge REQUIRED) +find_package(rosidl_default_generators REQUIRED) +find_package(Python3 REQUIRED COMPONENTS Interpreter) + +rosidl_generate_interfaces(${PROJECT_NAME} + "msg/DetectionResult.msg" + DEPENDENCIES std_msgs sensor_msgs +) + +set(PYTHON_INSTALL_DIR "lib/python${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR}/site-packages") + +install( + DIRECTORY detect_anything + DESTINATION ${PYTHON_INSTALL_DIR} +) + +install( + PROGRAMS + scripts/detection_node + DESTINATION lib/${PROJECT_NAME} +) + +install( + FILES resource/detect_anything + DESTINATION share/${PROJECT_NAME}/resource +) + +install( + DIRECTORY config + DESTINATION share/${PROJECT_NAME} +) + +set(ENV_HOOK "${CMAKE_CURRENT_SOURCE_DIR}/env-hooks/venv_pythonpath.sh.in") +set(ENV_HOOK_OUT "${CMAKE_CURRENT_BINARY_DIR}/ament_cmake_environment_hooks/venv_pythonpath.sh") +configure_file(${ENV_HOOK} ${ENV_HOOK_OUT} @ONLY) +ament_environment_hooks(${ENV_HOOK_OUT}) + +ament_export_dependencies(rosidl_default_runtime) +ament_package() diff --git a/detect_anything/README.md b/detect_anything/README.md new file mode 100644 index 0000000..1e94a06 --- /dev/null +++ b/detect_anything/README.md @@ -0,0 +1,28 @@ +# detect_anything + +YOLO-E detection node that publishes `DetectionResult` with cropped masks and an annotated overlay topic. + +## What’s inside +- `detect_anything/detection.py`: detection results container and Ultralytics parsing helpers. +- `detect_anything/yoloe.py`: YOLO-E wrapper with prompt support and basic filtering. +- `detect_anything/detection_node.py`: ROS2 node wiring the detector to `DetectionResult`. +- `msg/DetectionResult.msg`: compressed image + cropped mask array. + +## Quick start +```bash +source ~/vector_venv/bin/activate +source /opt/ros/jazzy/setup.bash +colcon build --packages-select detect_anything +source install/setup.bash + +ros2 run detect_anything detection_node \ --ros-args -p model_path:=/path/to/yoloe/models \ -p model_name:=yoloe-11s-seg.pt \ -p conf:=0.6 \ -p max_area_ratio:=0.3 \ -p use_compressed:=false \ -p image_topic:=/camera/image +``` + +Topics: +- Publishes `/detection_result` (`detect_anything/DetectionResult`) and `/annotated_image_detection` (`sensor_msgs/Image`). +- Subscribes to `/camera/image/compressed` by default (`use_compressed` defaults to `true`); set `use_compressed:=false` to consume raw `/camera/image` instead.
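+
+## Consuming detections
+For reference, a minimal subscriber sketch (hypothetical `detection_consumer` node; assumes the workspace above is built and sourced) that decodes the message back into numpy form via `DetectionResults.from_msg`:
+
+```python
+import rclpy
+from rclpy.node import Node
+
+from detect_anything.detection import DetectionResults
+from detect_anything.msg import DetectionResult
+
+
+class DetectionConsumer(Node):
+    """Log each detection decoded from /detection_result."""
+
+    def __init__(self) -> None:
+        super().__init__("detection_consumer")
+        self.create_subscription(DetectionResult, "/detection_result", self._callback, 10)
+
+    def _callback(self, msg: DetectionResult) -> None:
+        # from_msg re-expands the cropped masks back to full image size.
+        results = DetectionResults.from_msg(msg)
+        for det in results.detections:
+            self.get_logger().info(f"{det.label} ({det.confidence:.2f}) bbox={det.bbox}")
+
+
+def main() -> None:
+    rclpy.init()
+    node = DetectionConsumer()
+    try:
+        rclpy.spin(node)
+    finally:
+        node.destroy_node()
+        rclpy.shutdown()
+```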
diff --git a/detect_anything/config/objects.yaml b/detect_anything/config/objects.yaml new file mode 100644 index 0000000..371f186 --- /dev/null +++ b/detect_anything/config/objects.yaml @@ -0,0 +1,46 @@ +# Simple list of object names for prompting YOLO-E +- chair +- desk +- tv_monitor +- sofa +- unknown +- printer +- coffee machine +- refrigerator +- trash can +- shoe +- sink +- table +- oven +- bed +- painting +- bulletin board +- plant +- vase +- cabinet +- shelf +- book +- cup +- sculpture +- keyboard +- mouse +- clock +- phone +- toilet +- bathtub +- microwave oven +- pan +- suitcase +- light +- curtain +- whiteboard +- shower knob +- bottle +- water dispenser +- vending machine +- laptop +- bag +- locker +- picture +- cardboard +- extinguisher diff --git a/detect_anything/detect_anything/__init__.py b/detect_anything/detect_anything/__init__.py new file mode 100644 index 0000000..9c61aab --- /dev/null +++ b/detect_anything/detect_anything/__init__.py @@ -0,0 +1 @@ +"""detect_anything package.""" diff --git a/detect_anything/detect_anything/detection.py b/detect_anything/detect_anything/detection.py new file mode 100644 index 0000000..1d64d76 --- /dev/null +++ b/detect_anything/detect_anything/detection.py @@ -0,0 +1,256 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import List, Optional, Sequence + +from cv_bridge import CvBridge +import cv2 +import numpy as np + +from detect_anything.msg import DetectionResult + +_BRIDGE = CvBridge() + + +@dataclass +class SingleDetectionResult: + """Simple container for a YOLO/Ultralytics detection with a segmentation mask.""" + + bbox: tuple[float, float, float, float] + track_id: int + class_id: int + label: str + confidence: float + mask: np.ndarray + + def area(self) -> float: + x1, y1, x2, y2 = self.bbox + return max(0.0, (x2 - x1)) * max(0.0, (y2 - y1)) + + +@dataclass +class DetectionResults: + """Container for detections plus the source image.""" + + image: np.ndarray + detections: List[SingleDetectionResult] + + def overlay(self, alpha: float = 0.4, thickness: int = 2) -> np.ndarray: + """Draw bboxes and masks on the image.""" + output = self.image.copy() + for det in self.detections: + rng = np.random.default_rng(det.track_id) + color = tuple(int(c) for c in rng.integers(50, 255, size=3)) + + if det.mask.size: + mask_bool = det.mask.astype(bool) + if mask_bool.shape[:2] == output.shape[:2]: + colored_mask = np.zeros_like(output) + colored_mask[mask_bool] = color + output[mask_bool] = cv2.addWeighted( + output[mask_bool], 1 - alpha, colored_mask[mask_bool], alpha, 0 + ) + contours, _ = cv2.findContours(det.mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + cv2.drawContours(output, contours, -1, color, thickness) + + x1, y1, x2, y2 = map(int, det.bbox) + cv2.rectangle(output, (x1, y1), (x2, y2), color, thickness) + + label_text = f"{det.label} {det.confidence:.2f}" + label_text = f"{label_text} | id {det.track_id}" + + (text_w, text_h), baseline = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1) + cv2.rectangle( + output, + (x1, max(0, y1 - text_h - baseline - 2)), + (x1 + text_w, max(0, y1)), + color, + -1, + ) + cv2.putText( + output, + label_text, + (x1, max(0, y1 - baseline - 2)), + cv2.FONT_HERSHEY_SIMPLEX, + 0.5, + (255, 255, 255), + 1, + cv2.LINE_AA, + ) + return output + + def to_msg(self, header) -> DetectionResult: + """ + Convert detections into a DetectionResult ROS message. 
+ """ + msg = DetectionResult() + msg.header = header + msg.image = _BRIDGE.cv2_to_compressed_imgmsg(self.image, dst_format="jpeg") + msg.image.header = header + + for det in self.detections: + msg.track_id.append(int(det.track_id)) + msg.x1.append(float(det.bbox[0])) + msg.y1.append(float(det.bbox[1])) + msg.x2.append(float(det.bbox[2])) + msg.y2.append(float(det.bbox[3])) + msg.label.append(str(det.label)) + msg.confidence.append(float(det.confidence)) + + mask = det.mask + # Crop mask to bbox and align origin to bbox top-left for transport. + x1, y1, x2, y2 = [int(v) for v in det.bbox] + h, w = self.image.shape[:2] + x1 = max(0, min(x1, w)) + x2 = max(0, min(x2, w)) + y1 = max(0, min(y1, h)) + y2 = max(0, min(y2, h)) + crop_h = max(1, y2 - y1) + crop_w = max(1, x2 - x1) + if mask.size == 0: + mask = np.zeros((crop_h, crop_w), dtype=np.uint8) + else: + mask = mask[y1:y2, x1:x2] + if mask.shape != (crop_h, crop_w): + mask = cv2.resize( + mask.astype(np.uint8), + (crop_w, crop_h), + interpolation=cv2.INTER_NEAREST, + ) + + mask_msg = _BRIDGE.cv2_to_compressed_imgmsg(mask, dst_format="png") + mask_msg.header = header + msg.masks.append(mask_msg) + + return msg + + @classmethod + def from_msg(cls, msg: DetectionResult) -> DetectionResults: + """ + Construct DetectionResults from a DetectionResult ROS message. + Masks are converted back to full-size overlays at bbox locations. + """ + image = _BRIDGE.compressed_imgmsg_to_cv2(msg.image) + detections: List[SingleDetectionResult] = [] + + for i in range(len(msg.track_id)): + bbox = ( + float(msg.x1[i]), + float(msg.y1[i]), + float(msg.x2[i]), + float(msg.y2[i]), + ) + mask_img = _BRIDGE.compressed_imgmsg_to_cv2(msg.masks[i], desired_encoding="mono8") + + full_mask = np.zeros(image.shape[:2], dtype=np.uint8) + x1, y1, x2, y2 = [int(v) for v in bbox] + h, w = image.shape[:2] + x1 = max(0, min(x1, w)) + x2 = max(0, min(x2, w)) + y1 = max(0, min(y1, h)) + y2 = max(0, min(y2, h)) + crop_h = max(1, y2 - y1) + crop_w = max(1, x2 - x1) + if mask_img.shape[:2] != (crop_h, crop_w): + mask_img = cv2.resize(mask_img, (crop_w, crop_h), interpolation=cv2.INTER_NEAREST) + full_mask[y1:y2, x1:x2] = mask_img + + detections.append( + SingleDetectionResult( + bbox=bbox, + track_id=int(msg.track_id[i]), + class_id=0, # class_id is not transmitted in DetectionResult + label=str(msg.label[i]), + confidence=float(msg.confidence[i]), + mask=full_mask, + ) + ) + + return cls(image=image, detections=detections) + + +def _from_ultralytics_result( + result, + idx: int, + image_shape: Optional[tuple[int, int]] = None, + names: Optional[Sequence[str]] = None, +) -> SingleDetectionResult: + """Convert a single Ultralytics Results entry into a SingleDetectionResult.""" + from ultralytics.engine.results import Results # local import + + if not isinstance(result, Results): + raise TypeError("result must be an ultralytics Results object") + if result.boxes is None: + raise ValueError("Result has no boxes") + + bbox_array = result.boxes.xyxy[idx].cpu().numpy() + bbox = ( + float(bbox_array[0]), + float(bbox_array[1]), + float(bbox_array[2]), + float(bbox_array[3]), + ) + + confidence = float(result.boxes.conf[idx].cpu()) + class_id = int(result.boxes.cls[idx].cpu()) + + resolved_names = names + if resolved_names is None and hasattr(result, "names"): + resolved_names = result.names # type: ignore[assignment] + + label = f"class_{class_id}" + if resolved_names is not None: + if isinstance(resolved_names, dict): + label = resolved_names.get(class_id, label) + elif 
isinstance(resolved_names, (list, tuple)) and class_id < len(resolved_names): + label = resolved_names[class_id] + + track_id = idx + if hasattr(result.boxes, "id") and result.boxes.id is not None: + track_id = int(result.boxes.id[idx].cpu()) + + target_h, target_w = (image_shape or (None, None)) + if target_h is None or target_w is None: + if hasattr(result, "orig_shape") and result.orig_shape: + target_h, target_w = result.orig_shape[:2] + + mask = np.zeros((target_h, target_w), dtype=np.uint8) if target_h and target_w else np.zeros((0, 0), dtype=np.uint8) + if result.masks is not None and idx < len(result.masks.data): + mask_tensor = result.masks.data[idx] + mask_np = mask_tensor.cpu().numpy() + + if mask_np.ndim == 3: + mask_np = mask_np.squeeze() + + if target_h and target_w and mask_np.shape != (target_h, target_w): + mask_np = cv2.resize(mask_np.astype(np.float32), (target_w, target_h), interpolation=cv2.INTER_LINEAR) + + mask = (mask_np > 0.5).astype(np.uint8) * 255 # type: ignore[assignment] + + return SingleDetectionResult( + bbox=bbox, + track_id=track_id, + class_id=class_id, + label=label, + confidence=confidence, + mask=mask, + ) + + +def from_ultralytics_results( + results, + image_shape: Optional[tuple[int, int]] = None, + names: Optional[Sequence[str]] = None, +) -> List[SingleDetectionResult]: + """Convert a list of Ultralytics Results objects into detections with masks.""" + detections: List[SingleDetectionResult] = [] + for result in results: + if result.boxes is None: + continue + num_detections = len(result.boxes.xyxy) + for i in range(num_detections): + detections.append(_from_ultralytics_result(result, i, image_shape=image_shape, names=names)) + return detections + + +__all__ = ["DetectionResults", "SingleDetectionResult", "from_ultralytics_results"] diff --git a/detect_anything/detect_anything/detection_node.py b/detect_anything/detect_anything/detection_node.py new file mode 100644 index 0000000..4d9f339 --- /dev/null +++ b/detect_anything/detect_anything/detection_node.py @@ -0,0 +1,169 @@ +from __future__ import annotations + +from pathlib import Path +from typing import List, Optional + +import yaml +import rclpy +from cv_bridge import CvBridge +from sensor_msgs.msg import CompressedImage, Image +from rclpy.node import Node +from rclpy.qos import DurabilityPolicy, QoSProfile, ReliabilityPolicy + +from detect_anything.detection import DetectionResults +from detect_anything.yoloe import Yoloe2DDetector, YoloePromptMode +from detect_anything.msg import DetectionResult + + +class DetectionNode(Node): + """ROS2 node that runs YOLO-E detection and publishes DetectionResult.""" + + def __init__(self) -> None: + super().__init__("detect_anything_detection_node") + self.bridge = CvBridge() + + # Parameters + defaults = self._default_params() + self.declare_parameter("image_topic", defaults["image_topic"]) + self.declare_parameter("compressed_image_topic", defaults["compressed_image_topic"]) + self.declare_parameter("use_compressed", defaults["use_compressed"]) + self.declare_parameter("model_path", defaults["model_path"]) + self.declare_parameter("model_name", defaults["model_name"]) + self.declare_parameter("device", defaults["device"]) + self.declare_parameter("prompt_mode", defaults["prompt_mode"]) + self.declare_parameter("objects_file", defaults["objects_file"]) + self.declare_parameter("conf", defaults["conf"]) + self.declare_parameter("max_area_ratio", defaults["max_area_ratio"]) + + params = self._default_params() + image_topic = 
str(self.get_parameter("image_topic").value) + compressed_image_topic = str(self.get_parameter("compressed_image_topic").value) + self.use_compressed = bool(self.get_parameter("use_compressed").value) + + model_path_val = self.get_parameter("model_path").value + model_path = str(model_path_val) if model_path_val not in (None, "") else params["model_path"] + if str(model_path).lower() == "none": + model_path = params["model_path"] + + model_name = str(self.get_parameter("model_name").value or params["model_name"]) + device = str(self.get_parameter("device").value or params["device"]) + prompt_mode_str = str(self.get_parameter("prompt_mode").value or params["prompt_mode"]).lower() + objects_file = Path(str(self.get_parameter("objects_file").value or params["objects_file"])) + conf = float(self.get_parameter("conf").value) + max_area_ratio = float(self.get_parameter("max_area_ratio").value) + + prompt_mode = YoloePromptMode(prompt_mode_str) + + # Auto-pick the right default weight if user didn't override + if prompt_mode == YoloePromptMode.LRPC and ("seg.pt" in model_name and "seg-pf.pt" not in model_name): + model_name = "yoloe-11l-seg-pf.pt" + self.get_logger().info(f"Prompt-free mode: switching model_name to {model_name}") + if prompt_mode == YoloePromptMode.PROMPT and "seg-pf.pt" in model_name: + model_name = "yoloe-11l-seg.pt" + self.get_logger().info(f"Prompt mode: switching model_name to {model_name}") + + self.detector = Yoloe2DDetector( + model_path=model_path, + model_name=model_name, + device=device or None, + prompt_mode=prompt_mode, + max_area_ratio=max_area_ratio, + conf=conf, + ) + + prompts = self._load_prompts(objects_file) + if prompts and prompt_mode == YoloePromptMode.PROMPT: + self.detector.set_prompts(text=prompts) + self.get_logger().info(f"Loaded {len(prompts)} prompts from {objects_file}") + + qos = QoSProfile( + depth=10, + reliability=ReliabilityPolicy.RELIABLE, + durability=DurabilityPolicy.VOLATILE, + ) + if self.use_compressed: + self.create_subscription( + CompressedImage, + compressed_image_topic, + self._compressed_image_callback, + qos, + ) + else: + self.create_subscription( + Image, + image_topic, + self._image_callback, + qos, + ) + + self.detection_pub = self.create_publisher(DetectionResult, "/detection_result", qos) + self.overlay_pub = self.create_publisher(Image, "/annotated_image_detection", qos) + + self.get_logger().info("detect_anything detection node started") + + def _default_params(self) -> dict: + base_share = Path(__file__).resolve().parent.parent + return { + "image_topic": "/camera/image", + "compressed_image_topic": "/camera/image/compressed", + "use_compressed": True, + "model_path": str(base_share / "models"), + "model_name": "yoloe-11l-seg.pt", + "device": "cuda", + "prompt_mode": YoloePromptMode.PROMPT.value, + "objects_file": str(base_share / "config" / "objects.yaml"), + "conf": 0.5, + "max_area_ratio": 0.3, + } + + def _load_prompts(self, objects_file: Path) -> List[str]: + if not objects_file.exists(): + return [] + try: + with open(objects_file, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) or [] + if isinstance(data, list): + return [str(p) for p in data] + except Exception as exc: + self.get_logger().warn(f"Failed to read prompts from {objects_file}: {exc}") + return [] + + def _image_callback(self, msg: Image) -> None: + cv_image = self.bridge.imgmsg_to_cv2(msg, desired_encoding="bgr8") + self._process_image(cv_image, msg.header) + + def _compressed_image_callback(self, msg: CompressedImage) -> None: + cv_image = 
self.bridge.compressed_imgmsg_to_cv2(msg, desired_encoding="bgr8") + self._process_image(cv_image, msg.header) + + def _process_image(self, cv_image, header) -> None: + detections: DetectionResults = self.detector.process_image(cv_image) + + detection_msg = detections.to_msg(header) + self.detection_pub.publish(detection_msg) + + overlay = detections.overlay() + overlay_msg = self.bridge.cv2_to_imgmsg(overlay, encoding="bgr8") + overlay_msg.header = header + self.overlay_pub.publish(overlay_msg) + + def destroy_node(self) -> bool: + if hasattr(self, "detector") and self.detector: + self.detector.stop() + return super().destroy_node() + + +def main(args: Optional[list[str]] = None) -> None: + rclpy.init(args=args) + node = DetectionNode() + try: + rclpy.spin(node) + except KeyboardInterrupt: + pass + finally: + node.destroy_node() + rclpy.shutdown() + + +if __name__ == "__main__": + main() diff --git a/detect_anything/detect_anything/yoloe.py b/detect_anything/detect_anything/yoloe.py new file mode 100644 index 0000000..c0695c3 --- /dev/null +++ b/detect_anything/detect_anything/yoloe.py @@ -0,0 +1,139 @@ +from __future__ import annotations + +from dataclasses import dataclass +from enum import Enum +from pathlib import Path +import threading +from typing import Any, List, Optional, Sequence + +import numpy as np +import torch +from ultralytics import YOLOE # type: ignore[attr-defined, import-not-found] + +from detect_anything.detection import ( + DetectionResults, + SingleDetectionResult, + from_ultralytics_results, +) + + +class YoloePromptMode(Enum): + """YOLO-E prompt modes.""" + + LRPC = "lrpc" + PROMPT = "prompt" + + +class Yoloe2DDetector: + """Thin wrapper around YOLO-E for 2D detection + tracking with masks.""" + + def __init__( + self, + model_path: str | None = None, + model_name: str | None = None, + device: str | None = None, + prompt_mode: YoloePromptMode = YoloePromptMode.LRPC, + max_area_ratio: Optional[float] = 0.3, + conf: float = 0.6, + iou: float = 0.6, + ) -> None: + if model_name is None: + model_name = "yoloe-11s-seg-pf.pt" if prompt_mode == YoloePromptMode.LRPC else "yoloe-11s-seg.pt" + + model_dir = Path(model_path) if model_path else Path("models/yoloe") + self.model = YOLOE(model_dir / model_name) + self.prompt_mode = prompt_mode + self._visual_prompts: dict[str, np.ndarray] | None = None + self.max_area_ratio = max_area_ratio + self._lock = threading.Lock() + self.conf = conf + self.iou = iou + self._names_override: Sequence[str] | None = None + + if prompt_mode == YoloePromptMode.PROMPT: + self.set_prompts(text=["nothing"]) + + if self.max_area_ratio is not None and not (0.0 < self.max_area_ratio <= 1.0): + raise ValueError("max_area_ratio must be in the range (0, 1].") + + if device: + self.device = device + elif torch.cuda.is_available(): + self.device = "cuda" + else: + self.device = "cpu" + + def set_prompts( + self, + text: Optional[List[str]] = None, + bboxes: Optional[np.ndarray] = None, + ) -> None: + """Set prompts for detection. Provide either text or bboxes, not both.""" + if self.prompt_mode == YoloePromptMode.LRPC: + # Prompt-free model cannot accept text/visual prompts. 
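+            # (Prompt-free LRPC weights ship with a built-in vocabulary, so
+            # incoming prompts are ignored here rather than raising.)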
+ return + if text is not None and bboxes is not None: + raise ValueError("Provide either text or bboxes, not both.") + if text is None and bboxes is None: + raise ValueError("Must provide either text or bboxes.") + + with self._lock: + self.model.predictor = None + if text is not None: + self.model.set_classes(text, self.model.get_text_pe(text)) # type: ignore[no-untyped-call] + self._visual_prompts = None + self._names_override = list(text) + else: + cls = np.arange(len(bboxes), dtype=np.int16) # type: ignore[arg-type] + self._visual_prompts = {"bboxes": bboxes, "cls": cls} # type: ignore[dict-item] + self._names_override = None + + def process_image(self, image: np.ndarray) -> DetectionResults: + """Process an image and return detection results.""" + track_kwargs: dict[str, Any] = { + "source": image, + "device": self.device, + "conf": self.conf, + "iou": self.iou, + "persist": True, + "verbose": False, + } + + with self._lock: + if self._visual_prompts is not None: + track_kwargs["visual_prompts"] = self._visual_prompts + + results = self.model.track(**track_kwargs) # type: ignore[arg-type] + + detections = from_ultralytics_results(results, image_shape=image.shape[:2], names=self._names_override) + filtered = self._apply_filters(image, detections) + return DetectionResults(image=image, detections=filtered) + + def _apply_filters( + self, + image: np.ndarray, + detections: List[SingleDetectionResult], + ) -> List[SingleDetectionResult]: + if self.max_area_ratio is None: + return detections + + image_area = float(image.shape[0] * image.shape[1]) + if image_area <= 0: + return detections + + return [det for det in detections if (det.area() / image_area) <= self.max_area_ratio] + + def stop(self) -> None: + """Clean up resources used by the detector.""" + if hasattr(self.model, "predictor") and self.model.predictor is not None: + predictor = self.model.predictor + if hasattr(predictor, "trackers") and predictor.trackers: + for tracker in predictor.trackers: + if hasattr(tracker, "tracker") and hasattr(tracker.tracker, "gmc"): + gmc = tracker.tracker.gmc + if hasattr(gmc, "executor") and gmc.executor is not None: + gmc.executor.shutdown(wait=True) + self.model.predictor = None + + +__all__ = ["Yoloe2DDetector", "YoloePromptMode"] diff --git a/detect_anything/env-hooks/venv_pythonpath.sh.in b/detect_anything/env-hooks/venv_pythonpath.sh.in new file mode 100644 index 0000000..9212bac --- /dev/null +++ b/detect_anything/env-hooks/venv_pythonpath.sh.in @@ -0,0 +1 @@ +ament_prepend_unique_value PYTHONPATH "$AMENT_CURRENT_PREFIX/@PYTHON_INSTALL_DIR@" diff --git a/detect_anything/msg/DetectionResult.msg b/detect_anything/msg/DetectionResult.msg new file mode 100644 index 0000000..766d5f1 --- /dev/null +++ b/detect_anything/msg/DetectionResult.msg @@ -0,0 +1,10 @@ +std_msgs/Header header +int32[] track_id +float32[] x1 +float32[] y1 +float32[] x2 +float32[] y2 +string[] label +float32[] confidence +sensor_msgs/CompressedImage image +sensor_msgs/CompressedImage[] masks # cropped to bbox; aligned to bbox top-left; encoding: mono8 diff --git a/detect_anything/package.xml b/detect_anything/package.xml new file mode 100644 index 0000000..a7699ae --- /dev/null +++ b/detect_anything/package.xml @@ -0,0 +1,26 @@ + + + + detect_anything + 0.0.1 + YOLO-E detection node and utilities.
+ Alex Lin + MIT + + ament_cmake + + rosidl_default_generators + rosidl_default_runtime + rosidl_interface_packages + + rclpy + std_msgs + sensor_msgs + vision_msgs + cv_bridge + vector_perception_utils + + + ament_cmake + + diff --git a/detect_anything/resource/detect_anything b/detect_anything/resource/detect_anything new file mode 100644 index 0000000..eac574f --- /dev/null +++ b/detect_anything/resource/detect_anything @@ -0,0 +1 @@ +detect_anything diff --git a/detect_anything/scripts/detection_node b/detect_anything/scripts/detection_node new file mode 100755 index 0000000..822cacf --- /dev/null +++ b/detect_anything/scripts/detection_node @@ -0,0 +1,5 @@ +#!/usr/bin/env python3 +from detect_anything.detection_node import main + +if __name__ == "__main__": + main() diff --git a/requirements.txt b/requirements.txt index 7b6c110..67894e1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,13 +4,14 @@ opencv-python scipy pillow tqdm +pyyaml +lark # 3D processing open3d -# deep learning -torch<=2.4.0 -torchvision<=1.19.0 +# vision +ultralytics # SAM2 and EdgeTAM dependencies sam2 diff --git a/semantic_mapping/package.xml b/semantic_mapping/package.xml new file mode 100644 index 0000000..ca18f0e --- /dev/null +++ b/semantic_mapping/package.xml @@ -0,0 +1,27 @@ + + + + semantic_mapping + 0.0.1 + YOLO-E based detection utilities and ROS2 node for semantic mapping. + Alex Lin + MIT + + ament_cmake + + rclpy + std_msgs + sensor_msgs + vision_msgs + cv_bridge + vector_perception_utils + + ament_copyright + ament_flake8 + ament_pep257 + python3-pytest + + + ament_python + + diff --git a/semantic_mapping/resource/semantic_mapping b/semantic_mapping/resource/semantic_mapping new file mode 100644 index 0000000..8f63025 --- /dev/null +++ b/semantic_mapping/resource/semantic_mapping @@ -0,0 +1 @@ +semantic_mapping diff --git a/semantic_mapping/semantic_mapping/__init__.py b/semantic_mapping/semantic_mapping/__init__.py new file mode 100644 index 0000000..6ac0fcc --- /dev/null +++ b/semantic_mapping/semantic_mapping/__init__.py @@ -0,0 +1 @@ +"""Semantic mapping ROS2 package.""" diff --git a/semantic_mapping/setup.cfg b/semantic_mapping/setup.cfg new file mode 100644 index 0000000..31239a6 --- /dev/null +++ b/semantic_mapping/setup.cfg @@ -0,0 +1,4 @@ +[develop] +script_dir=$base/lib/semantic_mapping +[install] +install_scripts=$base/lib/semantic_mapping diff --git a/semantic_mapping/setup.py b/semantic_mapping/setup.py new file mode 100644 index 0000000..fc5260b --- /dev/null +++ b/semantic_mapping/setup.py @@ -0,0 +1,27 @@ +from glob import glob +import os + +from setuptools import find_packages, setup + +package_name = "semantic_mapping" + +setup( + name=package_name, + version="0.0.1", + packages=find_packages(exclude=["test"]), + package_data={package_name: []}, + data_files=[ + ("share/ament_index/resource_index/packages", ["resource/" + package_name]), + ("share/" + package_name, ["package.xml"]), + (os.path.join("share", package_name, "launch"), glob(os.path.join("launch", "*launch.[pxy][yma]*"))), + (os.path.join("share", package_name, "config"), glob(os.path.join("config", "*.yaml"))), + ], + install_requires=["setuptools"], + zip_safe=True, + maintainer="Alex Lin", + maintainer_email="alex.lin416@outlook.com", + description="Semantic mapping utilities with a YOLO-E detector node.", + license="MIT", + tests_require=["pytest"], + entry_points={"console_scripts": []}, +) diff --git a/vector_perception_utils/setup.cfg b/vector_perception_utils/setup.cfg deleted file mode 100644 index 
1ce0c22..0000000 --- a/vector_perception_utils/setup.cfg +++ /dev/null @@ -1,5 +0,0 @@ -[develop] -script_dir=$base/lib/vector_perception_utils -[install] -install_scripts=$base/lib/vector_perception_utils - From 45c180478c1bacfa0a2ac8b8e7ca6db218038dc0 Mon Sep 17 00:00:00 2001 From: alexlin2 Date: Thu, 22 Jan 2026 23:32:11 -0500 Subject: [PATCH 2/6] initial migration --- .gitignore | 7 + .../detect_anything/detection_node.py | 2 +- semantic_mapping/CMakeLists.txt | 52 ++ semantic_mapping/README.md | 117 +++ .../config/sensor_to_camera_tf.yaml | 11 + .../docs/SEMANTIC_MAPPING_MIGRATION.md | 452 ++++++++++++ semantic_mapping/msg/ObjectNode.msg | 35 + semantic_mapping/msg/ObjectType.msg | 17 + .../msg/TargetObjectInstruction.msg | 22 + semantic_mapping/msg/ViewpointRep.msg | 8 + semantic_mapping/package.xml | 18 +- semantic_mapping/resource/semantic_mapping | 1 - .../scripts/semantic_mapping_node | 7 + semantic_mapping/semantic_mapping/__init__.py | 2 +- .../semantic_mapping/map_object.py | 683 ++++++++++++++++++ .../semantic_mapping/map_object_utils.py | 217 ++++++ semantic_mapping/semantic_mapping/mapper.py | 257 +++++++ semantic_mapping/semantic_mapping/matching.py | 246 +++++++ semantic_mapping/semantic_mapping/storage.py | 91 +++ semantic_mapping/setup.cfg | 4 - semantic_mapping/setup.py | 27 - .../pointcloud_utils.py | 230 +++++- .../transform_utils.py | 22 +- 23 files changed, 2476 insertions(+), 52 deletions(-) create mode 100644 semantic_mapping/CMakeLists.txt create mode 100644 semantic_mapping/README.md create mode 100644 semantic_mapping/config/sensor_to_camera_tf.yaml create mode 100644 semantic_mapping/docs/SEMANTIC_MAPPING_MIGRATION.md create mode 100644 semantic_mapping/msg/ObjectNode.msg create mode 100644 semantic_mapping/msg/ObjectType.msg create mode 100644 semantic_mapping/msg/TargetObjectInstruction.msg create mode 100644 semantic_mapping/msg/ViewpointRep.msg create mode 100644 semantic_mapping/scripts/semantic_mapping_node create mode 100644 semantic_mapping/semantic_mapping/map_object.py create mode 100644 semantic_mapping/semantic_mapping/map_object_utils.py create mode 100644 semantic_mapping/semantic_mapping/mapper.py create mode 100644 semantic_mapping/semantic_mapping/matching.py create mode 100644 semantic_mapping/semantic_mapping/storage.py delete mode 100644 semantic_mapping/setup.cfg delete mode 100644 semantic_mapping/setup.py diff --git a/.gitignore b/.gitignore index ccd784f..36a793e 100644 --- a/.gitignore +++ b/.gitignore @@ -209,3 +209,10 @@ cython_debug/ marimo/_static/ marimo/_lsp/ __marimo__/ + +# model files +*.pt +*.pth +*.onnx +*.engine +*.ts diff --git a/detect_anything/detect_anything/detection_node.py b/detect_anything/detect_anything/detection_node.py index 4d9f339..aebd0e8 100644 --- a/detect_anything/detect_anything/detection_node.py +++ b/detect_anything/detect_anything/detection_node.py @@ -112,7 +112,7 @@ def _default_params(self) -> dict: "device": "cuda", "prompt_mode": YoloePromptMode.PROMPT.value, "objects_file": str(base_share / "config" / "objects.yaml"), - "conf": 0.5, + "conf": 0.25, "max_area_ratio": 0.3, } diff --git a/semantic_mapping/CMakeLists.txt b/semantic_mapping/CMakeLists.txt new file mode 100644 index 0000000..7266468 --- /dev/null +++ b/semantic_mapping/CMakeLists.txt @@ -0,0 +1,52 @@ +cmake_minimum_required(VERSION 3.10) +project(semantic_mapping) + +find_package(ament_cmake REQUIRED) +find_package(rclpy REQUIRED) +find_package(std_msgs REQUIRED) +find_package(sensor_msgs REQUIRED) +find_package(geometry_msgs 
REQUIRED) +find_package(nav_msgs REQUIRED) +find_package(visualization_msgs REQUIRED) +find_package(cv_bridge REQUIRED) +find_package(rosidl_default_generators REQUIRED) +find_package(Python3 REQUIRED COMPONENTS Interpreter) + +rosidl_generate_interfaces(${PROJECT_NAME} + "msg/ObjectNode.msg" + "msg/ObjectType.msg" + "msg/TargetObjectInstruction.msg" + "msg/ViewpointRep.msg" + DEPENDENCIES std_msgs sensor_msgs geometry_msgs +) + +set(PYTHON_INSTALL_DIR "lib/python${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR}/site-packages") + +install( + DIRECTORY semantic_mapping + DESTINATION ${PYTHON_INSTALL_DIR} +) + +install( + PROGRAMS + scripts/semantic_mapping_node + DESTINATION lib/${PROJECT_NAME} +) + +install( + FILES resource/semantic_mapping + DESTINATION share/${PROJECT_NAME}/resource +) + +install( + DIRECTORY config + DESTINATION share/${PROJECT_NAME} +) + +install( + DIRECTORY launch + DESTINATION share/${PROJECT_NAME} +) + +ament_export_dependencies(rosidl_default_runtime) +ament_package() diff --git a/semantic_mapping/README.md b/semantic_mapping/README.md new file mode 100644 index 0000000..2dfd1dc --- /dev/null +++ b/semantic_mapping/README.md @@ -0,0 +1,117 @@ +# Semantic Mapping + +ROS2 package for semantic 3D object mapping with VLM integration. + +## Overview + +This package builds a persistent semantic map of objects detected by `detect_anything`, fusing 2D detections with LiDAR point clouds to create 3D object representations. It publishes `ObjectNode` messages compatible with the tare_planner exploration system. + +## Status: Work in Progress + +| Component | File | Status | +|-----------|------|--------| +| Messages | `msg/*.msg` | Complete | +| Data Models | `map_object.py` | Complete | +| Matching | `matching.py` | Pending | +| Object Mapper | `mapper.py` | Pending | +| Cloud Projection | `vector_perception_utils/pointcloud_utils.py` | Pending | +| Publishers | `map_object_utils.py` | Pending | +| Image Storage | `storage.py` | Pending | +| Mapping Node | `mapping_node.py` | Pending | +| Launch Files | `launch/` | Pending | + +## Messages + +### ObjectNode.msg +Main output message for planner integration: +- `object_id[]`: Track IDs (may contain multiple merged IDs) +- `label`: Dominant class label +- `position`: Object centroid in world frame +- `bbox3d[8]`: 3D bounding box corners +- `cloud`: Object point cloud +- `status`: Lifecycle status ("new", "updated", "unchanged") +- `img_path`: Path to best saved image crop +- `is_asked_vlm`: VLM query flag +- `viewpoint_id`: Associated viewpoint + +### ObjectType.msg +VLM query/answer for object relabeling. + +### TargetObjectInstruction.msg +User instruction for target object search. + +### ViewpointRep.msg +Viewpoint trigger for image saving. 
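+
+Reading these from Python might look like the following sketch (hypothetical `object_node_listener` node; assumes this package's messages are built and sourced):
+
+```python
+import rclpy
+from rclpy.node import Node
+
+from semantic_mapping.msg import ObjectNode
+
+
+class ObjectNodeListener(Node):
+    """Log each semantic object as it is published."""
+
+    def __init__(self) -> None:
+        super().__init__("object_node_listener")
+        self.create_subscription(ObjectNode, "/object_nodes", self._callback, 10)
+
+    def _callback(self, msg: ObjectNode) -> None:
+        p = msg.position
+        self.get_logger().info(
+            f"[{msg.status}] {msg.label} ids={list(msg.object_id)} "
+            f"at ({p.x:.2f}, {p.y:.2f}, {p.z:.2f})"
+        )
+```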
+ +## Architecture + +``` +detect_anything/DetectionResult + │ + ▼ +┌───────────────────────────────────────┐ +│ semantic_mapping_node │ +│ │ +│ ┌─────────────┐ ┌──────────────┐ │ +│ │ CloudImage │ │ ObjectMapper │ │ +│ │ Fusion │───▶│ (ObjectDB │ │ +│ │ │ │ pattern) │ │ +│ └─────────────┘ └──────────────┘ │ +│ │ │ │ +│ ▼ ▼ │ +│ ┌─────────────┐ ┌──────────────┐ │ +│ │ MapObject │ │ Publishers │ │ +│ │ (voxels, │───▶│ (ObjectNode, │ │ +│ │ votes) │ │ Markers) │ │ +│ └─────────────┘ └──────────────┘ │ +└───────────────────────────────────────┘ + │ + ▼ +/object_nodes, /obj_points, /obj_boxes, /obj_labels +``` + +## Dependencies + +- `detect_anything`: Detection results with masks +- `vector_perception_utils`: Point cloud and image utilities +- ROS2 messages: `sensor_msgs`, `geometry_msgs`, `nav_msgs`, `visualization_msgs` + +## Usage + +```bash +# Build +colcon build --packages-select semantic_mapping + +# Run (after implementation complete) +ros2 launch semantic_mapping semantic_mapping.launch.py +``` + +## Topics + +### Subscriptions +| Topic | Type | Description | +|-------|------|-------------| +| `/detection_result` | `detect_anything/DetectionResult` | 2D detections with masks | +| `/registered_scan` | `sensor_msgs/PointCloud2` | LiDAR point cloud | +| `/state_estimation` | `nav_msgs/Odometry` | Robot odometry | +| `/object_type_answer` | `ObjectType` | VLM relabeling response | +| `/target_object_instruction` | `TargetObjectInstruction` | Target object search | +| `/viewpoint_rep_header` | `ViewpointRep` | Viewpoint trigger | + +### Publications +| Topic | Type | Description | +|-------|------|-------------| +| `/object_nodes` | `ObjectNode` | Semantic object map (for planner) | +| `/obj_points` | `sensor_msgs/PointCloud2` | Colored object point cloud | +| `/obj_boxes` | `visualization_msgs/MarkerArray` | 3D bounding box wireframes | +| `/obj_labels` | `visualization_msgs/MarkerArray` | Object label text | +| `/object_type_query` | `ObjectType` | VLM query for relabeling | + +## References + +- Full migration plan: [`docs/SEMANTIC_MAPPING_MIGRATION.md`](docs/SEMANTIC_MAPPING_MIGRATION.md) +- Original implementation: `VLM_ROS/src/semantic_mapping/` (main branch) + - SAM2 for segmentation + - External Gemini API for VLM reasoning +- Detection: `detect_anything` package (decoupled) +- dimos patterns: `dimos/perception/detection/objectDB.py` diff --git a/semantic_mapping/config/sensor_to_camera_tf.yaml b/semantic_mapping/config/sensor_to_camera_tf.yaml new file mode 100644 index 0000000..baa6724 --- /dev/null +++ b/semantic_mapping/config/sensor_to_camera_tf.yaml @@ -0,0 +1,11 @@ +sensor_to_camera: + parent_frame: lidar + child_frame: camera + translation: + x: 0.0 + y: 0.0 + z: 0.265 + rotation_rpy: + roll: -1.5707963 + pitch: 0.0 + yaw: -1.5707963 diff --git a/semantic_mapping/docs/SEMANTIC_MAPPING_MIGRATION.md b/semantic_mapping/docs/SEMANTIC_MAPPING_MIGRATION.md new file mode 100644 index 0000000..4a23f74 --- /dev/null +++ b/semantic_mapping/docs/SEMANTIC_MAPPING_MIGRATION.md @@ -0,0 +1,452 @@ +# Semantic Mapping Migration Plan + +## Overview + +This document outlines the comprehensive plan for porting the `semantic_mapping` package from `VLM_ROS` into `vector_perception_ros/semantic_mapping`, following the coding standards of `vector_perception_ros` while preserving functional compatibility with downstream consumers (particularly the tare_planner). + +> **Note**: Detection is decoupled into `detect_anything` package. VLM reasoning uses external Gemini API. 
+ +## Design Principles + +1. **Logic cleanup, not restructure** - Preserve the core algorithms (voxel voting, observation angles, shape regularization, IoU/distance matching) while cleaning up dead code and improving organization. +2. **Same I/O contract** - Maintain the same ROS topic names, message types, and semantics so existing planner integration continues to work. +3. **vector_perception_ros coding standards** - Full type hints, Google-style docstrings, dataclass containers, proper ROS2 patterns. +4. **ObjectDB-style patterns** - Adopt the two-tier (pending/permanent) architecture and dual matching (track_id + distance) from dimos where it improves clarity. + +--- + +## Implementation Progress + +| Step | Component | Status | +|------|-----------|--------| +| 1 | Package Setup & Messages | **COMPLETED** | +| 2 | Data Models (`map_object.py`) | **COMPLETED** | +| 2.1 | Refactor to use shared utilities | **COMPLETED** | +| 3 | Matching Utilities (`matching.py`) | Pending | +| 4 | Object Mapper (`mapper.py`) | Pending | +| 5 | Cloud-Image Projection (`pointcloud_utils.py`) | Pending | +| 6 | Message Publishers (`map_object_utils.py`) | Pending | +| 7 | Image Storage (`storage.py`) | Pending | +| 8 | Mapping Node (`mapping_node.py`) | Pending | +| 9 | Launch Files & Config | Pending | +| 10 | Integration Testing | Pending | + +### Step 2.1: Refactored map_object.py to use shared utilities + +Added to `vector_perception_utils/transform_utils.py`: +- `normalize_angle()` - now supports both scalar and array input +- `discretize_angles()` - discretize angles into bin indices + +Added to `vector_perception_utils/pointcloud_utils.py`: +- `get_oriented_bbox()` - returns (center, corners) tuple from minimal OBB + +Refactored in `map_object.py`: +- Removed duplicate `_normalize_angles_to_pi` and `_discretize_angles` functions +- Simplified `_compute_geometry()` from 43 lines to 15 using `get_oriented_bbox()` +- Use `create_pointcloud()` in `regularize_shape()` instead of manual o3d creation +- Use `compute_centroid()` instead of `np.mean(..., axis=0)` + +--- + +## VLM_ROS Reference (Source Implementation) + +Key files in `VLM_ROS/src/semantic_mapping/`: +- `semantic_map_new.py`: `ObjMapper` manages the persistent map +- `single_object_new.py`: `SingleObject` holds per-object state +- `semantic_mapping_node.py`: `MappingNode` orchestrates SAM2 masks, LiDAR projection, odom transforms + +### ObjMapper Core Flow +- Input: dict with `bboxes`, `labels`, `confidences`, `ids`, `masks` (from SAM2), plus LiDAR cloud and odom +- Memory fix: Uses `.copy()` on detection data extraction to prevent memory leaks +- VLM query threshold: Objects need `best_image_score > 500` before querying VLM +- `update_map()` steps: + 1. Transform incoming cloud into body/world frame using odom + 2. Filter detections by confidence threshold + 3. For each detection: project masked cloud via `CloudImageFusion`, build per-object voxels, then either merge or create a `SingleObject` + 4. Merge heuristics: match by tracker `obj_id`, 3D IoU, distance thresholds, and optional primitive groups (e.g., chair/sofa) + 5. Maintain delete lists, save queues (best image/mask per object) + 6. 
Publish ROS messages: `ObjectNode`, bbox markers, label markers, colored point cloud + +### SingleObject Highlights +- Keeps class_id→counts/confidence, track_id list, voxel grid (`VoxelFeatureManager`), robot poses, masks, best image saving +- Computes 3D bbox (axis-aligned + oriented), centroids, clustering flags, and validity caches +- Manages persistence (`life`, `inactive_frame`, `publish_status`) and VLM interaction state + +### dimos Patterns to Adopt +- `ObjMapper` ⇄ `ObjectDB`: explicit pending/permanent tiers, track_id + spatial matching, promotion thresholds +- `SingleObject` ⇄ `Object`: cleaner update patterns, separation of core geometry vs VLM metadata + +--- + +## Phase 1: Package Structure & Messages (COMPLETED) + +### 1.1 Package Structure + +``` +semantic_mapping/ +├── CMakeLists.txt # Hybrid CMake for message generation + Python install +├── package.xml # Dependencies: detect_anything, vector_perception_utils, ROS msgs +├── msg/ +│ ├── ObjectNode.msg +│ ├── ObjectType.msg +│ ├── TargetObjectInstruction.msg +│ └── ViewpointRep.msg +├── config/ +│ ├── objects.yaml # Object taxonomy (to be copied from VLM_ROS) +│ └── params.yaml # Default node parameters (to be created) +├── launch/ +│ └── semantic_mapping.launch.py +├── semantic_mapping/ +│ ├── __init__.py +│ ├── mapping_node.py # ROS2 node (orchestration) +│ ├── mapper.py # ObjectMapper class (core logic) +│ ├── map_object.py # Data classes: MapObject, Observation, VoxelManager +│ ├── matching.py # Object matching utilities (IoU, distance, track_id) +│ ├── (moved to utils) # Cloud-image projection helpers in vector_perception_utils +│ ├── map_object_utils.py # ROS message conversion helpers +│ └── storage.py # Async image saving queue +├── docs/ +│ └── SEMANTIC_MAPPING_MIGRATION.md +└── resource/ + └── semantic_mapping # ament resource marker +``` + +### 1.2 Message Definitions + +**ObjectNode.msg** (preserves exact schema for planner compatibility): +``` +std_msgs/Header header +int32[] object_id +string label +geometry_msgs/Point position +geometry_msgs/Point[8] bbox3d # 8 corner points in world frame +sensor_msgs/PointCloud2 cloud +string status # "new", "updated", "unchanged" +string img_path +bool is_asked_vlm +int32 viewpoint_id +``` + +**ObjectType.msg**: VLM query/answer for object relabeling +**TargetObjectInstruction.msg**: User-set target/anchor objects +**ViewpointRep.msg**: Viewpoint trigger message + +--- + +## Phase 2: Data Models (COMPLETED) + +### 2.1 ObjectStatus Enum + +```python +class ObjectStatus(Enum): + """Object lifecycle status (ObjectDB-style tiering).""" + PENDING = "pending" # < min_detections threshold + PERMANENT = "permanent" # >= threshold, confirmed object + DELETED = "deleted" # Marked for removal +``` + +### 2.2 Observation + +```python +@dataclass +class Observation: + """A single detection observation to update a MapObject.""" + track_id: int + label: str + confidence: float + points: np.ndarray # (N, 3) world-frame points + robot_pose: dict # {R: (3,3), t: (3,)} + mask: np.ndarray | None = None # 2D segmentation mask + image: np.ndarray | None = None # RGB crop for best-image selection + timestamp: float = 0.0 +``` + +### 2.3 VoxelFeatureManager + +```python +@dataclass +class VoxelFeatureManager: + """Manages voxelized point cloud with observation angle voting.""" + voxels: np.ndarray # (N, 3) world coords + votes: np.ndarray # (N,) detection count per voxel + observation_angles: np.ndarray # (N, num_bins) angle histogram + regularized_mask: np.ndarray | None # (N,) valid voxel mask 
after clustering + voxel_size: float = 0.03 + num_angle_bins: int = 20 + + def update(self, new_points: np.ndarray, robot_position: np.ndarray) -> None: + """Add new points with KDTree-based merge and angle voting.""" + + def get_valid_voxels(self, min_votes: int = 1) -> np.ndarray: + """Return voxels passing vote threshold and regularization mask.""" +``` + +### 2.4 MapObject + +```python +@dataclass +class MapObject: + """Represents a tracked object in the semantic map.""" + # Identity + object_id: str # UUID for this object + track_ids: list[int] # Associated tracker IDs (can merge) + + # Classification (voted) + class_votes: dict[str, int] # label -> observation count + confidence_scores: dict[str, float] # label -> best confidence + + # Geometry + voxel_manager: VoxelFeatureManager + robot_poses: list[dict] # [{R, t}, ...] observation poses + + # Lifecycle (ObjectDB-style) + detection_count: int = 0 + inactive_frames: int = 0 + status: ObjectStatus = ObjectStatus.PENDING + + # VLM interaction + is_asked_vlm: bool = False + updated_by_vlm: bool = False + best_image_path: str | None = None + best_image_score: float = 0.0 + + @property + def label(self) -> str: ... + @property + def centroid(self) -> np.ndarray: ... + @property + def bbox3d(self) -> np.ndarray: ... + def update(self, observation: Observation) -> None: ... + def merge(self, other: MapObject) -> None: ... + def regularize_shape(self, percentile: float = 0.85) -> None: ... +``` + +--- + +## Phase 3: Matching Utilities (matching.py) + +```python +def compute_iou_3d(bbox1: np.ndarray, bbox2: np.ndarray) -> float: + """Compute 3D IoU between two axis-aligned bounding boxes.""" + +def compute_center_distance(obj1: MapObject, obj2: MapObject) -> float: + """Compute Euclidean distance between object centroids.""" + +def are_merge_compatible(label1: str, label2: str, merge_groups: list[set[str]]) -> bool: + """Check if two labels can be merged (same class or in same merge group).""" + +def find_best_match( + obs: Observation, + candidates: list[MapObject], + distance_threshold: float, + iou_threshold: float, +) -> MapObject | None: + """Find best matching object by distance and IoU criteria.""" +``` + +--- + +## Phase 4: Object Mapper (mapper.py) + +```python +class ObjectMapper: + """ + Manages the semantic object map with two-tier state management. + Pending objects are promoted to permanent after detection_count >= threshold. + """ + + def __init__( + self, + voxel_size: float = 0.03, + confidence_threshold: float = 0.30, + distance_threshold: float = 0.5, + iou_threshold: float = 0.2, + min_detections_for_permanent: int = 3, + pending_ttl_s: float = 10.0, + max_inactive_frames: int = 20, + ) -> None: + self._pending_objects: dict[str, MapObject] = {} + self._permanent_objects: dict[str, MapObject] = {} + self._track_id_map: dict[int, str] = {} + self._lock = threading.RLock() + self._merge_groups: list[set[str]] = [ + {"chair", "sofa", "couch", "armchair"}, + {"table", "desk"}, + ] + + def update(self, observations: list[Observation], target_object: str | None = None) -> UpdateStats: ... + def _match_observation(self, obs: Observation) -> MapObject | None: ... + def _create_object(self, obs: Observation) -> MapObject: ... + def _check_promotion(self, obj: MapObject) -> None: ... + def _try_merge_objects(self) -> None: ... + def _prune_inactive(self, target_object: str | None) -> list[MapObject]: ... + def get_objects(self, include_pending: bool = False) -> list[MapObject]: ... 
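+
+
+# `UpdateStats` is referenced above but not yet defined in this plan; a
+# minimal sketch of what it could carry (an assumption, not a settled API):
+@dataclass
+class UpdateStats:
+    """Bookkeeping returned by ObjectMapper.update()."""
+
+    matched: int = 0   # observations merged into existing objects
+    created: int = 0   # new pending objects this cycle
+    promoted: int = 0  # pending -> permanent transitions
+    deleted: int = 0   # objects pruned this cycle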
+``` + +--- + +## Phase 5: Mapping Node (mapping_node.py) + +### 5.1 Node Structure + +```python +class SemanticMappingNode(Node): + """ROS2 node for semantic 3D object mapping.""" + + def __init__(self) -> None: + super().__init__("semantic_mapping_node") + self._declare_parameters() + self.mapper = ObjectMapper(...) + self.projection = pointcloud_utils + self.image_saver = ImageSaveQueue(...) + + # Data buffers (with locks) + self._cloud_buffer: deque[tuple[float, np.ndarray]] = deque(maxlen=50) + self._odom_buffer: deque[tuple[float, Pose]] = deque(maxlen=100) + self._lock = threading.Lock() + + self._create_subscriptions() + self._create_publishers() + self.create_timer(0.5, self._mapping_callback) +``` + +### 5.2 Subscriptions + +| Topic | Type | QoS | +|-------|------|-----| +| `/detection_result` | `detect_anything/DetectionResult` | Reliable | +| `/registered_scan` | `PointCloud2` | Best Effort | +| `/state_estimation` | `Odometry` | Best Effort | +| `/object_type_answer` | `ObjectType` | Reliable | +| `/target_object_instruction` | `TargetObjectInstruction` | Reliable | +| `/viewpoint_rep_header` | `ViewpointRep` | Reliable | + +### 5.3 Publishers + +| Topic | Type | Description | +|-------|------|-------------| +| `/object_nodes` | `ObjectNode` | Main output for planner | +| `/obj_points` | `PointCloud2` | Colored object cloud | +| `/obj_boxes` | `MarkerArray` | Bbox wireframes | +| `/obj_labels` | `MarkerArray` | Text labels | +| `/annotated_image` | `Image` | Debug visualization | +| `/object_type_query` | `ObjectType` | VLM query | + +--- + +## Phase 6: Cloud-Image Projection (pointcloud_utils.py) + +```python +def project_pointcloud_to_image(...): ... +def segment_pointcloud_by_mask(...): ... +def segment_pointclouds_from_projection(...): ... +def transform_segmented_clouds_to_world(...): ... +``` + +--- + +## Phase 7: Message Publishers (map_object_utils.py) + +```python +def map_object_to_object_node(obj: MapObject, header: Header, status: str) -> ObjectNode | None: ... +def create_bbox_markers(objects: list[MapObject], header: Header) -> MarkerArray: ... +def create_label_markers(objects: list[MapObject], header: Header) -> MarkerArray: ... +def create_object_pointcloud(objects: list[MapObject], header: Header) -> PointCloud2 | None: ... +def create_delete_markers(deleted_ids: list[int], header: Header, namespace: str) -> MarkerArray: ... +``` + +--- + +## Phase 8: Image Storage (storage.py) + +```python +class ImageSaveQueue: + """Background worker for saving object images to disk.""" + + def __init__(self, output_dir: Path, max_queue_size: int = 100) -> None: ... + def start(self) -> None: ... + def stop(self) -> None: ... + def queue_save(self, object_id: str, image: np.ndarray, mask: np.ndarray | None = None) -> str: ... 
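+
+# Intended call pattern (sketch; assumes queue_save returns the destination
+# path so the caller can store it on the object as `img_path`):
+#
+#   saver = ImageSaveQueue(Path("/tmp/semantic_mapping"))  # hypothetical dir
+#   saver.start()
+#   obj.best_image_path = saver.queue_save(obj.object_id, image_crop)
+#   saver.stop()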
+``` + +--- + +## Phase 9: Launch Files + +```python +# semantic_mapping.launch.py +DeclareLaunchArgument('platform', default_value='mecanum_sim') +DeclareLaunchArgument('detection_topic', default_value='/detection_result') +DeclareLaunchArgument('cloud_topic', default_value='/registered_scan') +DeclareLaunchArgument('odom_topic', default_value='/state_estimation') +DeclareLaunchArgument('voxel_size', default_value='0.03') +DeclareLaunchArgument('confidence_threshold', default_value='0.30') +DeclareLaunchArgument('target_object', default_value='') +``` + +--- + +## Implementation Checklist + +### Completed +- [x] Create `CMakeLists.txt` for message generation +- [x] Port message definitions from tare_planner +- [x] Set up `package.xml` with dependencies +- [x] Implement `map_object.py` with VoxelFeatureManager, MapObject, Observation +- [x] Port lazy computation patterns (centroid, bbox3d, regularization) +- [x] Refactor `map_object.py` to use shared utilities from `vector_perception_utils` +- [x] Add `normalize_angle`, `discretize_angles` to `transform_utils.py` +- [x] Add `get_oriented_bbox` to `pointcloud_utils.py` +- [x] Simplify `_compute_geometry()` using Open3D oriented bbox + +### Remaining +- [ ] Implement `matching.py` with IoU, distance, merge-compatibility functions +- [ ] Implement `mapper.py` with ObjectDB-style tiering +- [ ] Implement cloud-image projection helpers in `vector_perception_utils/pointcloud_utils.py` +- [ ] Implement `map_object_utils.py` message conversion functions +- [ ] Implement `storage.py` with async queue +- [ ] Implement `mapping_node.py` following detect_anything patterns +- [ ] Create launch files and params.yaml +- [ ] Integration testing with bag files + +--- + +## Code Cleanup Items + +Items from VLM_ROS to preserve: +- Memory fixes with `.copy()` on detection data extraction +- VLM query threshold (`best_image_score > 500`) + +Items to remove: +1. Unused publishers: `/cloud_image` +2. Unused methods: `serialize_objs_to_bag()`, `is_merge_allowed()`, `handle_object_query()` +3. Review constants: `BACKGROUND_OBJECTS`, `VERTICAL_OBJECTS`, `BIG_OBJECTS`, `SMALL_OBJECTS` +4. Don't port: `byte_track`, `Grounded-SAM-2`, `mmconfig` directories + +--- + +## Compatibility Checklist + +- [ ] `/object_nodes` publishes ObjectNode with exact schema +- [ ] `/obj_points` publishes colored PointCloud2 +- [ ] `/obj_boxes` publishes MarkerArray wireframes +- [ ] `/obj_labels` publishes MarkerArray text +- [ ] `/object_type_query` publishes ObjectType for VLM +- [ ] Subscribes to `/detection_result` from detect_anything +- [ ] Subscribes to `/registered_scan` PointCloud2 +- [ ] Subscribes to `/state_estimation` Odometry +- [ ] Subscribes to `/target_object_instruction` +- [ ] Subscribes to `/viewpoint_rep_header` +- [ ] Subscribes to `/object_type_answer` +- [ ] Object positions, bbox corners, status flags match expected semantics +- [ ] Delete markers properly remove objects from RViz + +--- + +## Planner Dependencies + +The exploration planner (`sensor_coverage_planner_ground.cpp` in tare_planner) is tightly coupled to `ObjectNode`: +- Subscribes to `/object_nodes` and stores per-object reps +- Uses object positions, bbox corners, clouds, status, `is_asked_vlm`, and `img_path` for navigation decisions + +**Implication**: Keep the `ObjectNode` schema stable. Any new fields from the `MapObject` model can be mapped into extensions, but existing fields and semantics must remain for planner compatibility. 
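+
+As a concrete reference for that constraint, a non-authoritative sketch of the
+Phase 7 conversion, using only fields defined earlier in this plan (`cloud` and
+`viewpoint_id` wiring is left to the mapping node):
+
+```python
+from typing import Optional
+
+import numpy as np
+from geometry_msgs.msg import Point
+from std_msgs.msg import Header
+
+from semantic_mapping.map_object import MapObject
+from semantic_mapping.msg import ObjectNode
+
+
+def map_object_to_object_node(obj: MapObject, header: Header, status: str) -> Optional[ObjectNode]:
+    """Sketch: project a MapObject onto the planner-facing ObjectNode schema."""
+    voxels = obj.voxel_manager.get_valid_voxels()
+    if voxels.shape[0] == 0:
+        return None  # nothing publishable yet
+
+    msg = ObjectNode()
+    msg.header = header
+    msg.object_id = [int(t) for t in obj.track_ids]
+    msg.label = obj.label
+    c = obj.centroid
+    msg.position = Point(x=float(c[0]), y=float(c[1]), z=float(c[2]))
+
+    # Axis-aligned corners in the order documented in ObjectNode.msg:
+    # four min/max combinations on the bottom face, then the top face.
+    lo, hi = voxels.min(axis=0), voxels.max(axis=0)
+    bounds = np.stack([lo, hi])
+    order = [(0, 0, 0), (1, 0, 0), (1, 1, 0), (0, 1, 0),
+             (0, 0, 1), (1, 0, 1), (1, 1, 1), (0, 1, 1)]
+    msg.bbox3d = [
+        Point(x=float(bounds[i, 0]), y=float(bounds[j, 1]), z=float(bounds[k, 2]))
+        for i, j, k in order
+    ]
+
+    msg.status = status
+    msg.img_path = obj.best_image_path or ""
+    msg.is_asked_vlm = obj.is_asked_vlm
+    return msg
+```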
diff --git a/semantic_mapping/msg/ObjectNode.msg b/semantic_mapping/msg/ObjectNode.msg new file mode 100644 index 0000000..b2d7686 --- /dev/null +++ b/semantic_mapping/msg/ObjectNode.msg @@ -0,0 +1,35 @@ +# Semantic object representation for planner integration. +# +# Each message represents a single object in the semantic map with its +# 3D bounding box, point cloud, and metadata for VLM interaction. + +std_msgs/Header header + +# Object identity - may contain multiple merged track IDs +int32[] object_id + +# Dominant class label (by weighted vote) +string label + +# Object centroid in world frame +geometry_msgs/Point position + +# 3D bounding box corners (8 points in world frame) +# Order: [min_x,min_y,min_z], [max_x,min_y,min_z], [max_x,max_y,min_z], [min_x,max_y,min_z], +# [min_x,min_y,max_z], [max_x,min_y,max_z], [max_x,max_y,max_z], [min_x,max_y,max_z] +geometry_msgs/Point[8] bbox3d + +# Object point cloud in world frame +sensor_msgs/PointCloud2 cloud + +# Lifecycle status: "new", "updated", "unchanged" +string status + +# Path to best saved image crop of this object +string img_path + +# VLM interaction flags +bool is_asked_vlm + +# Associated viewpoint ID (for viewpoint-triggered saves) +int32 viewpoint_id diff --git a/semantic_mapping/msg/ObjectType.msg b/semantic_mapping/msg/ObjectType.msg new file mode 100644 index 0000000..ab02ea6 --- /dev/null +++ b/semantic_mapping/msg/ObjectType.msg @@ -0,0 +1,17 @@ +# VLM query/answer message for object relabeling. +# +# Used bidirectionally: +# - Query (mapping -> VLM): object_id, img_path, labels[] populated +# - Answer (VLM -> mapping): final_label populated with VLM decision + +# Object ID to query/update +int32 object_id + +# Path to object image for VLM analysis +string img_path + +# VLM-determined final label (populated in answer) +string final_label + +# Candidate labels from detection (populated in query) +string[] labels diff --git a/semantic_mapping/msg/TargetObjectInstruction.msg b/semantic_mapping/msg/TargetObjectInstruction.msg new file mode 100644 index 0000000..1a88005 --- /dev/null +++ b/semantic_mapping/msg/TargetObjectInstruction.msg @@ -0,0 +1,22 @@ +# User instruction for target object search. +# +# Specifies the target object to find and optional spatial/attribute +# conditions relative to an anchor object. + +# Primary target object label to search for +string target_object + +# Room condition (e.g., "in the kitchen") +string room_condition + +# Spatial condition relative to anchor (e.g., "next to", "on top of") +string spatial_condition + +# Attribute condition for target (e.g., "red", "large") +string attribute_condition + +# Anchor object for spatial reference +string anchor_object + +# Attribute condition for anchor object +string attribute_condition_anchor diff --git a/semantic_mapping/msg/ViewpointRep.msg b/semantic_mapping/msg/ViewpointRep.msg new file mode 100644 index 0000000..5bb1bfe --- /dev/null +++ b/semantic_mapping/msg/ViewpointRep.msg @@ -0,0 +1,8 @@ +# Viewpoint trigger message for image saving. +# +# Published by planner to request saving images at specific viewpoints. + +std_msgs/Header header + +# Unique viewpoint identifier +int32 viewpoint_id diff --git a/semantic_mapping/package.xml b/semantic_mapping/package.xml index ca18f0e..4804d1a 100644 --- a/semantic_mapping/package.xml +++ b/semantic_mapping/package.xml @@ -3,25 +3,27 @@ semantic_mapping 0.0.1 - YOLO-E based detection utilities and ROS2 node for semantic mapping. + Semantic 3D object mapping node with VLM integration. 
Alex Lin MIT ament_cmake + rosidl_default_generators + rosidl_default_runtime + rosidl_interface_packages + rclpy std_msgs sensor_msgs - vision_msgs + geometry_msgs + nav_msgs + visualization_msgs cv_bridge + detect_anything vector_perception_utils - ament_copyright - ament_flake8 - ament_pep257 - python3-pytest - - ament_python + ament_cmake diff --git a/semantic_mapping/resource/semantic_mapping b/semantic_mapping/resource/semantic_mapping index 8f63025..e69de29 100644 --- a/semantic_mapping/resource/semantic_mapping +++ b/semantic_mapping/resource/semantic_mapping @@ -1 +0,0 @@ -semantic_mapping diff --git a/semantic_mapping/scripts/semantic_mapping_node b/semantic_mapping/scripts/semantic_mapping_node new file mode 100644 index 0000000..0312e88 --- /dev/null +++ b/semantic_mapping/scripts/semantic_mapping_node @@ -0,0 +1,7 @@ +#!/usr/bin/env python3 +"""Entry point for semantic_mapping_node.""" + +from semantic_mapping.mapping_node import main + +if __name__ == "__main__": + main() diff --git a/semantic_mapping/semantic_mapping/__init__.py b/semantic_mapping/semantic_mapping/__init__.py index 6ac0fcc..8df6f20 100644 --- a/semantic_mapping/semantic_mapping/__init__.py +++ b/semantic_mapping/semantic_mapping/__init__.py @@ -1 +1 @@ -"""Semantic mapping ROS2 package.""" +"""Semantic 3D object mapping for vector_perception_ros.""" diff --git a/semantic_mapping/semantic_mapping/map_object.py b/semantic_mapping/semantic_mapping/map_object.py new file mode 100644 index 0000000..8140835 --- /dev/null +++ b/semantic_mapping/semantic_mapping/map_object.py @@ -0,0 +1,683 @@ +"""Data models for semantic mapping. + +This module defines the core data structures for the semantic mapping system: +- VoxelFeatureManager: Manages voxelized point clouds with observation angle voting +- MapObject: Represents a tracked object in the semantic map +- Observation: A single detection observation +- ObjectStatus: Enum for object lifecycle status +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from enum import Enum +from typing import Optional + +import numpy as np +from scipy.spatial import cKDTree + +from vector_perception_utils.transform_utils import discretize_angles, normalize_angle +from vector_perception_utils.pointcloud_utils import ( + compute_centroid, + create_pointcloud, +) + + +class ObjectStatus(Enum): + """Object lifecycle status (ObjectDB-style tiering).""" + + PENDING = "pending" # < min_detections threshold + PERMANENT = "permanent" # >= threshold, confirmed object + DELETED = "deleted" # Marked for removal + + +@dataclass +class Observation: + """A single detection observation to update a MapObject. + + Attributes: + track_id: Tracker ID from detection. + label: Class label string. + confidence: Detection confidence score. + points: World-frame point cloud (N, 3). + robot_R: Robot rotation matrix (3, 3) at observation time. + robot_t: Robot translation vector (3,) at observation time. + mask: Optional 2D segmentation mask. + image: Optional RGB image crop for best-image selection. + timestamp: Observation timestamp in seconds. + """ + + track_id: int + label: str + confidence: float + points: np.ndarray + robot_R: np.ndarray + robot_t: np.ndarray + mask: Optional[np.ndarray] = None + image: Optional[np.ndarray] = None + timestamp: float = 0.0 + + +class VoxelFeatureManager: + """Manages voxelized point cloud with observation angle voting. 
+ + This class tracks 3D voxels representing an object, along with: + - Vote counts per voxel (how many times each voxel was observed) + - Observation angles (which directions each voxel was seen from) + - Regularization mask (which voxels pass shape filtering) + + The observation angle voting helps distinguish between reliable points + (seen from multiple angles) and spurious points (seen from only one angle). + + Attributes: + voxels: Point coordinates (N, 3) in world frame. + votes: Detection count per voxel (N,). + observation_angles: Binary angle histogram per voxel (N, num_bins). + regularized_mask: Boolean mask for valid voxels after clustering (N,). + voxel_size: Size of voxel grid cells in meters. + num_angle_bins: Number of discretized angle bins. + """ + + def __init__( + self, + points: np.ndarray, + voxel_size: float, + robot_R: np.ndarray, + robot_t: np.ndarray, + num_angle_bins: int = 15, + ) -> None: + """Initialize VoxelFeatureManager with initial points. + + Args: + points: Initial point cloud (N, 3) in world frame. + voxel_size: Voxel grid cell size in meters. + robot_R: Robot rotation matrix (3, 3) at observation time. + robot_t: Robot translation vector (3,) at observation time. + num_angle_bins: Number of angle bins for observation tracking. + """ + num_points = points.shape[0] + + # Compute observation angles from robot position + vectors = points - robot_t + obs_angles = np.arctan2(vectors[:, 1], vectors[:, 0]) + obs_bins = discretize_angles(obs_angles, num_angle_bins) + + self.voxels = points.copy() + self.voxel_size = voxel_size + self.num_angle_bins = num_angle_bins + self._tree = cKDTree(points) + + # Initialize vote counts + self.votes = np.ones(num_points, dtype=np.float32) + + # Initialize observation angle histogram (binary: seen/not seen from each angle) + self.observation_angles = np.zeros((num_points, num_angle_bins), dtype=np.uint8) + self.observation_angles[np.arange(num_points), obs_bins] = 1 + + # Regularization mask (all False until regularize_shape is called) + self.regularized_mask = np.zeros(num_points, dtype=bool) + + # Remove vote for dynamic object clearing (not commonly used) + self._remove_votes = np.zeros(num_points, dtype=np.int32) + + def update( + self, + new_points: np.ndarray, + robot_R: np.ndarray, + robot_t: np.ndarray, + ) -> None: + """Add new points with KDTree-based merge and angle voting. + + Points within voxel_size of existing points are merged (votes incremented). + Points farther away are added as new voxels. + + Args: + new_points: New point cloud (M, 3) in world frame. + robot_R: Robot rotation matrix (3, 3) at observation time. + robot_t: Robot translation vector (3,) at observation time. 
+ """ + if new_points.shape[0] == 0: + return + + # Compute observation angles for new points + vectors = new_points - robot_t + obs_angles = np.arctan2(vectors[:, 1], vectors[:, 0]) + obs_bins = discretize_angles(obs_angles, self.num_angle_bins) + + # Find nearest existing voxels + distances, indices = self._tree.query(new_points) + + # Merge points within voxel_size + merge_mask = distances < self.voxel_size + merge_indices = indices[merge_mask] + merge_bins = obs_bins[merge_mask] + + self.votes[merge_indices] += 1 + self.observation_angles[merge_indices, merge_bins] = 1 + + # Add new points that don't merge + new_mask = ~merge_mask + if np.any(new_mask): + new_voxels = new_points[new_mask] + new_bins = obs_bins[new_mask] + num_new = new_voxels.shape[0] + + # Extend arrays + self.voxels = np.vstack([self.voxels, new_voxels]) + self.votes = np.concatenate([self.votes, np.ones(num_new, dtype=np.float32)]) + + new_obs = np.zeros((num_new, self.num_angle_bins), dtype=np.uint8) + new_obs[np.arange(num_new), new_bins] = 1 + self.observation_angles = np.vstack([self.observation_angles, new_obs]) + + self.regularized_mask = np.concatenate( + [self.regularized_mask, np.zeros(num_new, dtype=bool)] + ) + self._remove_votes = np.concatenate( + [self._remove_votes, np.zeros(num_new, dtype=np.int32)] + ) + + # Rebuild KDTree + self._tree = cKDTree(self.voxels) + + def merge_from(self, other: VoxelFeatureManager) -> None: + """Merge voxels from another VoxelFeatureManager. + + Args: + other: Another VoxelFeatureManager to merge from. + """ + if other.voxels.shape[0] == 0: + return + + distances, indices = self._tree.query(other.voxels) + + # Merge overlapping voxels + merge_mask = distances < self.voxel_size + merge_indices = indices[merge_mask] + + self.votes[merge_indices] += other.votes[merge_mask] + self.observation_angles[merge_indices] = np.logical_or( + self.observation_angles[merge_indices], + other.observation_angles[merge_mask], + ).astype(np.uint8) + + # Add non-overlapping voxels + new_mask = ~merge_mask + if np.any(new_mask): + self.voxels = np.vstack([self.voxels, other.voxels[new_mask]]) + self.votes = np.concatenate([self.votes, other.votes[new_mask]]) + self.observation_angles = np.vstack( + [self.observation_angles, other.observation_angles[new_mask]] + ) + self.regularized_mask = np.concatenate( + [self.regularized_mask, other.regularized_mask[new_mask]] + ) + self._remove_votes = np.concatenate( + [self._remove_votes, np.zeros(np.sum(new_mask), dtype=np.int32)] + ) + self._tree = cKDTree(self.voxels) + + def get_valid_indices( + self, + diversity_percentile: float = 0.3, + use_regularized: bool = True, + ) -> np.ndarray: + """Get indices of valid voxels based on observation angle diversity. + + Voxels are ranked by their observation angle diversity (number of angles + from which they were observed), weighted by vote count. The top percentile + of voxels are returned. + + Args: + diversity_percentile: Fraction of total weight to include (0-1). + use_regularized: If True, only consider voxels in regularized_mask. + + Returns: + Indices of valid voxels. 
+ """ + if use_regularized: + mask = self.regularized_mask + if not np.any(mask): + return np.array([], dtype=int) + obs_angles = self.observation_angles[mask] + votes = self.votes[mask] + else: + obs_angles = self.observation_angles + votes = self.votes + + if len(obs_angles) == 0: + return np.array([], dtype=int) + + # Compute diversity score (number of angles observed) + angle_diversity = np.sum(obs_angles, axis=1) + + # Sort by diversity (ascending) + sorted_indices = np.argsort(angle_diversity) + sorted_diversity = angle_diversity[sorted_indices] + sorted_weights = votes[sorted_indices] + + # Weight by diversity * votes + weighted = sorted_diversity * sorted_weights + total_weight = np.sum(weighted) + + if total_weight == 0: + return np.array([], dtype=int) + + # Find cutoff index for percentile + target_weight = (1 - diversity_percentile) * total_weight + cumsum = np.cumsum(weighted) + cutoff_idx = np.searchsorted(cumsum, target_weight) + + # Return indices above cutoff (higher diversity) + return sorted_indices[cutoff_idx:] + + def get_valid_voxels( + self, + diversity_percentile: float = 0.3, + use_regularized: bool = True, + ) -> np.ndarray: + """Get valid voxel coordinates. + + Args: + diversity_percentile: Fraction of total weight to include (0-1). + use_regularized: If True, only consider voxels in regularized_mask. + + Returns: + Valid voxel coordinates (M, 3). + """ + indices = self.get_valid_indices(diversity_percentile, use_regularized) + if use_regularized and np.any(self.regularized_mask): + voxels = self.voxels[self.regularized_mask] + else: + voxels = self.voxels + + if len(indices) == 0: + return np.empty((0, 3), dtype=np.float32) + return voxels[indices] + + @property + def num_voxels(self) -> int: + """Total number of voxels.""" + return self.voxels.shape[0] + + @property + def num_regularized(self) -> int: + """Number of voxels passing regularization.""" + return int(np.sum(self.regularized_mask)) + + +@dataclass +class MapObject: + """Represents a tracked object in the semantic map. + + This class combines patterns from VLM_ROS SingleObject and dimos Object, + with ObjectDB-style lifecycle management. + + Attributes: + object_id: Unique identifier for this object (UUID string). + track_ids: List of associated tracker IDs (can merge multiple). + voxel_manager: Manages the object's voxelized point cloud. + class_votes: Label -> observation count mapping. + confidence_scores: Label -> best confidence score mapping. + points_count: Label -> total points count mapping. + robot_poses: List of robot poses when object was observed. + detection_count: Total number of detections. + inactive_frames: Frames since last update. + last_update_time: Timestamp of last update. + status: Object lifecycle status. + is_asked_vlm: Whether VLM has been queried for this object. + updated_by_vlm: Whether object was relabeled by VLM. + best_image_path: Path to best saved image crop. + best_image_score: Score of best saved image. + centroid: Cached centroid position (None if needs recompute). + bbox3d_corners: Cached 3D bbox corners (None if needs recompute). 
+ """ + + object_id: str + track_ids: list[int] + voxel_manager: VoxelFeatureManager + + # Classification (voted) + class_votes: dict[str, int] = field(default_factory=dict) + confidence_scores: dict[str, float] = field(default_factory=dict) + points_count: dict[str, int] = field(default_factory=dict) + + # Robot poses at observation times + robot_poses: list[dict] = field(default_factory=list) + + # Lifecycle (ObjectDB-style) + detection_count: int = 1 + inactive_frames: int = 0 + last_update_time: float = 0.0 + status: ObjectStatus = ObjectStatus.PENDING + + # VLM interaction + is_asked_vlm: bool = False + updated_by_vlm: bool = False + + # Image storage + best_image_path: Optional[str] = None + best_image_score: float = 0.0 + + # Cached geometry (None means needs recompute) + _centroid: Optional[np.ndarray] = field(default=None, repr=False) + _bbox3d_corners: Optional[np.ndarray] = field(default=None, repr=False) + _needs_recompute: bool = field(default=True, repr=False) + + # Configuration + _diversity_percentile: float = field(default=0.85, repr=False) + _angle_threshold: float = field(default=np.deg2rad(5), repr=False) + _distance_threshold: float = field(default=0.3, repr=False) + + @classmethod + def from_observation( + cls, + object_id: str, + obs: Observation, + voxel_size: float = 0.03, + num_angle_bins: int = 15, + ) -> MapObject: + """Create a new MapObject from an initial observation. + + Args: + object_id: Unique identifier for this object. + obs: Initial observation. + voxel_size: Voxel grid cell size in meters. + num_angle_bins: Number of angle bins for observation tracking. + + Returns: + New MapObject instance. + """ + voxel_manager = VoxelFeatureManager( + points=obs.points, + voxel_size=voxel_size, + robot_R=obs.robot_R, + robot_t=obs.robot_t, + num_angle_bins=num_angle_bins, + ) + + obj = cls( + object_id=object_id, + track_ids=[obs.track_id], + voxel_manager=voxel_manager, + class_votes={obs.label: 1}, + confidence_scores={obs.label: obs.confidence}, + points_count={obs.label: len(obs.points)}, + robot_poses=[{"R": obs.robot_R.copy(), "t": obs.robot_t.copy()}], + last_update_time=obs.timestamp, + ) + return obj + + def _invalidate_cache(self) -> None: + """Mark cached geometry as needing recompute.""" + self._needs_recompute = True + self._centroid = None + self._bbox3d_corners = None + + def _is_similar_pose(self, new_R: np.ndarray, new_t: np.ndarray) -> bool: + """Check if a new pose is similar to existing poses. + + Used to avoid redundant updates from the same viewpoint. + Compares observation angle and distance to object center. + + Args: + new_R: New robot rotation matrix (3, 3). Not used but kept for API. + new_t: New robot translation vector (3,). + + Returns: + True if pose is similar to an existing pose. 
+ """ + _ = new_R # Unused but kept for API consistency + obj_center = self.centroid + if obj_center is None: + obj_center = compute_centroid(self.voxel_manager.voxels) + + for pose in self.robot_poses: + old_t = pose["t"] # R not used for similarity check + + old_to_obj = obj_center - old_t + new_to_obj = obj_center - new_t + + old_angle = np.arctan2(old_to_obj[1], old_to_obj[0]) + new_angle = np.arctan2(new_to_obj[1], new_to_obj[0]) + angle_diff = abs(normalize_angle(np.array([new_angle - old_angle]))[0]) + + old_dist = np.linalg.norm(old_to_obj) + new_dist = np.linalg.norm(new_to_obj) + dist_diff = abs(new_dist - old_dist) + + if angle_diff < self._angle_threshold and dist_diff < self._distance_threshold: + return True + + return False + + def update(self, obs: Observation) -> None: + """Update object with a new observation. + + Args: + obs: New observation to incorporate. + """ + # Update voxel manager + self.voxel_manager.update(obs.points, obs.robot_R, obs.robot_t) + self.detection_count += 1 + self.inactive_frames = 0 + self.last_update_time = obs.timestamp + + # Check if pose is new (not similar to existing) + is_similar = self._is_similar_pose(obs.robot_R, obs.robot_t) + + if not is_similar: + # Update class statistics + self.class_votes[obs.label] = self.class_votes.get(obs.label, 0) + 1 + self.confidence_scores[obs.label] = max( + self.confidence_scores.get(obs.label, 0), obs.confidence + ) + self.points_count[obs.label] = ( + self.points_count.get(obs.label, 0) + len(obs.points) + ) + + # Add pose + self.robot_poses.append({"R": obs.robot_R.copy(), "t": obs.robot_t.copy()}) + + # Add track ID if new + if obs.track_id not in self.track_ids: + self.track_ids.append(obs.track_id) + + self._invalidate_cache() + + def merge(self, other: MapObject) -> None: + """Merge another MapObject into this one. + + Args: + other: Object to merge from. + """ + # Merge track IDs + for tid in other.track_ids: + if tid not in self.track_ids: + self.track_ids.append(tid) + + # Merge voxel managers + self.voxel_manager.merge_from(other.voxel_manager) + + # Merge class statistics + for label, count in other.class_votes.items(): + self.class_votes[label] = self.class_votes.get(label, 0) + count + self.confidence_scores[label] = max( + self.confidence_scores.get(label, 0), + other.confidence_scores.get(label, 0), + ) + self.points_count[label] = ( + self.points_count.get(label, 0) + other.points_count.get(label, 0) + ) + + # Merge poses (only unique ones) + for pose in other.robot_poses: + if not self._is_similar_pose(pose["R"], pose["t"]): + self.robot_poses.append(pose) + + # Update lifecycle + self.detection_count += other.detection_count + self.last_update_time = max(self.last_update_time, other.last_update_time) + self.is_asked_vlm = self.is_asked_vlm or other.is_asked_vlm + + self._invalidate_cache() + + @property + def label(self) -> str: + """Get dominant class label by weighted vote. + + Returns: + Most likely class label based on points_count * confidence. 
+ """ + if not self.class_votes: + return "unknown" + + # Compute weighted scores + weighted_scores = {} + for lbl in self.class_votes: + pts = self.points_count.get(lbl, 0) + conf = self.confidence_scores.get(lbl, 0) + weighted_scores[lbl] = pts * conf + + return max(weighted_scores, key=weighted_scores.get) + + @property + def confidence(self) -> float: + """Get confidence of the dominant label.""" + return self.confidence_scores.get(self.label, 0.0) + + @property + def centroid(self) -> Optional[np.ndarray]: + """Compute centroid weighted by observation angles. + + Returns: + Centroid position (3,) or None if no valid voxels. + """ + if self._needs_recompute or self._centroid is None: + self._compute_geometry() + return self._centroid + + @property + def bbox3d_corners(self) -> Optional[np.ndarray]: + """Compute axis-aligned 3D bounding box corners. + + Returns: + 8 corner points (8, 3) or None if no valid voxels. + """ + if self._needs_recompute or self._bbox3d_corners is None: + self._compute_geometry() + return self._bbox3d_corners + + def _compute_geometry(self) -> None: + """Compute cached centroid and bbox from valid voxels using Open3D. + + Returns: + None. Updates cached centroid and bbox in-place. + """ + # Get valid voxels (regularized first, then fallback to all) + valid_voxels = self.voxel_manager.get_valid_voxels( + diversity_percentile=self._diversity_percentile, + use_regularized=True, + ) + if len(valid_voxels) == 0: + valid_voxels = self.voxel_manager.get_valid_voxels( + diversity_percentile=self._diversity_percentile, + use_regularized=False, + ) + + if len(valid_voxels) < 4: + self._centroid = compute_centroid(valid_voxels) if len(valid_voxels) > 0 else None + self._bbox3d_corners = None + else: + pcd = create_pointcloud(valid_voxels) + obb = pcd.get_minimal_oriented_bounding_box() + self._centroid = np.asarray(obb.center) + self._bbox3d_corners = np.asarray(obb.get_box_points()) + + self._needs_recompute = False + + def regularize_shape(self, percentile: float = 0.85) -> None: + """Apply DBSCAN clustering to filter outlier voxels. + + This method clusters the voxels using DBSCAN, then selects clusters + by descending weight (observation angle diversity) until the target + percentile of total weight is reached. Clusters far from the main + cluster are down-weighted. + + Args: + percentile: Target fraction of total weight to include (0-1). 
+ """ + voxels = self.voxel_manager.voxels + if len(voxels) < 3: + self.voxel_manager.regularized_mask = np.ones(len(voxels), dtype=bool) + self._invalidate_cache() + return + + # DBSCAN clustering + pcd = create_pointcloud(voxels) + + eps = self.voxel_manager.voxel_size * 2.5 + min_points = 10 if self.detection_count >= 3 else 5 + + labels = np.array(pcd.cluster_dbscan(eps=eps, min_points=min_points)) + unique_labels = np.unique(labels) + + # Collect cluster info + cluster_masks = [] + cluster_weights = [] + cluster_centroids = [] + + for label in unique_labels: + if label == -1: # Noise + continue + mask = labels == label + weight = float(np.sum(self.voxel_manager.observation_angles[mask])) + if weight < 5: # Skip tiny clusters + continue + cluster_masks.append(mask) + cluster_weights.append(weight) + cluster_centroids.append(compute_centroid(voxels[mask])) + + if len(cluster_masks) == 0: + self.voxel_manager.regularized_mask = np.zeros(len(voxels), dtype=bool) + elif len(cluster_masks) == 1: + self.voxel_manager.regularized_mask = cluster_masks[0] + else: + weights = np.array(cluster_weights) + centroids = np.array(cluster_centroids) + + # Find main cluster + main_idx = int(np.argmax(weights)) + main_centroid = centroids[main_idx] + + # Distance-based weight adjustment + dists = np.linalg.norm(centroids - main_centroid, axis=1) + dist_base = np.max(dists) if np.max(dists) > 0 else 1.0 + + alpha = 3.0 # Decay rate + scale = np.exp(-alpha * dists / dist_base) + adjusted_weights = weights * scale + + # Select clusters up to percentile + target = percentile * np.sum(adjusted_weights) + order = np.argsort(adjusted_weights)[::-1] + + chosen = np.zeros(len(cluster_masks), dtype=bool) + acc = 0.0 + + for i in order: + if acc + adjusted_weights[i] <= target + 1e-8: + chosen[i] = True + acc += adjusted_weights[i] + if acc >= target: + break + + # Merge chosen clusters + valid_mask = np.zeros(len(voxels), dtype=bool) + for i, cmask in enumerate(cluster_masks): + if chosen[i]: + valid_mask |= cmask + + self.voxel_manager.regularized_mask = valid_mask + + self._invalidate_cache() diff --git a/semantic_mapping/semantic_mapping/map_object_utils.py b/semantic_mapping/semantic_mapping/map_object_utils.py new file mode 100644 index 0000000..6bc3d8d --- /dev/null +++ b/semantic_mapping/semantic_mapping/map_object_utils.py @@ -0,0 +1,217 @@ +"""ROS message helpers for MapObject publishers.""" + +from __future__ import annotations + +import zlib +from typing import Optional + +import numpy as np +from geometry_msgs.msg import Point +from sensor_msgs.msg import PointCloud2 +from std_msgs.msg import Header +from visualization_msgs.msg import Marker, MarkerArray + +from semantic_mapping.msg import ObjectNode +from vector_perception_utils.pointcloud_utils import create_pointcloud2_msg + +from .map_object import MapObject + + +def map_object_to_object_node( + obj: MapObject, + header: Header, + status: str = "new", + viewpoint_id: int = -1, +) -> Optional[ObjectNode]: + """Convert a MapObject into an ObjectNode message. + + Args: + obj: MapObject instance. + header: ROS header to attach to the message. + status: Lifecycle status string ("new", "updated", "unchanged"). + viewpoint_id: Viewpoint identifier for image saving. + + Returns: + ObjectNode message or None if required geometry is missing. 
+ """ + if obj.centroid is None or obj.bbox3d_corners is None: + return None + + msg = ObjectNode() + msg.header = header + msg.object_id = obj.track_ids + msg.label = obj.label + msg.status = status + msg.position.x = float(obj.centroid[0]) + msg.position.y = float(obj.centroid[1]) + msg.position.z = float(obj.centroid[2]) + + for i in range(8): + msg.bbox3d[i].x = float(obj.bbox3d_corners[i, 0]) + msg.bbox3d[i].y = float(obj.bbox3d_corners[i, 1]) + msg.bbox3d[i].z = float(obj.bbox3d_corners[i, 2]) + + voxels = obj.voxel_manager.get_valid_voxels( + diversity_percentile=obj._diversity_percentile, + use_regularized=True, + ) + if len(voxels) == 0: + voxels = obj.voxel_manager.get_valid_voxels( + diversity_percentile=obj._diversity_percentile, + use_regularized=False, + ) + if len(voxels) > 0: + seed = zlib.crc32(obj.label.encode("utf-8")) + color = np.array( + [ + (seed & 0xFF) / 255.0, + ((seed >> 8) & 0xFF) / 255.0, + ((seed >> 16) & 0xFF) / 255.0, + ], + dtype=np.float32, + ) + colors = np.tile(color, (voxels.shape[0], 1)) + msg.cloud = create_pointcloud2_msg(points=voxels, colors=colors, header=header) + + msg.img_path = obj.best_image_path or "" + msg.is_asked_vlm = obj.is_asked_vlm + msg.viewpoint_id = -1 if obj.updated_by_vlm else viewpoint_id + return msg + + +def create_bbox_markers( + objects: list[MapObject], header: Header, namespace: str = "bbox" +) -> MarkerArray: + """Create wireframe bounding box markers for MapObjects.""" + markers = MarkerArray() + edges = [ + (0, 1), + (1, 2), + (2, 3), + (3, 0), + (4, 5), + (5, 6), + (6, 7), + (7, 4), + (0, 4), + (1, 5), + (2, 6), + (3, 7), + ] + + for obj in objects: + if obj.bbox3d_corners is None: + continue + marker = Marker() + marker.header = header + marker.ns = namespace + marker.id = int(obj.track_ids[0]) if obj.track_ids else 0 + marker.type = Marker.LINE_LIST + marker.action = Marker.ADD + marker.scale.x = 0.02 + + seed = zlib.crc32(obj.label.encode("utf-8")) + marker.color.r = (seed & 0xFF) / 255.0 + marker.color.g = ((seed >> 8) & 0xFF) / 255.0 + marker.color.b = ((seed >> 16) & 0xFF) / 255.0 + marker.color.a = 1.0 + + corners = obj.bbox3d_corners + for i, j in edges: + marker.points.append(Point(x=float(corners[i, 0]), y=float(corners[i, 1]), z=float(corners[i, 2]))) + marker.points.append(Point(x=float(corners[j, 0]), y=float(corners[j, 1]), z=float(corners[j, 2]))) + + markers.markers.append(marker) + + return markers + + +def create_label_markers( + objects: list[MapObject], + header: Header, + namespace: str = "labels", + text_height: float = 0.4, +) -> MarkerArray: + """Create text label markers for MapObjects.""" + markers = MarkerArray() + + for obj in objects: + if obj.centroid is None: + continue + marker = Marker() + marker.header = header + marker.ns = namespace + marker.id = int(obj.track_ids[0]) if obj.track_ids else 0 + marker.type = Marker.TEXT_VIEW_FACING + marker.action = Marker.ADD + marker.scale.z = text_height + marker.text = obj.label + marker.pose.position.x = float(obj.centroid[0]) + marker.pose.position.y = float(obj.centroid[1]) + marker.pose.position.z = float(obj.centroid[2]) + 0.3 + + seed = zlib.crc32(obj.label.encode("utf-8")) + marker.color.r = (seed & 0xFF) / 255.0 + marker.color.g = ((seed >> 8) & 0xFF) / 255.0 + marker.color.b = ((seed >> 16) & 0xFF) / 255.0 + marker.color.a = 1.0 + + markers.markers.append(marker) + + return markers + + +def create_object_pointcloud(objects: list[MapObject], header: Header) -> Optional[PointCloud2]: + """Create a colored point cloud for a list of 
MapObjects.""" + points_list = [] + colors_list = [] + + for obj in objects: + voxels = obj.voxel_manager.get_valid_voxels( + diversity_percentile=obj._diversity_percentile, + use_regularized=True, + ) + if len(voxels) == 0: + voxels = obj.voxel_manager.get_valid_voxels( + diversity_percentile=obj._diversity_percentile, + use_regularized=False, + ) + if len(voxels) == 0: + continue + + seed = zlib.crc32(obj.label.encode("utf-8")) + color = np.array( + [ + (seed & 0xFF) / 255.0, + ((seed >> 8) & 0xFF) / 255.0, + ((seed >> 16) & 0xFF) / 255.0, + ], + dtype=np.float32, + ) + colors = np.tile(color, (voxels.shape[0], 1)) + points_list.append(voxels) + colors_list.append(colors) + + if len(points_list) == 0: + return None + + points = np.concatenate(points_list, axis=0) + colors = np.concatenate(colors_list, axis=0) + return create_pointcloud2_msg(points=points, colors=colors, header=header) + + +def create_delete_markers( + deleted_ids: list[int], + header: Header, + namespace: str = "bbox", +) -> MarkerArray: + """Create deletion markers for a list of marker IDs.""" + markers = MarkerArray() + for marker_id in deleted_ids: + marker = Marker() + marker.header = header + marker.ns = namespace + marker.id = int(marker_id) + marker.action = Marker.DELETE + markers.markers.append(marker) + return markers diff --git a/semantic_mapping/semantic_mapping/mapper.py b/semantic_mapping/semantic_mapping/mapper.py new file mode 100644 index 0000000..f8a10ac --- /dev/null +++ b/semantic_mapping/semantic_mapping/mapper.py @@ -0,0 +1,257 @@ +"""Object map manager for semantic mapping.""" + +from __future__ import annotations + +from dataclasses import dataclass +import threading +import time +import uuid + +from .matching import compute_iou_3d_with_ratios, find_best_match +from .map_object import MapObject, ObjectStatus, Observation + + +@dataclass +class UpdateStats: + """Summary of updates applied in a mapper step.""" + + created: int = 0 + updated: int = 0 + merged: int = 0 + deleted: int = 0 + promoted: int = 0 + total: int = 0 + + +class ObjectMapper: + """Manages the semantic object map with two-tier state management.""" + + def __init__( + self, + voxel_size: float = 0.03, + confidence_threshold: float = 0.30, + distance_threshold: float = 0.5, + iou_threshold: float = 0.2, + min_detections_for_permanent: int = 3, + pending_ttl_s: float = 10.0, + max_inactive_frames: int = 20, + num_angle_bins: int = 15, + ) -> None: + """Initialize the object mapper and its thresholds. + + Args: + voxel_size: Voxel grid cell size in meters. + confidence_threshold: Minimum detection confidence to accept. + distance_threshold: Max centroid distance for spatial matching. + iou_threshold: IoU threshold for merge checks. + min_detections_for_permanent: Detections before promotion. + pending_ttl_s: Time-to-live for pending objects. + max_inactive_frames: Frames before pruning inactive objects. + num_angle_bins: Observation angle bins for voxel manager. 
+ """ + self._pending_objects: dict[str, MapObject] = {} + self._permanent_objects: dict[str, MapObject] = {} + self._track_id_map: dict[int, str] = {} + self._lock = threading.RLock() + + self._voxel_size = voxel_size + self._confidence_threshold = confidence_threshold + self._distance_threshold = distance_threshold + self._iou_threshold = iou_threshold + self._min_detections_for_permanent = min_detections_for_permanent + self._pending_ttl_s = pending_ttl_s + self._max_inactive_frames = max_inactive_frames + self._num_angle_bins = num_angle_bins + + def update(self, observations: list[Observation], target_object: str | None = None) -> UpdateStats: + """Update the map with new observations. + + Args: + observations: List of new observations to integrate. + target_object: Optional label to protect from pruning. + + Returns: + UpdateStats summarizing changes applied. + """ + stats = UpdateStats() + now = max((obs.timestamp for obs in observations), default=time.time()) + updated_ids: set[str] = set() + + with self._lock: + for obs in observations: + if obs.confidence < self._confidence_threshold: + continue + + obj = self._match_observation(obs) + if obj is None: + obj = self._create_object(obs) + self._pending_objects[obj.object_id] = obj + stats.created += 1 + else: + obj.update(obs) + stats.updated += 1 + + updated_ids.add(obj.object_id) + for track_id in obj.track_ids: + self._track_id_map[track_id] = obj.object_id + + if obj.status == ObjectStatus.PENDING: + self._check_promotion(obj) + if obj.status == ObjectStatus.PERMANENT: + stats.promoted += 1 + + for obj in list(self._pending_objects.values()) + list(self._permanent_objects.values()): + if obj.object_id not in updated_ids: + obj.inactive_frames += 1 + + stats.deleted += len(self._prune_inactive(target_object, now)) + stats.merged += self.merge_objects() + stats.total = len(self._pending_objects) + len(self._permanent_objects) + + return stats + + def _match_observation(self, obs: Observation) -> MapObject | None: + """Match an observation to an existing object.""" + if obs.track_id in self._track_id_map: + obj_id = self._track_id_map[obs.track_id] + if obj_id in self._permanent_objects: + return self._permanent_objects[obj_id] + if obj_id in self._pending_objects: + return self._pending_objects[obj_id] + + candidates = list(self._permanent_objects.values()) + list(self._pending_objects.values()) + return find_best_match( + obs=obs, + candidates=candidates, + distance_threshold=self._distance_threshold, + iou_threshold=self._iou_threshold, + ) + + def _create_object(self, obs: Observation) -> MapObject: + """Create a new MapObject from an observation.""" + obj = MapObject.from_observation( + object_id=str(uuid.uuid4()), + obs=obs, + voxel_size=self._voxel_size, + num_angle_bins=self._num_angle_bins, + ) + self._track_id_map[obs.track_id] = obj.object_id + return obj + + def _check_promotion(self, obj: MapObject) -> None: + """Promote object from pending to permanent if threshold met.""" + if obj.status != ObjectStatus.PENDING: + return + if obj.detection_count < self._min_detections_for_permanent: + return + if obj.object_id in self._pending_objects: + self._pending_objects.pop(obj.object_id) + obj.status = ObjectStatus.PERMANENT + self._permanent_objects[obj.object_id] = obj + + def merge_objects(self) -> int: + """Merge overlapping permanent objects. + + Returns: + Number of merges performed. 
+ """ + merged = 0 + objs = list(self._permanent_objects.values()) + i = 0 + while i < len(objs): + obj = objs[i] + j = i + 1 + while j < len(objs): + other = objs[j] + should_merge = False + + if set(obj.track_ids) & set(other.track_ids): + should_merge = True + else: + if obj.centroid is None or other.centroid is None: + j += 1 + continue + dist = float(((obj.centroid - other.centroid) ** 2).sum() ** 0.5) + if dist > self._distance_threshold: + j += 1 + continue + + obj_corners = obj.bbox3d_corners + other_corners = other.bbox3d_corners + obj_extent = ( + obj_corners.max(axis=0) - obj_corners.min(axis=0) + if obj_corners is not None and obj_corners.size > 0 + else None + ) + other_extent = ( + other_corners.max(axis=0) - other_corners.min(axis=0) + if other_corners is not None and other_corners.size > 0 + else None + ) + + dist_merge = dist < 0.25 + if obj_extent is not None and other_extent is not None: + dist_thresh = (((obj_extent / 2 + other_extent / 2) / 2) ** 2).sum() ** 0.5 + dist_merge = dist_merge or dist < dist_thresh * 0.5 + + if dist_merge: + should_merge = True + elif obj_corners is not None and other_corners is not None: + iou, ratio_obj, ratio_other = compute_iou_3d_with_ratios( + obj_corners, other_corners + ) + if ( + iou > self._iou_threshold + or ratio_obj > 0.4 + or ratio_other > 0.4 + ): + should_merge = True + + if should_merge: + obj.merge(other) + for track_id in other.track_ids: + self._track_id_map[track_id] = obj.object_id + self._permanent_objects.pop(other.object_id, None) + objs.pop(j) + merged += 1 + else: + j += 1 + i += 1 + + return merged + + def _prune_inactive(self, target_object: str | None, now: float) -> list[MapObject]: + """Remove inactive pending/permanent objects.""" + deleted: list[MapObject] = [] + + for obj_id, obj in list(self._pending_objects.items()): + if (now - obj.last_update_time) > self._pending_ttl_s or ( + obj.inactive_frames > self._max_inactive_frames + ): + deleted.append(self._pending_objects.pop(obj_id)) + + for obj_id, obj in list(self._permanent_objects.items()): + if target_object and obj.label == target_object: + continue + if obj.inactive_frames > self._max_inactive_frames: + deleted.append(self._permanent_objects.pop(obj_id)) + + for obj in deleted: + for track_id in obj.track_ids: + if self._track_id_map.get(track_id) == obj.object_id: + self._track_id_map.pop(track_id, None) + + return deleted + + def get_objects(self, include_pending: bool = False) -> list[MapObject]: + """Return current objects. + + Args: + include_pending: If True, include pending objects. + + Returns: + List of MapObject instances. + """ + if include_pending: + return list(self._permanent_objects.values()) + list(self._pending_objects.values()) + return list(self._permanent_objects.values()) diff --git a/semantic_mapping/semantic_mapping/matching.py b/semantic_mapping/semantic_mapping/matching.py new file mode 100644 index 0000000..418521e --- /dev/null +++ b/semantic_mapping/semantic_mapping/matching.py @@ -0,0 +1,246 @@ +"""Object matching utilities for semantic mapping.""" + +from __future__ import annotations + +from typing import Optional + +import numpy as np + +from vector_perception_utils.pointcloud_utils import compute_centroid, get_oriented_bbox + +from .map_object import MapObject, Observation + + +def _convex_hull_2d(points: np.ndarray) -> np.ndarray: + """Compute 2D convex hull in CCW order (Monotonic chain). + + Args: + points: Input points (N, 2). + + Returns: + Hull vertices in CCW order (M, 2). 
+ """ + if points.shape[0] < 3: + return points + + pts = np.unique(points, axis=0) + if pts.shape[0] < 3: + return pts + + pts = pts[np.lexsort((pts[:, 1], pts[:, 0]))] + + def cross(o: np.ndarray, a: np.ndarray, b: np.ndarray) -> float: + return (a[0] - o[0]) * (b[1] - o[1]) - (a[1] - o[1]) * (b[0] - o[0]) + + lower: list[np.ndarray] = [] + for p in pts: + while len(lower) >= 2 and cross(lower[-2], lower[-1], p) <= 0: + lower.pop() + lower.append(p) + + upper: list[np.ndarray] = [] + for p in reversed(pts): + while len(upper) >= 2 and cross(upper[-2], upper[-1], p) <= 0: + upper.pop() + upper.append(p) + + hull = np.array(lower[:-1] + upper[:-1]) + return hull + + +def _polygon_area(poly: np.ndarray) -> float: + """Compute polygon area using the shoelace formula. + + Args: + poly: Polygon vertices in order (N, 2). + + Returns: + Polygon area. + """ + if poly.shape[0] < 3: + return 0.0 + x = poly[:, 0] + y = poly[:, 1] + return 0.5 * abs(float(np.dot(x, np.roll(y, -1)) - np.dot(y, np.roll(x, -1)))) + + +def _line_intersection(p1: np.ndarray, p2: np.ndarray, cp1: np.ndarray, cp2: np.ndarray) -> np.ndarray: + """Intersection of line p1->p2 with line cp1->cp2. + + Args: + p1: Line segment start (2,). + p2: Line segment end (2,). + cp1: Clip edge start (2,). + cp2: Clip edge end (2,). + + Returns: + Intersection point (2,). + """ + dc = cp1 - cp2 + dp = p1 - p2 + denom = dc[0] * dp[1] - dc[1] * dp[0] + if denom == 0: + return p2 + n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0] + n2 = p1[0] * p2[1] - p1[1] * p2[0] + x = (n1 * dp[0] - n2 * dc[0]) / denom + y = (n1 * dp[1] - n2 * dc[1]) / denom + return np.array([x, y], dtype=np.float64) + + +def _polygon_clip(subject: np.ndarray, clip: np.ndarray) -> np.ndarray: + """Clip a polygon with a convex clip polygon (Sutherland–Hodgman). + + Args: + subject: Subject polygon vertices (N, 2). + clip: Clip polygon vertices (M, 2). + + Returns: + Clipped polygon vertices (K, 2). + """ + if subject.shape[0] == 0: + return subject + + def inside(p: np.ndarray, cp1: np.ndarray, cp2: np.ndarray) -> bool: + return (cp2[0] - cp1[0]) * (p[1] - cp1[1]) - (cp2[1] - cp1[1]) * ( + p[0] - cp1[0] + ) >= 0 + + output = subject + for i in range(len(clip)): + cp1 = clip[i] + cp2 = clip[(i + 1) % len(clip)] + input_list = output + output = [] + if len(input_list) == 0: + break + s = input_list[-1] + for e in input_list: + if inside(e, cp1, cp2): + if not inside(s, cp1, cp2): + output.append(_line_intersection(s, e, cp1, cp2)) + output.append(e) + elif inside(s, cp1, cp2): + output.append(_line_intersection(s, e, cp1, cp2)) + s = e + output = np.array(output, dtype=np.float64) + return output + + +def compute_iou_3d_with_ratios( + bbox1: np.ndarray, bbox2: np.ndarray +) -> tuple[float, float, float]: + """Compute IoU and intersection ratios using 2D polygon overlap + Z overlap. + + Args: + bbox1: Corner points for bbox1 (N, 3). + bbox2: Corner points for bbox2 (N, 3). + + Returns: + Tuple of (iou, ratio_bbox1, ratio_bbox2). 
+ """ + if bbox1.shape[0] < 4 or bbox2.shape[0] < 4: + return 0.0, 0.0, 0.0 + + hull1 = _convex_hull_2d(bbox1[:, :2]) + hull2 = _convex_hull_2d(bbox2[:, :2]) + if hull1.shape[0] < 3 or hull2.shape[0] < 3: + return 0.0, 0.0, 0.0 + + poly_inter = _polygon_clip(hull1, hull2) + inter_area = _polygon_area(poly_inter) if poly_inter.shape[0] >= 3 else 0.0 + + zmin1, zmax1 = float(np.min(bbox1[:, 2])), float(np.max(bbox1[:, 2])) + zmin2, zmax2 = float(np.min(bbox2[:, 2])), float(np.max(bbox2[:, 2])) + height = max(0.0, min(zmax1, zmax2) - max(zmin1, zmin2)) + inter_vol = inter_area * height + + area1 = _polygon_area(hull1) + area2 = _polygon_area(hull2) + vol1 = area1 * (zmax1 - zmin1) + vol2 = area2 * (zmax2 - zmin2) + denom = vol1 + vol2 - inter_vol + + iou = inter_vol / denom if denom > 0 else 0.0 + ratio1 = inter_vol / vol1 if vol1 > 0 else 0.0 + ratio2 = inter_vol / vol2 if vol2 > 0 else 0.0 + return iou, ratio1, ratio2 + + +def find_best_match( + obs: Observation, + candidates: list[MapObject], + distance_threshold: float, + iou_threshold: float, + ratio_threshold: float = 0.4, + min_distance_merge: float = 0.25, +) -> Optional[MapObject]: + """Find best matching object by distance/IoU heuristics. + + Args: + obs: Observation to match. + candidates: Candidate map objects. + distance_threshold: Max centroid distance to consider. + iou_threshold: Min IoU to accept a merge. + ratio_threshold: Min overlap ratio to accept a merge. + min_distance_merge: Absolute distance threshold for merge. + + Returns: + Best matching object or None if no match passes thresholds. + """ + if obs.points.shape[0] == 0: + return None + + obs_center = compute_centroid(obs.points) + obs_bbox = get_oriented_bbox(obs.points) + obs_corners = obs_bbox[1] if obs_bbox is not None else None + obs_extent = ( + np.max(obs_corners, axis=0) - np.min(obs_corners, axis=0) + if obs_corners is not None and obs_corners.size > 0 + else None + ) + + best_candidate: Optional[MapObject] = None + best_dist = float("inf") + + for candidate in candidates: + if obs.track_id in candidate.track_ids: + return candidate + + cand_center = candidate.centroid + + dist = float(np.linalg.norm(obs_center - cand_center)) + if dist > distance_threshold: + continue + + cand_corners = candidate.bbox3d_corners + cand_extent = ( + np.max(cand_corners, axis=0) - np.min(cand_corners, axis=0) + if cand_corners is not None and cand_corners.size > 0 + else None + ) + + dist_merge = dist < min_distance_merge + if obs_extent is not None and cand_extent is not None: + dist_thresh = np.linalg.norm((obs_extent / 2 + cand_extent / 2) / 2) * 0.5 + dist_merge = dist_merge or dist < dist_thresh + + if dist_merge: + if dist < best_dist: + best_candidate = candidate + best_dist = dist + continue + + if obs_corners is None or cand_corners is None: + continue + + iou, ratio_obs, ratio_cand = compute_iou_3d_with_ratios( + obs_corners, cand_corners + ) + + if iou > iou_threshold or ratio_obs > ratio_threshold or ratio_cand > ratio_threshold: + if dist < best_dist: + best_candidate = candidate + best_dist = dist + + return best_candidate diff --git a/semantic_mapping/semantic_mapping/storage.py b/semantic_mapping/semantic_mapping/storage.py new file mode 100644 index 0000000..6699373 --- /dev/null +++ b/semantic_mapping/semantic_mapping/storage.py @@ -0,0 +1,91 @@ +"""Async image saving queue for semantic mapping.""" + +from __future__ import annotations + +from pathlib import Path +import queue +import threading +import time + +import cv2 +import numpy as np + + +class 
ImageSaveQueue: + """Background worker for saving object images to disk.""" + + def __init__(self, output_dir: Path, max_queue_size: int = 100, save_png: bool = False) -> None: + """Initialize the image save queue. + + Args: + output_dir: Directory where images are stored. + max_queue_size: Maximum number of queued save jobs. + save_png: If True, also save a .png copy of the image. + """ + self._output_dir = Path(output_dir) + self._output_dir.mkdir(parents=True, exist_ok=True) + self._save_png = save_png + self._queue: queue.Queue = queue.Queue(maxsize=max_queue_size) + self._stop_event = threading.Event() + self._thread: threading.Thread | None = None + + def start(self) -> None: + """Start the background worker.""" + if self._thread is not None and self._thread.is_alive(): + return + self._stop_event.clear() + self._thread = threading.Thread(target=self._run, daemon=True) + self._thread.start() + + def stop(self) -> None: + """Stop the background worker.""" + self._stop_event.set() + try: + self._queue.put_nowait(None) + except queue.Full: + pass + if self._thread is not None: + self._thread.join(timeout=2.0) + self._thread = None + + def queue_save( + self, object_id: str, image: np.ndarray, mask: np.ndarray | None = None + ) -> str: + """Queue an image (and optional mask) for saving. + + Args: + object_id: Object identifier to embed in filename. + image: Image array to save. + mask: Optional mask array to save alongside the image. + + Returns: + Path to the saved image (.npy). + """ + timestamp_ms = int(time.time() * 1000) + image_path = self._output_dir / f"{object_id}_{timestamp_ms}.npy" + mask_path = self._output_dir / f"{object_id}_{timestamp_ms}_mask.npy" + self._queue.put(("save", str(image_path), str(mask_path), image, mask)) + return str(image_path) + + def _run(self) -> None: + while not self._stop_event.is_set(): + item = self._queue.get() + if item is None: + self._queue.task_done() + break + op, image_path, mask_path, image, mask = item + if op == "save": + try: + np.save(image_path, image) + if mask is not None: + np.save(mask_path, mask) + if self._save_png: + png_path = Path(image_path).with_suffix(".png") + if image.ndim == 3 and image.shape[2] == 3: + bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) + cv2.imwrite(str(png_path), bgr) + else: + cv2.imwrite(str(png_path), image) + except Exception: + pass + self._queue.task_done() diff --git a/semantic_mapping/setup.cfg b/semantic_mapping/setup.cfg deleted file mode 100644 index 31239a6..0000000 --- a/semantic_mapping/setup.cfg +++ /dev/null @@ -1,4 +0,0 @@ -[develop] -script_dir=$base/lib/semantic_mapping -[install] -install_scripts=$base/lib/semantic_mapping diff --git a/semantic_mapping/setup.py b/semantic_mapping/setup.py deleted file mode 100644 index fc5260b..0000000 --- a/semantic_mapping/setup.py +++ /dev/null @@ -1,27 +0,0 @@ -from glob import glob -import os - -from setuptools import find_packages, setup - -package_name = "semantic_mapping" - -setup( - name=package_name, - version="0.0.1", - packages=find_packages(exclude=["test"]), - package_data={package_name: []}, - data_files=[ - ("share/ament_index/resource_index/packages", ["resource/" + package_name]), - ("share/" + package_name, ["package.xml"]), - (os.path.join("share", package_name, "launch"), glob(os.path.join("launch", "*launch.[pxy][yma]*"))), - (os.path.join("share", package_name, "config"), glob(os.path.join("config", "*.yaml"))), - ], - install_requires=["setuptools"], - zip_safe=True, - maintainer="Alex Lin", - 
maintainer_email="alex.lin416@outlook.com", - description="Semantic mapping utilities with a YOLO-E detector node.", - license="MIT", - tests_require=["pytest"], - entry_points={"console_scripts": []}, -) diff --git a/vector_perception_utils/vector_perception_utils/pointcloud_utils.py b/vector_perception_utils/vector_perception_utils/pointcloud_utils.py index 835db76..c27a23b 100644 --- a/vector_perception_utils/vector_perception_utils/pointcloud_utils.py +++ b/vector_perception_utils/vector_perception_utils/pointcloud_utils.py @@ -199,42 +199,68 @@ def crop_pointcloud_bbox( def compute_centroid(points: np.ndarray) -> np.ndarray: """ Compute centroid of point cloud. - + Args: points: Point coordinates (N, 3) - + Returns: Centroid as (3,) array """ return np.mean(points, axis=0) +def get_oriented_bbox( + points: np.ndarray, +) -> Optional[Tuple[np.ndarray, np.ndarray]]: + """ + Compute minimal oriented bounding box from point cloud. + + Args: + points: Point coordinates (N, 3) + + Returns: + Tuple of (center, corners) where center is (3,) and corners is (8, 3), + or None if insufficient points. + """ + if len(points) < 4: + return None + + pcd = create_pointcloud(points) + + try: + obb = pcd.get_minimal_oriented_bounding_box() + except Exception: + return None + + center = np.asarray(obb.center) + corners = np.asarray(obb.get_box_points()) + return center, corners + def pointcloud_to_bbox3d(points: np.ndarray, header=None): """ Convert point cloud to minimal oriented 3D bounding box (ROS2 message). - + Args: points: Point coordinates (N, 3) header: Optional ROS2 header for the message - + Returns: vision_msgs/BoundingBox3D message or None if insufficient points """ - + if len(points) < 4: return None pcd = create_pointcloud(points) - + try: obb = pcd.get_minimal_oriented_bounding_box() except: return None - center = obb.center extent = obb.extent quat = Rotation.from_matrix(obb.R).as_quat() - + bbox = BoundingBox3D() bbox.center.position = Point( x=float(center[0]), y=float(center[1]), z=float(center[2]) @@ -243,7 +269,7 @@ def pointcloud_to_bbox3d(points: np.ndarray, header=None): x=float(quat[0]), y=float(quat[1]), z=float(quat[2]), w=float(quat[3]) ) bbox.size = Vector3(x=float(extent[0]), y=float(extent[1]), z=float(extent[2])) - + return bbox @@ -328,6 +354,192 @@ def pointcloud_to_depth( return depth_image +def project_pointcloud_to_image( + points: np.ndarray, + projection: str = "equirect", + image_width: int = 1920, + image_height: int = 640, + axis_mode: str = "xz", + intrinsics: Optional[np.ndarray] = None, + depth_filter: bool = False, + depth_filter_margin: float = 0.15, + transform: Optional[np.ndarray] = None, +) -> Tuple[np.ndarray, np.ndarray]: + """ + Project a point cloud into image pixel coordinates. + + Args: + points: Point coordinates (N, 3). + projection: Projection model ("equirect" or "pinhole"). + image_width: Output image width in pixels. + image_height: Output image height in pixels. + axis_mode: Equirect axis convention ("xz" or "xy"). + intrinsics: Pinhole intrinsics (3, 3) if using "pinhole". + depth_filter: Enable depth-based occlusion filtering. + depth_filter_margin: Depth margin for occlusion filtering (meters). + transform: Optional 4x4 transform applied before projection. + + Returns: + Tuple of (camera_points, pixel_idx) where pixel_idx is (N, 3) with (u, v, depth). 
+ """ + camera_points = transform_pointcloud(points, transform) if transform is not None else points + + if projection == "pinhole": + if intrinsics is None: + raise ValueError("intrinsics required for pinhole projection") + u = camera_points[:, 0] * intrinsics[0, 0] / (camera_points[:, 2] + 1e-6) + intrinsics[0, 2] + v = camera_points[:, 1] * intrinsics[1, 1] / (camera_points[:, 2] + 1e-6) + intrinsics[1, 2] + return camera_points, np.stack([u, v, camera_points[:, 2]], axis=-1) + + if axis_mode == "xz": + hori_dis = np.sqrt(camera_points[:, 0] ** 2 + camera_points[:, 2] ** 2) + u = ( + image_width / (2 * np.pi) * np.arctan2(camera_points[:, 0], camera_points[:, 2]) + + image_width / 2 + + 1 + ) + v = ( + image_width / (2 * np.pi) * np.arctan(camera_points[:, 1] / hori_dis) + + image_height / 2 + + 1 + ) + else: + hori_dis = np.sqrt(camera_points[:, 0] ** 2 + camera_points[:, 1] ** 2) + u = ( + -image_width / (2 * np.pi) * np.arctan2(camera_points[:, 1], camera_points[:, 0]) + + image_width / 2 + + 1 + ) + v = ( + -image_width / (2 * np.pi) * np.arctan2(camera_points[:, 2], hori_dis) + + image_height / 2 + + 1 + ) + + u = np.clip(u, 0, image_width - 1) + v = np.clip(v, 0, image_height - 1) + + if depth_filter: + depth_map = np.full((image_height, image_width), np.inf) + idx = v.astype(int) * image_width + u.astype(int) + np.minimum.at(depth_map.ravel(), idx, hori_dis) + remove_mask = hori_dis >= depth_map[v.astype(int), u.astype(int)] + depth_filter_margin + u = u.copy() + v = v.copy() + u[remove_mask] = -1 + v[remove_mask] = -1 + + return camera_points, np.stack([u, v, hori_dis], axis=-1) + + +def segment_pointcloud_by_mask( + points: np.ndarray, + mask: np.ndarray, + pixel_idx: np.ndarray, +) -> np.ndarray: + """ + Segment a point cloud using a binary mask and projected pixels. + + Args: + points: Point coordinates (N, 3). + mask: Binary mask (H, W). + pixel_idx: Pixel coordinates (N, 3) as (u, v, depth). + + Returns: + Masked point cloud (M, 3). + """ + h, w = mask.shape[:2] + pixel_idx = pixel_idx.astype(int) + valid = ( + (pixel_idx[:, 0] >= 0) + & (pixel_idx[:, 0] < w) + & (pixel_idx[:, 1] >= 0) + & (pixel_idx[:, 1] < h) + ) + pixel_idx = pixel_idx[valid] + points = points[valid] + keep = mask[pixel_idx[:, 1], pixel_idx[:, 0]].astype(bool) + return points[keep] + + +def segment_pointclouds_from_projection( + points: np.ndarray, + masks: list[np.ndarray], + pixel_idx: np.ndarray, +) -> Tuple[list[np.ndarray], list[np.ndarray]]: + """ + Segment point clouds from projected pixels. + + Args: + points: Point coordinates (N, 3). + masks: List of binary masks (H, W). + pixel_idx: Pixel coordinates (N, 3) as (u, v, depth). + + Returns: + Tuple of (clouds, depths) where each entry corresponds to a mask. 
+ """ + if masks is None or len(masks) == 0: + return [], [] + + image_shape = masks[0].shape + valid = ( + (pixel_idx[:, 0] >= 0) + & (pixel_idx[:, 0] < image_shape[1]) + & (pixel_idx[:, 1] >= 0) + & (pixel_idx[:, 1] < image_shape[0]) + ) + pixel_idx = pixel_idx[valid] + points = points[valid] + + depths = pixel_idx[:, 2] + pixel_idx = pixel_idx.astype(int) + + obj_clouds = [] + obj_depths = [] + for obj_mask in masks: + cloud_mask = obj_mask[pixel_idx[:, 1], pixel_idx[:, 0]].astype(bool) + obj_clouds.append(points[cloud_mask]) + obj_depths.append(depths[cloud_mask].reshape(-1, 1)) + + return obj_clouds, obj_depths + + +def transform_segmented_clouds_to_world( + clouds: list[np.ndarray], + depths: list[np.ndarray], + R_b2w: np.ndarray, + t_b2w: np.ndarray, + depth_jump_threshold: float = 0.3, +) -> list[np.ndarray]: + """ + Prune depth jumps and transform segmented clouds to world frame. + + Args: + clouds: List of segmented point clouds (sensor frame). + depths: List of depth arrays aligned with clouds. + R_b2w: Body-to-world rotation matrix (3, 3). + t_b2w: Body-to-world translation vector (3,). + depth_jump_threshold: Depth jump threshold for pruning (meters). + + Returns: + List of segmented object clouds in world frame. + """ + obj_cloud_world_list = [] + for obj_cloud, obj_depth in zip(clouds, depths): + if obj_depth.shape[0] > 1: + obj_depth_diff = (obj_depth[1:] - obj_depth[:-1]).squeeze() + if obj_depth_diff.size > 0 and np.max(obj_depth_diff) > depth_jump_threshold: + idx_tmp = np.ones(len(obj_depth), dtype=bool) + jump_idx = int(np.argmax(obj_depth_diff)) + idx_tmp[jump_idx + 1 :] = False + obj_cloud = obj_cloud[idx_tmp] + + obj_cloud_world = obj_cloud[:, :3] @ R_b2w.T + t_b2w + obj_cloud_world_list.append(obj_cloud_world) + + return obj_cloud_world_list + + def create_pointcloud2_msg(points: np.ndarray, colors: np.ndarray, header): """ Create PointCloud2 ROS message from points and colors. diff --git a/vector_perception_utils/vector_perception_utils/transform_utils.py b/vector_perception_utils/vector_perception_utils/transform_utils.py index 534d8d2..ef3238e 100644 --- a/vector_perception_utils/vector_perception_utils/transform_utils.py +++ b/vector_perception_utils/vector_perception_utils/transform_utils.py @@ -8,7 +8,7 @@ from geometry_msgs.msg import Pose, Quaternion -def normalize_angle(angle: float) -> float: +def normalize_angle(angle: float | np.ndarray) -> float | np.ndarray: """ Normalize angle to [-pi, pi] range. @@ -21,6 +21,26 @@ def normalize_angle(angle: float) -> float: return np.arctan2(np.sin(angle), np.cos(angle)) +def discretize_angles(angles: np.ndarray, num_bins: int) -> np.ndarray: + """ + Discretize angles into bin indices. + + Normalizes angles to [-pi, pi] then maps to bin indices [0, num_bins-1]. + + Args: + angles: Angles in radians (any range). + num_bins: Number of angle bins. + + Returns: + Array of bin indices (0 to num_bins-1), same shape as input. + """ + angles = normalize_angle(angles) + # Map [-pi, pi] to [0, 2*pi] then to [0, num_bins) + normalized = (angles + np.pi) / (2 * np.pi) + bins = (normalized * num_bins).astype(int) + return np.clip(bins, 0, num_bins - 1) + + def pose_to_matrix(pose: Pose) -> np.ndarray: """ Convert pose to 4x4 homogeneous transform matrix. 
From 88b2be0e227e0e52264346a1403945a0cadacd7a Mon Sep 17 00:00:00 2001 From: alexlin2 Date: Thu, 22 Jan 2026 23:47:38 -0500 Subject: [PATCH 3/6] updated README --- semantic_mapping/README.md | 58 ++++++++++++------- .../docs/SEMANTIC_MAPPING_MIGRATION.md | 29 ++++++---- 2 files changed, 56 insertions(+), 31 deletions(-) diff --git a/semantic_mapping/README.md b/semantic_mapping/README.md index 2dfd1dc..4729659 100644 --- a/semantic_mapping/README.md +++ b/semantic_mapping/README.md @@ -5,6 +5,7 @@ ROS2 package for semantic 3D object mapping with VLM integration. ## Overview This package builds a persistent semantic map of objects detected by `detect_anything`, fusing 2D detections with LiDAR point clouds to create 3D object representations. It publishes `ObjectNode` messages compatible with the tare_planner exploration system. +The pipeline stores voxelized geometry, merges observations, and exports planner‑friendly ROS messages. ## Status: Work in Progress @@ -12,11 +13,11 @@ This package builds a persistent semantic map of objects detected by `detect_any |-----------|------|--------| | Messages | `msg/*.msg` | Complete | | Data Models | `map_object.py` | Complete | -| Matching | `matching.py` | Pending | -| Object Mapper | `mapper.py` | Pending | -| Cloud Projection | `vector_perception_utils/pointcloud_utils.py` | Pending | -| Publishers | `map_object_utils.py` | Pending | -| Image Storage | `storage.py` | Pending | +| Matching | `matching.py` | Complete | +| Object Mapper | `mapper.py` | Complete | +| Cloud Projection | `vector_perception_utils/pointcloud_utils.py` | Complete | +| Publishers | `map_object_utils.py` | Complete | +| Image Storage | `storage.py` | Complete | | Mapping Node | `mapping_node.py` | Pending | | Launch Files | `launch/` | Pending | @@ -49,27 +50,37 @@ Viewpoint trigger for image saving. detect_anything/DetectionResult │ ▼ -┌───────────────────────────────────────┐ -│ semantic_mapping_node │ -│ │ -│ ┌─────────────┐ ┌──────────────┐ │ -│ │ CloudImage │ │ ObjectMapper │ │ -│ │ Fusion │───▶│ (ObjectDB │ │ -│ │ │ │ pattern) │ │ -│ └─────────────┘ └──────────────┘ │ -│ │ │ │ -│ ▼ ▼ │ -│ ┌─────────────┐ ┌──────────────┐ │ -│ │ MapObject │ │ Publishers │ │ -│ │ (voxels, │───▶│ (ObjectNode, │ │ -│ │ votes) │ │ Markers) │ │ -│ └─────────────┘ └──────────────┘ │ -└───────────────────────────────────────┘ +┌───────────────────────────────────────────────┐ +│ semantic_mapping_node │ +│ │ +│ ┌────────────────────┐ ┌──────────────┐ │ +│ │ pointcloud_utils │ │ ObjectMapper │ │ +│ │ (projection + mask)├──▶│ (pending/ │ │ +│ │ │ │ permanent) │ │ +│ └────────────────────┘ └──────────────┘ │ +│ │ │ │ +│ ▼ ▼ │ +│ ┌────────────────┐ ┌──────────────┐ │ +│ │ MapObject │ │ map_object_ │ │ +│ │ (voxels/votes) ├─▶│ utils │ │ +│ └────────────────┘ └──────────────┘ │ +│ │ │ │ +│ ▼ ▼ │ +│ storage.py mapping_node.py │ +└───────────────────────────────────────────────┘ │ ▼ /object_nodes, /obj_points, /obj_boxes, /obj_labels ``` +Key components: +- `map_object.py`: Object state, voxel voting, pose de‑duplication, cached geometry. +- `matching.py`: Track‑id and spatial matching with IoU/ratio heuristics. +- `mapper.py`: Pending/permanent tiers, merge rules, and pruning. +- `vector_perception_utils/pointcloud_utils.py`: Projection + mask segmentation utilities. +- `map_object_utils.py`: ROS message construction helpers. +- `storage.py`: Async image save queue. 
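+
+A minimal sketch of the Python API behind these components (synthetic inputs
+for illustration; the real node builds `Observation`s from ROS callbacks):
+
+```python
+import time
+
+import numpy as np
+
+from semantic_mapping.map_object import Observation
+from semantic_mapping.mapper import ObjectMapper
+
+mapper = ObjectMapper(voxel_size=0.03, confidence_threshold=0.30)
+
+# Synthetic world-frame points standing in for one fused detection.
+obs = Observation(
+    track_id=1,
+    label="chair",
+    confidence=0.9,
+    points=np.random.rand(200, 3).astype(np.float32),
+    robot_R=np.eye(3),
+    robot_t=np.zeros(3),
+    timestamp=time.time(),
+)
+
+stats = mapper.update([obs])  # UpdateStats(created=1, ...)
+for obj in mapper.get_objects(include_pending=True):
+    print(obj.label, obj.detection_count, obj.centroid)
+```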
+ ## Dependencies - `detect_anything`: Detection results with masks @@ -86,6 +97,11 @@ colcon build --packages-select semantic_mapping ros2 launch semantic_mapping semantic_mapping.launch.py ``` +## Configuration + +- `config/sensor_to_camera_tf.yaml`: Static sensor→camera transform for projection. +- `config/objects.yaml`: Object taxonomy for detection labels. + ## Topics ### Subscriptions diff --git a/semantic_mapping/docs/SEMANTIC_MAPPING_MIGRATION.md b/semantic_mapping/docs/SEMANTIC_MAPPING_MIGRATION.md index 4a23f74..53b4560 100644 --- a/semantic_mapping/docs/SEMANTIC_MAPPING_MIGRATION.md +++ b/semantic_mapping/docs/SEMANTIC_MAPPING_MIGRATION.md @@ -22,11 +22,11 @@ This document outlines the comprehensive plan for porting the `semantic_mapping` | 1 | Package Setup & Messages | **COMPLETED** | | 2 | Data Models (`map_object.py`) | **COMPLETED** | | 2.1 | Refactor to use shared utilities | **COMPLETED** | -| 3 | Matching Utilities (`matching.py`) | Pending | -| 4 | Object Mapper (`mapper.py`) | Pending | -| 5 | Cloud-Image Projection (`pointcloud_utils.py`) | Pending | -| 6 | Message Publishers (`map_object_utils.py`) | Pending | -| 7 | Image Storage (`storage.py`) | Pending | +| 3 | Matching Utilities (`matching.py`) | **COMPLETED** | +| 4 | Object Mapper (`mapper.py`) | **COMPLETED** | +| 5 | Cloud-Image Projection (`pointcloud_utils.py`) | **COMPLETED** | +| 6 | Message Publishers (`map_object_utils.py`) | **COMPLETED** | +| 7 | Image Storage (`storage.py`) | **COMPLETED** | | 8 | Mapping Node (`mapping_node.py`) | Pending | | 9 | Launch Files & Config | Pending | | 10 | Integration Testing | Pending | @@ -355,6 +355,11 @@ def create_object_pointcloud(objects: list[MapObject], header: Header) -> PointC def create_delete_markers(deleted_ids: list[int], header: Header, namespace: str) -> MarkerArray: ... ``` +Publisher helpers: +- `map_object_to_object_node` converts a `MapObject` into an `ObjectNode` with centroid, bbox corners, and point cloud. +- Marker helpers emit bbox wireframes and text labels for RViz. +- `create_object_pointcloud` builds a colored aggregate cloud from object voxels. + --- ## Phase 8: Image Storage (storage.py) @@ -369,6 +374,10 @@ class ImageSaveQueue: def queue_save(self, object_id: str, image: np.ndarray, mask: np.ndarray | None = None) -> str: ... ``` +Image storage: +- Async worker writes `.npy` (and optional `.png`) crops to disk. +- Queue is bounded to avoid blocking the main mapping loop. 
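+
+A usage sketch consistent with the signature above (`start()`/`stop()` follow
+the mapping node's usage of the class; the module later moves under `utils/`):
+
+```python
+from pathlib import Path
+
+import numpy as np
+
+from semantic_mapping.storage import ImageSaveQueue
+
+saver = ImageSaveQueue(output_dir=Path("output/object_images"),
+                       max_queue_size=100, save_png=False)
+saver.start()                                 # spawn the background worker
+
+crop = np.zeros((64, 64, 3), dtype=np.uint8)  # synthetic BGR crop
+mask = np.ones((64, 64), dtype=bool)
+path = saver.queue_save(object_id="42", image=crop, mask=mask)
+print(path)                                   # .npy path, returned immediately
+
+saver.stop()                                  # drain the queue and join
+```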
+ --- ## Phase 9: Launch Files @@ -398,13 +407,13 @@ DeclareLaunchArgument('target_object', default_value='') - [x] Add `normalize_angle`, `discretize_angles` to `transform_utils.py` - [x] Add `get_oriented_bbox` to `pointcloud_utils.py` - [x] Simplify `_compute_geometry()` using Open3D oriented bbox +- [x] Implement `matching.py` with IoU, distance, merge-compatibility functions +- [x] Implement `mapper.py` with ObjectDB-style tiering +- [x] Implement cloud-image projection helpers in `vector_perception_utils/pointcloud_utils.py` +- [x] Implement `map_object_utils.py` message conversion functions +- [x] Implement `storage.py` with async queue ### Remaining -- [ ] Implement `matching.py` with IoU, distance, merge-compatibility functions -- [ ] Implement `mapper.py` with ObjectDB-style tiering -- [ ] Implement cloud-image projection helpers in `vector_perception_utils/pointcloud_utils.py` -- [ ] Implement `map_object_utils.py` message conversion functions -- [ ] Implement `storage.py` with async queue - [ ] Implement `mapping_node.py` following detect_anything patterns - [ ] Create launch files and params.yaml - [ ] Integration testing with bag files From e08f117686fedeb77ca1f00d351f574889688a53 Mon Sep 17 00:00:00 2001 From: alexlin2 Date: Fri, 23 Jan 2026 14:05:07 -0500 Subject: [PATCH 4/6] completed full migration of all core logic --- semantic_mapping/README.md | 21 +- .../docs/SEMANTIC_MAPPING_MIGRATION.md | 35 +- semantic_mapping/semantic_mapping/mapper.py | 2 +- .../semantic_mapping/mapping_node.py | 686 ++++++++++++++++++ .../semantic_mapping/utils/__init__.py | 1 + .../{ => utils}/map_object_utils.py | 9 +- .../semantic_mapping/{ => utils}/matching.py | 4 +- .../semantic_mapping/{ => utils}/storage.py | 0 8 files changed, 729 insertions(+), 29 deletions(-) create mode 100644 semantic_mapping/semantic_mapping/mapping_node.py create mode 100644 semantic_mapping/semantic_mapping/utils/__init__.py rename semantic_mapping/semantic_mapping/{ => utils}/map_object_utils.py (95%) rename semantic_mapping/semantic_mapping/{ => utils}/matching.py (98%) rename semantic_mapping/semantic_mapping/{ => utils}/storage.py (100%) diff --git a/semantic_mapping/README.md b/semantic_mapping/README.md index 4729659..3428c0b 100644 --- a/semantic_mapping/README.md +++ b/semantic_mapping/README.md @@ -13,12 +13,12 @@ The pipeline stores voxelized geometry, merges observations, and exports planner |-----------|------|--------| | Messages | `msg/*.msg` | Complete | | Data Models | `map_object.py` | Complete | -| Matching | `matching.py` | Complete | +| Matching | `utils/matching.py` | Complete | | Object Mapper | `mapper.py` | Complete | | Cloud Projection | `vector_perception_utils/pointcloud_utils.py` | Complete | -| Publishers | `map_object_utils.py` | Complete | -| Image Storage | `storage.py` | Complete | -| Mapping Node | `mapping_node.py` | Pending | +| Publishers | `utils/map_object_utils.py` | Complete | +| Image Storage | `utils/storage.py` | Complete | +| Mapping Node | `mapping_node.py` | Complete | | Launch Files | `launch/` | Pending | ## Messages @@ -44,6 +44,10 @@ User instruction for target object search. ### ViewpointRep.msg Viewpoint trigger for image saving. +Note: `detect_anything/DetectionResult` masks are cropped to the bbox and aligned to +the bbox top-left (as documented in `detect_anything/msg/DetectionResult.msg`). +The mapping node reconstructs full-image masks before projection. 
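+
+A minimal sketch of that reconstruction with made-up bbox values:
+
+```python
+import numpy as np
+
+img_h, img_w = 640, 1920                           # full image size
+x1, y1, x2, y2 = 800, 200, 1100, 400               # detection bbox
+mask_crop = np.ones((y2 - y1, x2 - x1), np.uint8)  # bbox-aligned crop (mono8)
+
+full_mask = np.zeros((img_h, img_w), dtype=bool)
+h = min(mask_crop.shape[0], y2 - y1)               # guard against rounding mismatches
+w = min(mask_crop.shape[1], x2 - x1)
+full_mask[y1:y1 + h, x1:x1 + w] = mask_crop[:h, :w].astype(bool)
+```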
+ ## Architecture ``` @@ -66,7 +70,7 @@ detect_anything/DetectionResult │ └────────────────┘ └──────────────┘ │ │ │ │ │ │ ▼ ▼ │ -│ storage.py mapping_node.py │ +│ utils/storage.py mapping_node.py │ └───────────────────────────────────────────────┘ │ ▼ @@ -75,11 +79,12 @@ detect_anything/DetectionResult Key components: - `map_object.py`: Object state, voxel voting, pose de‑duplication, cached geometry. -- `matching.py`: Track‑id and spatial matching with IoU/ratio heuristics. +- `utils/matching.py`: Track‑id and spatial matching with IoU/ratio heuristics. - `mapper.py`: Pending/permanent tiers, merge rules, and pruning. - `vector_perception_utils/pointcloud_utils.py`: Projection + mask segmentation utilities. -- `map_object_utils.py`: ROS message construction helpers. -- `storage.py`: Async image save queue. +- `utils/map_object_utils.py`: ROS message construction helpers. +- `utils/storage.py`: Async image save queue. +- `mapping_node.py`: ROS2 orchestration (subscriptions, sync, publish). ## Dependencies diff --git a/semantic_mapping/docs/SEMANTIC_MAPPING_MIGRATION.md b/semantic_mapping/docs/SEMANTIC_MAPPING_MIGRATION.md index 53b4560..e02e8d0 100644 --- a/semantic_mapping/docs/SEMANTIC_MAPPING_MIGRATION.md +++ b/semantic_mapping/docs/SEMANTIC_MAPPING_MIGRATION.md @@ -22,12 +22,12 @@ This document outlines the comprehensive plan for porting the `semantic_mapping` | 1 | Package Setup & Messages | **COMPLETED** | | 2 | Data Models (`map_object.py`) | **COMPLETED** | | 2.1 | Refactor to use shared utilities | **COMPLETED** | -| 3 | Matching Utilities (`matching.py`) | **COMPLETED** | +| 3 | Matching Utilities (`utils/matching.py`) | **COMPLETED** | | 4 | Object Mapper (`mapper.py`) | **COMPLETED** | | 5 | Cloud-Image Projection (`pointcloud_utils.py`) | **COMPLETED** | -| 6 | Message Publishers (`map_object_utils.py`) | **COMPLETED** | -| 7 | Image Storage (`storage.py`) | **COMPLETED** | -| 8 | Mapping Node (`mapping_node.py`) | Pending | +| 6 | Message Publishers (`utils/map_object_utils.py`) | **COMPLETED** | +| 7 | Image Storage (`utils/storage.py`) | **COMPLETED** | +| 8 | Mapping Node (`mapping_node.py`) | **COMPLETED** | | 9 | Launch Files & Config | Pending | | 10 | Integration Testing | Pending | @@ -62,7 +62,7 @@ Key files in `VLM_ROS/src/semantic_mapping/`: - `update_map()` steps: 1. Transform incoming cloud into body/world frame using odom 2. Filter detections by confidence threshold - 3. For each detection: project masked cloud via `CloudImageFusion`, build per-object voxels, then either merge or create a `SingleObject` + 3. For each detection: reconstruct full-image masks (bbox-cropped in `detect_anything/msg/DetectionResult.msg`), project masked cloud, build per-object voxels, then either merge or create a `SingleObject` 4. Merge heuristics: match by tracker `obj_id`, 3D IoU, distance thresholds, and optional primitive groups (e.g., chair/sofa) 5. Maintain delete lists, save queues (best image/mask per object) 6. 
Publish ROS messages: `ObjectNode`, bbox markers, label markers, colored point cloud @@ -101,10 +101,11 @@ semantic_mapping/ │ ├── mapping_node.py # ROS2 node (orchestration) │ ├── mapper.py # ObjectMapper class (core logic) │ ├── map_object.py # Data classes: MapObject, Observation, VoxelManager -│ ├── matching.py # Object matching utilities (IoU, distance, track_id) -│ ├── (moved to utils) # Cloud-image projection helpers in vector_perception_utils -│ ├── map_object_utils.py # ROS message conversion helpers -│ └── storage.py # Async image saving queue +│ ├── utils/ +│ │ ├── matching.py # Object matching utilities (IoU, distance, track_id) +│ │ ├── map_object_utils.py # ROS message conversion helpers +│ │ └── storage.py # Async image saving queue +│ ├── (moved to vector_perception_utils) # Cloud-image projection helpers ├── docs/ │ └── SEMANTIC_MAPPING_MIGRATION.md └── resource/ @@ -223,7 +224,7 @@ class MapObject: --- -## Phase 3: Matching Utilities (matching.py) +## Phase 3: Matching Utilities (utils/matching.py) ```python def compute_iou_3d(bbox1: np.ndarray, bbox2: np.ndarray) -> float: @@ -345,10 +346,10 @@ def transform_segmented_clouds_to_world(...): ... --- -## Phase 7: Message Publishers (map_object_utils.py) +## Phase 7: Message Publishers (utils/map_object_utils.py) ```python -def map_object_to_object_node(obj: MapObject, header: Header, status: str) -> ObjectNode | None: ... +def map_object_to_object_node(obj: MapObject, header: Header, status: str, target_object: str | None = None) -> ObjectNode | None: ... def create_bbox_markers(objects: list[MapObject], header: Header) -> MarkerArray: ... def create_label_markers(objects: list[MapObject], header: Header) -> MarkerArray: ... def create_object_pointcloud(objects: list[MapObject], header: Header) -> PointCloud2 | None: ... 
@@ -362,7 +363,7 @@ Publisher helpers: --- -## Phase 8: Image Storage (storage.py) +## Phase 8: Image Storage (utils/storage.py) ```python class ImageSaveQueue: @@ -407,14 +408,14 @@ DeclareLaunchArgument('target_object', default_value='') - [x] Add `normalize_angle`, `discretize_angles` to `transform_utils.py` - [x] Add `get_oriented_bbox` to `pointcloud_utils.py` - [x] Simplify `_compute_geometry()` using Open3D oriented bbox -- [x] Implement `matching.py` with IoU, distance, merge-compatibility functions +- [x] Implement `utils/matching.py` with IoU, distance, merge-compatibility functions - [x] Implement `mapper.py` with ObjectDB-style tiering - [x] Implement cloud-image projection helpers in `vector_perception_utils/pointcloud_utils.py` -- [x] Implement `map_object_utils.py` message conversion functions -- [x] Implement `storage.py` with async queue +- [x] Implement `utils/map_object_utils.py` message conversion functions +- [x] Implement `utils/storage.py` with async queue +- [x] Implement `mapping_node.py` following detect_anything patterns ### Remaining -- [ ] Implement `mapping_node.py` following detect_anything patterns - [ ] Create launch files and params.yaml - [ ] Integration testing with bag files diff --git a/semantic_mapping/semantic_mapping/mapper.py b/semantic_mapping/semantic_mapping/mapper.py index f8a10ac..bcdfa90 100644 --- a/semantic_mapping/semantic_mapping/mapper.py +++ b/semantic_mapping/semantic_mapping/mapper.py @@ -7,7 +7,7 @@ import time import uuid -from .matching import compute_iou_3d_with_ratios, find_best_match +from .utils.matching import compute_iou_3d_with_ratios, find_best_match from .map_object import MapObject, ObjectStatus, Observation diff --git a/semantic_mapping/semantic_mapping/mapping_node.py b/semantic_mapping/semantic_mapping/mapping_node.py new file mode 100644 index 0000000..c33bcc0 --- /dev/null +++ b/semantic_mapping/semantic_mapping/mapping_node.py @@ -0,0 +1,686 @@ +"""ROS2 mapping node for semantic 3D object mapping.""" + +from __future__ import annotations + +from collections import deque +from pathlib import Path +import time + +import numpy as np +import yaml +from cv_bridge import CvBridge +import rclpy +from rclpy.node import Node +from rclpy.qos import QoSProfile, ReliabilityPolicy +from sensor_msgs.msg import CompressedImage, PointCloud2 +from sensor_msgs_py import point_cloud2 +from nav_msgs.msg import Odometry +from std_msgs.msg import Header +from visualization_msgs.msg import MarkerArray +from scipy.spatial.transform import Rotation, Slerp + +from detect_anything.msg import DetectionResult +from semantic_mapping.msg import ObjectType, ObjectNode, TargetObjectInstruction, ViewpointRep + +from vector_perception_utils.pointcloud_utils import ( + project_pointcloud_to_image, + segment_pointclouds_from_projection, + transform_segmented_clouds_to_world, +) + +from .map_object import Observation +from .utils.map_object_utils import ( + create_bbox_markers, + create_delete_markers, + create_label_markers, + create_object_pointcloud, + map_object_to_object_node, +) +from .mapper import ObjectMapper +from .utils.storage import ImageSaveQueue + + +class SemanticMappingNode(Node): + """ROS2 node for semantic object mapping. 
+ + This node fuses 2D detections and LiDAR point clouds into persistent 3D objects: + - Buffers detections, clouds, and odometry with timestamps + - Interpolates odometry at detection time (position + orientation) + - Segments point clouds by detection masks and projects to world frame + - Updates the object mapper and publishes ROS messages + - Queues cropped images for VLM queries + """ + + def __init__(self) -> None: + """Initialize the mapping node, parameters, buffers, and ROS I/O.""" + super().__init__("semantic_mapping_node") + self._bridge = CvBridge() + + self.declare_parameter("detection_topic", "/detection_result") + self.declare_parameter("cloud_topic", "/registered_scan") + self.declare_parameter("odom_topic", "/state_estimation") + self.declare_parameter("viewpoint_topic", "/viewpoint_rep_header") + self.declare_parameter("object_type_answer_topic", "/object_type_answer") + self.declare_parameter("object_type_query_topic", "/object_type_query") + self.declare_parameter("target_object_instruction_topic", "/target_object_instruction") + self.declare_parameter("object_nodes_topic", "/object_nodes") + self.declare_parameter("obj_points_topic", "/obj_points") + self.declare_parameter("obj_boxes_topic", "/obj_boxes") + self.declare_parameter("obj_labels_topic", "/obj_labels") + self.declare_parameter("map_frame", "map") + + self.declare_parameter("confidence_threshold", 0.3) + self.declare_parameter("voxel_size", 0.03) + self.declare_parameter("distance_threshold", 0.5) + self.declare_parameter("iou_threshold", 0.2) + self.declare_parameter("min_detections_for_permanent", 3) + self.declare_parameter("pending_ttl_s", 10.0) + self.declare_parameter("max_inactive_frames", 20) + self.declare_parameter("num_angle_bins", 15) + + self.declare_parameter("detection_linear_state_time_bias", 0.0) + self.declare_parameter("detection_angular_state_time_bias", 0.0) + self.declare_parameter("cloud_window_before", 0.5) + self.declare_parameter("cloud_window_after", 0.1) + + self.declare_parameter("projection", "equirect") + self.declare_parameter("axis_mode", "xz") + self.declare_parameter("image_width", 1920) + self.declare_parameter("image_height", 640) + self.declare_parameter("depth_filter", False) + self.declare_parameter("depth_filter_margin", 0.15) + self.declare_parameter("depth_jump_threshold", 0.3) + self.declare_parameter("sensor_to_camera_tf", "config/sensor_to_camera_tf.yaml") + + self.declare_parameter("target_object", "") + self.declare_parameter("save_png", False) + self.declare_parameter("output_dir", "output/object_images") + + detection_topic = str(self.get_parameter("detection_topic").value) + cloud_topic = str(self.get_parameter("cloud_topic").value) + odom_topic = str(self.get_parameter("odom_topic").value) + viewpoint_topic = str(self.get_parameter("viewpoint_topic").value) + object_type_answer_topic = str(self.get_parameter("object_type_answer_topic").value) + object_type_query_topic = str(self.get_parameter("object_type_query_topic").value) + target_object_instruction_topic = str(self.get_parameter("target_object_instruction_topic").value) + object_nodes_topic = str(self.get_parameter("object_nodes_topic").value) + obj_points_topic = str(self.get_parameter("obj_points_topic").value) + obj_boxes_topic = str(self.get_parameter("obj_boxes_topic").value) + obj_labels_topic = str(self.get_parameter("obj_labels_topic").value) + self._map_frame = str(self.get_parameter("map_frame").value) + + self._confidence_threshold = float(self.get_parameter("confidence_threshold").value) + 
self._detection_linear_bias = float(self.get_parameter("detection_linear_state_time_bias").value) + self._detection_angular_bias = float(self.get_parameter("detection_angular_state_time_bias").value) + self._cloud_window_before = float(self.get_parameter("cloud_window_before").value) + self._cloud_window_after = float(self.get_parameter("cloud_window_after").value) + + self._projection = str(self.get_parameter("projection").value) + self._axis_mode = str(self.get_parameter("axis_mode").value) + self._image_width = int(self.get_parameter("image_width").value) + self._image_height = int(self.get_parameter("image_height").value) + self._depth_filter = bool(self.get_parameter("depth_filter").value) + self._depth_filter_margin = float(self.get_parameter("depth_filter_margin").value) + self._depth_jump_threshold = float(self.get_parameter("depth_jump_threshold").value) + self._sensor_to_camera = self._load_sensor_to_camera( + str(self.get_parameter("sensor_to_camera_tf").value) + ) + + self._target_object = str(self.get_parameter("target_object").value) + self._save_png = bool(self.get_parameter("save_png").value) + self._output_dir = Path(str(self.get_parameter("output_dir").value)) + + self._mapper = ObjectMapper( + voxel_size=float(self.get_parameter("voxel_size").value), + confidence_threshold=self._confidence_threshold, + distance_threshold=float(self.get_parameter("distance_threshold").value), + iou_threshold=float(self.get_parameter("iou_threshold").value), + min_detections_for_permanent=int(self.get_parameter("min_detections_for_permanent").value), + pending_ttl_s=float(self.get_parameter("pending_ttl_s").value), + max_inactive_frames=int(self.get_parameter("max_inactive_frames").value), + num_angle_bins=int(self.get_parameter("num_angle_bins").value), + ) + + self._image_saver = ImageSaveQueue( + output_dir=self._output_dir, + max_queue_size=100, + save_png=self._save_png, + ) + self._image_saver.start() + + self._cloud_buffer: deque[tuple[float, np.ndarray]] = deque(maxlen=200) + self._odom_buffer: deque[tuple[float, dict]] = deque(maxlen=400) + self._detection_buffer: deque[DetectionResult] = deque(maxlen=100) + self._viewpoint_queue: deque[tuple[float, int]] = deque(maxlen=50) + self._processed_viewpoints: set[float] = set() + self._published_ids: set[int] = set() + self._mapping_busy = False + + qos_cloud = QoSProfile(depth=10, reliability=ReliabilityPolicy.BEST_EFFORT) + qos_default = QoSProfile(depth=10, reliability=ReliabilityPolicy.RELIABLE) + + self.create_subscription(DetectionResult, detection_topic, self._detection_callback, qos_default) + self.create_subscription(PointCloud2, cloud_topic, self._cloud_callback, qos_cloud) + self.create_subscription(Odometry, odom_topic, self._odom_callback, qos_cloud) + self.create_subscription(ViewpointRep, viewpoint_topic, self._viewpoint_callback, qos_default) + self.create_subscription(ObjectType, object_type_answer_topic, self._object_type_answer_callback, qos_default) + self.create_subscription( + TargetObjectInstruction, target_object_instruction_topic, self._target_object_callback, qos_default + ) + + self._object_type_query_pub = self.create_publisher(ObjectType, object_type_query_topic, qos_default) + self._object_nodes_pub = self.create_publisher(ObjectNode, object_nodes_topic, qos_default) + self._obj_points_pub = self.create_publisher(PointCloud2, obj_points_topic, qos_default) + self._obj_boxes_pub = self.create_publisher(MarkerArray, obj_boxes_topic, qos_default) + self._obj_labels_pub = self.create_publisher(MarkerArray, 
obj_labels_topic, qos_default) + + self.create_timer(0.5, self._mapping_callback) + + def destroy_node(self) -> bool: + """Stop background workers and destroy the ROS node.""" + self._image_saver.stop() + return super().destroy_node() + + def _detection_callback(self, msg: DetectionResult) -> None: + """Buffer detection results for later synchronization. + + Args: + msg: DetectionResult message with bboxes, labels, and masks. + """ + self._detection_buffer.append(msg) + + def _cloud_callback(self, msg: PointCloud2) -> None: + """Buffer incoming point clouds with timestamps. + + Args: + msg: PointCloud2 message in the sensor frame. + """ + stamp = msg.header.stamp.sec + msg.header.stamp.nanosec / 1e9 + points = point_cloud2.read_points_numpy(msg, field_names=("x", "y", "z")) + self._cloud_buffer.append((stamp, points)) + + def _odom_callback(self, msg: Odometry) -> None: + """Buffer odometry data for interpolation. + + Args: + msg: Odometry message containing pose and twist. + """ + stamp = msg.header.stamp.sec + msg.header.stamp.nanosec / 1e9 + odom = { + "position": np.array( + [msg.pose.pose.position.x, msg.pose.pose.position.y, msg.pose.pose.position.z] + ), + "orientation": np.array( + [ + msg.pose.pose.orientation.x, + msg.pose.pose.orientation.y, + msg.pose.pose.orientation.z, + msg.pose.pose.orientation.w, + ] + ), + "linear_velocity": np.array( + [msg.twist.twist.linear.x, msg.twist.twist.linear.y, msg.twist.twist.linear.z] + ), + "angular_velocity": np.array( + [msg.twist.twist.angular.x, msg.twist.twist.angular.y, msg.twist.twist.angular.z] + ), + } + self._odom_buffer.append((stamp, odom)) + + def _viewpoint_callback(self, msg: ViewpointRep) -> None: + """Queue viewpoint trigger requests. + + Args: + msg: ViewpointRep message with timestamp and viewpoint id. + """ + stamp = msg.header.stamp.sec + msg.header.stamp.nanosec / 1e9 + if stamp in self._processed_viewpoints: + return + self._viewpoint_queue.append((stamp, msg.viewpoint_id)) + + def _target_object_callback(self, msg: TargetObjectInstruction) -> None: + """Update target object label used for VLM query decisions. + + Args: + msg: TargetObjectInstruction message. + """ + self._target_object = msg.target_object + + def _object_type_answer_callback(self, msg: ObjectType) -> None: + """Apply VLM relabeling updates to tracked objects. + + Args: + msg: ObjectType message with final label for a track id. 
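+
+        Notes:
+            The final label receives a large vote and confidence boost so it
+            outranks detector votes, and the object is flagged as VLM-updated.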
+ """ + for obj in self._mapper.get_objects(include_pending=True): + if msg.object_id in obj.track_ids: + obj.class_votes[msg.final_label] = obj.class_votes.get(msg.final_label, 0) + 50 + obj.confidence_scores[msg.final_label] = max( + obj.confidence_scores.get(msg.final_label, 0.0), 1.0 + ) + obj.points_count[msg.final_label] = obj.points_count.get(msg.final_label, 0) + obj.updated_by_vlm = True + obj._invalidate_cache() + break + + def _mapping_callback(self) -> None: + """Main mapping loop: synchronize inputs, update map, and publish outputs.""" + if self._mapping_busy: + return + if len(self._detection_buffer) < 1 or len(self._cloud_buffer) < 1 or len(self._odom_buffer) < 2: + return + + self._mapping_busy = True + try: + detection_msg, viewpoint_stamp, viewpoint_id = self._select_detection() + if detection_msg is None: + return + + detection_stamp = detection_msg.header.stamp.sec + detection_msg.header.stamp.nanosec / 1e9 + image = self._bridge.compressed_imgmsg_to_cv2(detection_msg.image, desired_encoding="bgr8") + + odom = self._interpolate_odom(detection_stamp) + if odom is None: + return + + neighbor_cloud = self._collect_cloud_window(detection_stamp) + if neighbor_cloud is None: + return + + observations = self._build_observations( + detection_msg=detection_msg, + image=image, + odom=odom, + cloud=neighbor_cloud, + ) + if len(observations) == 0: + return + + self._mapper.update(observations, target_object=self._target_object) + self._update_best_images(observations, image) + self._publish_map(detection_stamp, viewpoint_id) + self._publish_object_type_queries() + + if viewpoint_stamp is not None: + self._processed_viewpoints.add(viewpoint_stamp) + finally: + self._mapping_busy = False + + def _select_detection(self) -> tuple[DetectionResult | None, float | None, int]: + """Pick the detection to process, optionally aligned to a viewpoint. + + Returns: + Tuple of (detection_msg, viewpoint_stamp, viewpoint_id). + """ + viewpoint_stamp = None + viewpoint_id = -1 + + if len(self._viewpoint_queue) > 0: + stamp, vid = self._viewpoint_queue[0] + detections = list(self._detection_buffer) + stamps = [msg.header.stamp.sec + msg.header.stamp.nanosec / 1e9 for msg in detections] + diffs = np.abs(np.array(stamps) - stamp) + idx = int(np.argmin(diffs)) + if diffs[idx] <= 1.0: + detection_msg = detections[idx] + viewpoint_stamp = stamp + viewpoint_id = vid + self._viewpoint_queue.popleft() + return detection_msg, viewpoint_stamp, viewpoint_id + self._viewpoint_queue.popleft() + + idx = -2 if len(self._detection_buffer) > 1 else -1 + return self._detection_buffer[idx], viewpoint_stamp, viewpoint_id + + def _interpolate_odom(self, detection_stamp: float) -> dict | None: + """Interpolate odometry at the detection timestamp. + + Args: + detection_stamp: Timestamp in seconds. + + Returns: + Interpolated odometry dict or None if interpolation is not possible. 
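+
+        Notes:
+            Position and velocities are linearly interpolated between the
+            bracketing samples; orientation uses slerp. Linear and angular
+            states honor separate time-bias parameters.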
+ """ + target_stamp = detection_stamp + self._detection_linear_bias + angular_stamp = detection_stamp + self._detection_angular_bias + stamps = [s for s, _ in self._odom_buffer] + if len(stamps) < 2: + return None + + if target_stamp < stamps[0] or target_stamp > stamps[-1]: + return None + if angular_stamp < stamps[0] or angular_stamp > stamps[-1]: + return None + + right_idx = next(i for i, s in enumerate(stamps) if s >= target_stamp) + left_idx = max(right_idx - 1, 0) + angular_right_idx = next(i for i, s in enumerate(stamps) if s >= angular_stamp) + angular_left_idx = max(angular_right_idx - 1, 0) + + left_stamp, left_odom = self._odom_buffer[left_idx] + right_stamp, right_odom = self._odom_buffer[right_idx] + angular_left_stamp, angular_left_odom = self._odom_buffer[angular_left_idx] + angular_right_stamp, angular_right_odom = self._odom_buffer[angular_right_idx] + denom = right_stamp - left_stamp + ratio = (target_stamp - left_stamp) / denom if denom > 0 else 0.5 + angular_denom = angular_right_stamp - angular_left_stamp + angular_ratio = (angular_stamp - angular_left_stamp) / angular_denom if angular_denom > 0 else 0.5 + + camera_odom = {} + camera_odom["position"] = right_odom["position"] * ratio + left_odom["position"] * (1 - ratio) + camera_odom["linear_velocity"] = ( + right_odom["linear_velocity"] * ratio + left_odom["linear_velocity"] * (1 - ratio) + ) + rotations = Rotation.from_quat( + [angular_left_odom["orientation"], angular_right_odom["orientation"]] + ) + slerp = Slerp([0, 1], rotations) + camera_odom["orientation"] = slerp(angular_ratio).as_quat() + camera_odom["angular_velocity"] = ( + angular_right_odom["angular_velocity"] * angular_ratio + + angular_left_odom["angular_velocity"] * (1 - angular_ratio) + ) + return camera_odom + + def _collect_cloud_window(self, detection_stamp: float) -> np.ndarray | None: + """Aggregate point clouds around a detection timestamp. + + Args: + detection_stamp: Timestamp in seconds. + + Returns: + Concatenated point cloud or None if no data in window. + """ + start = detection_stamp - self._cloud_window_before + end = detection_stamp + self._cloud_window_after + clouds = [pts for stamp, pts in self._cloud_buffer if start <= stamp <= end] + if len(clouds) == 0: + return None + return np.concatenate(clouds, axis=0) + + def _build_observations( + self, + detection_msg: DetectionResult, + image: np.ndarray, + odom: dict, + cloud: np.ndarray, + ) -> list[Observation]: + """Build Observation objects from detections and segmented point clouds. + + Args: + detection_msg: DetectionResult with bboxes, labels, and masks. + image: RGB image used for masks and cropping. + odom: Interpolated odometry dict (position + orientation). + cloud: Aggregated point cloud in sensor frame. + + Returns: + List of Observation instances for object updates. 
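+
+        Notes:
+            Bbox-cropped masks are pasted back into full-image masks, and
+            the world-frame cloud is transformed into the body frame before
+            projection and segmentation.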
+ """ + det_stamp = detection_msg.header.stamp.sec + detection_msg.header.stamp.nanosec / 1e9 + image_shape = image.shape[:2] + + masks = [] + labels = [] + confidences = [] + track_ids = [] + + for i in range(len(detection_msg.track_id)): + conf = float(detection_msg.confidence[i]) + if conf < self._confidence_threshold: + continue + x1 = int(detection_msg.x1[i]) + y1 = int(detection_msg.y1[i]) + x2 = int(detection_msg.x2[i]) + y2 = int(detection_msg.y2[i]) + x1 = max(0, min(x1, image_shape[1] - 1)) + x2 = max(0, min(x2, image_shape[1])) + y1 = max(0, min(y1, image_shape[0] - 1)) + y2 = max(0, min(y2, image_shape[0])) + if x2 <= x1 or y2 <= y1: + continue + + full_mask = np.zeros(image_shape, dtype=bool) + if i < len(detection_msg.masks): + mask_msg: CompressedImage = detection_msg.masks[i] + mask_crop = self._bridge.compressed_imgmsg_to_cv2(mask_msg, desired_encoding="mono8") + mask_bool = mask_crop.astype(bool) + crop_h = min(mask_bool.shape[0], y2 - y1) + crop_w = min(mask_bool.shape[1], x2 - x1) + full_mask[y1 : y1 + crop_h, x1 : x1 + crop_w] = mask_bool[:crop_h, :crop_w] + + masks.append(full_mask) + labels.append(detection_msg.label[i]) + confidences.append(conf) + track_ids.append(int(detection_msg.track_id[i])) + + if len(masks) == 0: + return [] + + R_b2w = Rotation.from_quat(odom["orientation"]).as_matrix() + t_b2w = np.array(odom["position"]) + R_w2b = R_b2w.T + t_w2b = -R_w2b @ t_b2w + cloud_body = cloud @ R_w2b.T + t_w2b + + _, pixel_idx = project_pointcloud_to_image( + points=cloud_body, + projection=self._projection, + image_width=self._image_width, + image_height=self._image_height, + axis_mode=self._axis_mode, + intrinsics=None, + depth_filter=self._depth_filter, + depth_filter_margin=self._depth_filter_margin, + transform=self._sensor_to_camera, + ) + seg_clouds, seg_depths = segment_pointclouds_from_projection( + points=cloud_body, + masks=masks, + pixel_idx=pixel_idx, + ) + world_clouds = transform_segmented_clouds_to_world( + clouds=seg_clouds, + depths=seg_depths, + R_b2w=R_b2w, + t_b2w=t_b2w, + depth_jump_threshold=self._depth_jump_threshold, + ) + + observations = [] + for idx, world_cloud in enumerate(world_clouds): + if world_cloud.shape[0] == 0: + continue + observations.append( + Observation( + track_id=track_ids[idx], + label=labels[idx], + confidence=confidences[idx], + points=world_cloud, + robot_R=R_b2w, + robot_t=t_b2w, + mask=masks[idx], + image=image, + timestamp=det_stamp, + ) + ) + + return observations + + def _update_best_images(self, observations: list[Observation], image: np.ndarray) -> None: + """Update best image crops for objects based on mask area. + + Args: + observations: Observations created for the current frame. + image: Full RGB image associated with the detections. 
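+
+        Notes:
+            A padded crop replaces the stored best image only when its mask
+            area beats the previous score; superseded files are deleted.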
+ """ + for obs in observations: + obj = None + for candidate in self._mapper.get_objects(include_pending=True): + if obs.track_id in candidate.track_ids: + obj = candidate + break + if obj is None or obs.mask is None: + continue + + coords = np.argwhere(obs.mask) + if coords.size == 0: + continue + mask_area = coords.shape[0] + if mask_area <= obj.best_image_score: + continue + + y0, x0 = coords.min(axis=0) + y1, x1 = coords.max(axis=0) + 1 + padding = max(int(y1 - y0), int(x1 - x0), 80) + y0 = max(0, y0 - padding) + x0 = max(0, x0 - padding) + y1 = min(obs.mask.shape[0], y1 + padding) + x1 = min(obs.mask.shape[1], x1 + padding) + + if obj.best_image_path: + image_path = Path(obj.best_image_path) + mask_path = image_path.with_name(image_path.stem + "_mask.npy") + if image_path.exists(): + image_path.unlink() + if mask_path.exists(): + mask_path.unlink() + if self._save_png: + png_path = image_path.with_suffix(".png") + mask_png_path = image_path.with_name(image_path.stem + "_mask.png") + if png_path.exists(): + png_path.unlink() + if mask_png_path.exists(): + mask_png_path.unlink() + + cropped_image = image[y0:y1, x0:x1] + cropped_mask = obs.mask[y0:y1, x0:x1] + + obj.best_image_path = self._image_saver.queue_save( + object_id=str(obs.track_id), + image=cropped_image, + mask=cropped_mask, + ) + obj.best_image_score = float(mask_area) + obj.is_asked_vlm = False + + def _publish_map(self, stamp: float, viewpoint_id: int) -> None: + """Publish object nodes, markers, and aggregated point clouds. + + Args: + stamp: Timestamp (seconds) for message headers. + viewpoint_id: Viewpoint id for ObjectNode messages. + """ + seconds = int(stamp) + nanoseconds = int((stamp - seconds) * 1e9) + header = Header() + header.stamp.sec = seconds + header.stamp.nanosec = nanoseconds + header.frame_id = self._map_frame + + objects = self._mapper.get_objects(include_pending=True) + for obj in objects: + if obj.detection_count == 1: + status = "new" + elif obj.inactive_frames == 0: + status = "updated" + else: + status = "unchanged" + msg = map_object_to_object_node( + obj, + header, + status=status, + viewpoint_id=viewpoint_id, + target_object=self._target_object, + ) + if msg is not None: + self._object_nodes_pub.publish(msg) + + bbox_markers = create_bbox_markers(objects, header) + label_markers = create_label_markers(objects, header) + self._obj_boxes_pub.publish(bbox_markers) + self._obj_labels_pub.publish(label_markers) + + obj_cloud = create_object_pointcloud(objects, header) + if obj_cloud is not None: + self._obj_points_pub.publish(obj_cloud) + + current_ids = {tid for obj in objects for tid in obj.track_ids} + deleted_ids = list(self._published_ids - current_ids) + if len(deleted_ids) > 0: + delete_markers = create_delete_markers(deleted_ids, header, namespace="bbox") + self._obj_boxes_pub.publish(delete_markers) + delete_labels = create_delete_markers(deleted_ids, header, namespace="labels") + self._obj_labels_pub.publish(delete_labels) + self._published_ids = current_ids + + def _publish_object_type_queries(self) -> None: + """Publish VLM queries for target objects with good image crops.""" + for obj in self._mapper.get_objects(include_pending=False): + if obj.label != self._target_object: + continue + if obj.is_asked_vlm: + continue + if obj.best_image_score <= 500: + continue + msg = ObjectType() + msg.object_id = int(obj.track_ids[0]) if obj.track_ids else -1 + msg.img_path = obj.best_image_path or "" + msg.labels = list(obj.class_votes.keys()) + self._object_type_query_pub.publish(msg) + 
obj.is_asked_vlm = True + + def _load_sensor_to_camera(self, path: str) -> np.ndarray | None: + """Load sensor->camera transform from YAML. + + Args: + path: Path to YAML config with translation and rpy. + + Returns: + 4x4 transform matrix or None if config not found. + """ + config_path = Path(path) + if not config_path.is_absolute(): + config_path = Path(__file__).resolve().parent.parent / config_path + if not config_path.exists(): + return None + + data = yaml.safe_load(config_path.read_text()) or {} + tf_data = data.get("sensor_to_camera", {}) + trans = tf_data.get("translation", {}) + rpy = tf_data.get("rotation_rpy", {}) + translation = np.array( + [ + float(trans.get("x", 0.0)), + float(trans.get("y", 0.0)), + float(trans.get("z", 0.0)), + ] + ) + rotation = Rotation.from_euler( + "xyz", + [ + float(rpy.get("roll", 0.0)), + float(rpy.get("pitch", 0.0)), + float(rpy.get("yaw", 0.0)), + ], + ).as_matrix() + transform = np.eye(4) + transform[:3, :3] = rotation + transform[:3, 3] = translation + return transform + + +def main(args: list[str] | None = None) -> None: + """Entry point for the semantic mapping node.""" + rclpy.init(args=args) + node = SemanticMappingNode() + try: + rclpy.spin(node) + except KeyboardInterrupt: + pass + finally: + node.destroy_node() + rclpy.shutdown() + + +if __name__ == "__main__": + main() diff --git a/semantic_mapping/semantic_mapping/utils/__init__.py b/semantic_mapping/semantic_mapping/utils/__init__.py new file mode 100644 index 0000000..d949aeb --- /dev/null +++ b/semantic_mapping/semantic_mapping/utils/__init__.py @@ -0,0 +1 @@ +"""Utility modules for semantic mapping.""" diff --git a/semantic_mapping/semantic_mapping/map_object_utils.py b/semantic_mapping/semantic_mapping/utils/map_object_utils.py similarity index 95% rename from semantic_mapping/semantic_mapping/map_object_utils.py rename to semantic_mapping/semantic_mapping/utils/map_object_utils.py index 6bc3d8d..b098c8c 100644 --- a/semantic_mapping/semantic_mapping/map_object_utils.py +++ b/semantic_mapping/semantic_mapping/utils/map_object_utils.py @@ -14,7 +14,7 @@ from semantic_mapping.msg import ObjectNode from vector_perception_utils.pointcloud_utils import create_pointcloud2_msg -from .map_object import MapObject +from ..map_object import MapObject def map_object_to_object_node( @@ -22,6 +22,7 @@ def map_object_to_object_node( header: Header, status: str = "new", viewpoint_id: int = -1, + target_object: str | None = None, ) -> Optional[ObjectNode]: """Convert a MapObject into an ObjectNode message. @@ -30,6 +31,7 @@ def map_object_to_object_node( header: ROS header to attach to the message. status: Lifecycle status string ("new", "updated", "unchanged"). viewpoint_id: Viewpoint identifier for image saving. + target_object: Target object label for potential labeling. Returns: ObjectNode message or None if required geometry is missing. 
@@ -40,7 +42,10 @@ def map_object_to_object_node( msg = ObjectNode() msg.header = header msg.object_id = obj.track_ids - msg.label = obj.label + if target_object and obj.label == target_object and not obj.is_asked_vlm: + msg.label = "Potential Target" + else: + msg.label = obj.label msg.status = status msg.position.x = float(obj.centroid[0]) msg.position.y = float(obj.centroid[1]) diff --git a/semantic_mapping/semantic_mapping/matching.py b/semantic_mapping/semantic_mapping/utils/matching.py similarity index 98% rename from semantic_mapping/semantic_mapping/matching.py rename to semantic_mapping/semantic_mapping/utils/matching.py index 418521e..fcfb7cd 100644 --- a/semantic_mapping/semantic_mapping/matching.py +++ b/semantic_mapping/semantic_mapping/utils/matching.py @@ -8,7 +8,7 @@ from vector_perception_utils.pointcloud_utils import compute_centroid, get_oriented_bbox -from .map_object import MapObject, Observation +from ..map_object import MapObject, Observation def _convex_hull_2d(points: np.ndarray) -> np.ndarray: @@ -208,6 +208,8 @@ def find_best_match( return candidate cand_center = candidate.centroid + if cand_center is None: + continue dist = float(np.linalg.norm(obs_center - cand_center)) if dist > distance_threshold: diff --git a/semantic_mapping/semantic_mapping/storage.py b/semantic_mapping/semantic_mapping/utils/storage.py similarity index 100% rename from semantic_mapping/semantic_mapping/storage.py rename to semantic_mapping/semantic_mapping/utils/storage.py From 1cf8b9ec2f506401cd44ecbc4ff522185f83892f Mon Sep 17 00:00:00 2001 From: alexlin2 Date: Sat, 24 Jan 2026 14:55:40 -0500 Subject: [PATCH 5/6] Working detections, tuning in progress --- detect_anything/detect_anything/yoloe.py | 3 + rviz/semantic_mapping.rviz | 267 ++++++++++++++++++ semantic_mapping/README.md | 3 +- semantic_mapping/config/params.yaml | 55 ++++ .../config/sensor_to_camera_tf.yaml | 4 +- .../docs/SEMANTIC_MAPPING_MIGRATION.md | 55 ++-- .../launch/semantic_mapping.launch.py | 137 +++++++++ .../scripts/semantic_mapping_node | 0 .../semantic_mapping/map_object.py | 180 ++++++++---- semantic_mapping/semantic_mapping/mapper.py | 64 ++++- .../semantic_mapping/mapping_node.py | 197 ++++++++++++- .../utils/map_object_utils.py | 54 +++- .../semantic_mapping/utils/matching.py | 126 +++++---- .../semantic_mapping/utils/storage.py | 9 +- .../pointcloud_utils.py | 141 ++++++++- 15 files changed, 1120 insertions(+), 175 deletions(-) create mode 100644 rviz/semantic_mapping.rviz create mode 100644 semantic_mapping/config/params.yaml create mode 100644 semantic_mapping/launch/semantic_mapping.launch.py mode change 100644 => 100755 semantic_mapping/scripts/semantic_mapping_node diff --git a/detect_anything/detect_anything/yoloe.py b/detect_anything/detect_anything/yoloe.py index c0695c3..303afb9 100644 --- a/detect_anything/detect_anything/yoloe.py +++ b/detect_anything/detect_anything/yoloe.py @@ -9,6 +9,7 @@ import numpy as np import torch from ultralytics import YOLOE # type: ignore[attr-defined, import-not-found] +from ultralytics.utils import SETTINGS from detect_anything.detection import ( DetectionResults, @@ -41,6 +42,8 @@ def __init__( model_name = "yoloe-11s-seg-pf.pt" if prompt_mode == YoloePromptMode.LRPC else "yoloe-11s-seg.pt" model_dir = Path(model_path) if model_path else Path("models/yoloe") + model_dir = model_dir.expanduser().resolve() + SETTINGS["weights_dir"] = str(model_dir) self.model = YOLOE(model_dir / model_name) self.prompt_mode = prompt_mode self._visual_prompts: dict[str, np.ndarray] | 
None = None diff --git a/rviz/semantic_mapping.rviz b/rviz/semantic_mapping.rviz new file mode 100644 index 0000000..d56223b --- /dev/null +++ b/rviz/semantic_mapping.rviz @@ -0,0 +1,267 @@ +Panels: + - Class: rviz_common/Displays + Help Height: 78 + Name: Displays + Property Tree Widget: + Expanded: + - /Global Options1 + - /Status1 + - /Odometry1 + Splitter Ratio: 0.5 + Tree Height: 413 + - Class: rviz_common/Selection + Name: Selection + - Class: rviz_common/Tool Properties + Expanded: + - /2D Goal Pose1 + - /Publish Point1 + Name: Tool Properties + Splitter Ratio: 0.5886790156364441 + - Class: rviz_common/Views + Expanded: + - /Current View1 + Name: Views + Splitter Ratio: 0.5 + - Class: rviz_common/Time + Experimental: false + Name: Time + SyncMode: 0 + SyncSource: PointCloud2 +Visualization Manager: + Class: "" + Displays: + - Alpha: 0.5 + Cell Size: 1 + Class: rviz_default_plugins/Grid + Color: 160; 160; 164 + Enabled: true + Line Style: + Line Width: 0.029999999329447746 + Value: Lines + Name: Grid + Normal Cell Count: 0 + Offset: + X: 0 + Y: 0 + Z: 0 + Plane: XY + Plane Cell Count: 10 + Reference Frame: + Value: true + - Class: rviz_default_plugins/Image + Enabled: true + Max Value: 1 + Median window: 5 + Min Value: 0 + Name: Image + Normalize Range: true + Topic: + Depth: 5 + Durability Policy: Volatile + History Policy: Keep Last + Reliability Policy: Reliable + Value: /annotated_image_detection + Value: true + - Angle Tolerance: 0.10000000149011612 + Class: rviz_default_plugins/Odometry + Covariance: + Orientation: + Alpha: 0.5 + Color: 255; 255; 127 + Color Style: Unique + Frame: Local + Offset: 1 + Scale: 1 + Value: true + Position: + Alpha: 0.30000001192092896 + Color: 204; 51; 204 + Scale: 1 + Value: true + Value: false + Enabled: true + Keep: 5 + Name: Odometry + Position Tolerance: 0.10000000149011612 + Shape: + Alpha: 1 + Axes Length: 1 + Axes Radius: 0.10000000149011612 + Color: 255; 25; 0 + Head Length: 0.30000001192092896 + Head Radius: 0.10000000149011612 + Shaft Length: 1 + Shaft Radius: 0.05000000074505806 + Value: Arrow + Topic: + Depth: 5 + Durability Policy: Volatile + Filter size: 10 + History Policy: Keep Last + Reliability Policy: Reliable + Value: /state_estimation + Value: true + - Alpha: 0.10000000149011612 + Autocompute Intensity Bounds: true + Autocompute Value Bounds: + Max Value: 10 + Min Value: -10 + Value: true + Axis: Z + Channel Name: intensity + Class: rviz_default_plugins/PointCloud2 + Color: 255; 255; 255 + Color Transformer: FlatColor + Decay Time: 3 + Enabled: true + Invert Rainbow: false + Max Color: 255; 255; 255 + Max Intensity: 165 + Min Color: 0; 0; 0 + Min Intensity: 0 + Name: PointCloud2 + Position Transformer: XYZ + Selectable: true + Size (Pixels): 3 + Size (m): 0.004999999888241291 + Style: Flat Squares + Topic: + Depth: 5 + Durability Policy: Volatile + History Policy: Keep Last + Reliability Policy: Reliable + Value: /registered_scan + Use Fixed Frame: true + Use rainbow: true + Value: true + - Class: rviz_default_plugins/MarkerArray + Enabled: true + Name: MarkerArray + Namespaces: + bbox: true + Topic: + Depth: 5 + Durability Policy: Volatile + History Policy: Keep Last + Reliability Policy: Reliable + Value: /obj_boxes + Value: true + - Alpha: 1 + Autocompute Intensity Bounds: true + Autocompute Value Bounds: + Max Value: 10 + Min Value: -10 + Value: true + Axis: Z + Channel Name: intensity + Class: rviz_default_plugins/PointCloud2 + Color: 255; 255; 255 + Color Transformer: RGB8 + Decay Time: 0 + Enabled: true + Invert Rainbow: 
false + Max Color: 255; 255; 255 + Max Intensity: 4096 + Min Color: 0; 0; 0 + Min Intensity: 0 + Name: PointCloud2 + Position Transformer: XYZ + Selectable: true + Size (Pixels): 3 + Size (m): 0.009999999776482582 + Style: Flat Squares + Topic: + Depth: 5 + Durability Policy: Volatile + History Policy: Keep Last + Reliability Policy: Reliable + Value: /obj_points + Use Fixed Frame: true + Use rainbow: true + Value: true + Enabled: true + Global Options: + Background Color: 48; 48; 48 + Fixed Frame: map + Frame Rate: 30 + Name: root + Tools: + - Class: rviz_default_plugins/Interact + Hide Inactive Objects: true + - Class: rviz_default_plugins/MoveCamera + - Class: rviz_default_plugins/Select + - Class: rviz_default_plugins/FocusCamera + - Class: rviz_default_plugins/Measure + Line color: 128; 128; 0 + - Class: rviz_default_plugins/SetInitialPose + Covariance x: 0.25 + Covariance y: 0.25 + Covariance yaw: 0.06853891909122467 + Topic: + Depth: 5 + Durability Policy: Volatile + History Policy: Keep Last + Reliability Policy: Reliable + Value: /initialpose + - Class: rviz_default_plugins/SetGoal + Topic: + Depth: 5 + Durability Policy: Volatile + History Policy: Keep Last + Reliability Policy: Reliable + Value: /goal_pose + - Class: rviz_default_plugins/PublishPoint + Single click: true + Topic: + Depth: 5 + Durability Policy: Volatile + History Policy: Keep Last + Reliability Policy: Reliable + Value: /clicked_point + Transformation: + Current: + Class: rviz_default_plugins/TF + Value: true + Views: + Current: + Class: rviz_default_plugins/Orbit + Distance: 21.720943450927734 + Enable Stereo Rendering: + Stereo Eye Separation: 0.05999999865889549 + Stereo Focal Distance: 1 + Swap Stereo Eyes: false + Value: false + Focal Point: + X: 3.1760172843933105 + Y: 11.240063667297363 + Z: 2.561012029647827 + Focal Shape Fixed Size: false + Focal Shape Size: 0.05000000074505806 + Invert Z Axis: false + Name: Current View + Near Clip Distance: 0.009999999776482582 + Pitch: 1.1497966051101685 + Target Frame: + Value: Orbit (rviz_default_plugins) + Yaw: 3.4885518550872803 + Saved: ~ +Window Geometry: + Displays: + collapsed: false + Height: 996 + Hide Left Dock: false + Hide Right Dock: false + Image: + collapsed: false + QMainWindow State: 
000000ff00000000fd00000004000000000000027b00000342fc0200000009fb0000001200530065006c0065006300740069006f006e00000001e10000009b0000005d00fffffffb0000001e0054006f006f006c002000500072006f007000650072007400690065007302000001ed000001df00000185000000a3fb000000120056006900650077007300200054006f006f02000001df000002110000018500000122fb000000200054006f006f006c002000500072006f0070006500720074006900650073003203000002880000011d000002210000017afb000000100044006900730070006c006100790073010000003f0000022b000000cc00fffffffb0000002000730065006c0065006300740069006f006e00200062007500660066006500720200000138000000aa0000023a00000294fb00000014005700690064006500530074006500720065006f02000000e6000000d2000003ee0000030bfb0000000c004b0069006e0065006300740200000186000001060000030c00000261fb0000000a0049006d0061006700650100000270000001110000001700ffffff000000010000010f00000342fc0200000003fb0000001e0054006f006f006c002000500072006f00700065007200740069006500730100000041000000780000000000000000fb0000000a00560069006500770073010000003f00000342000000a900fffffffb0000001200530065006c0065006300740069006f006e010000025a000000b200000000000000000000000200000490000000a9fc0100000001fb0000000a00560069006500770073030000004e00000080000002e10000019700000003000006b90000003efc0100000002fb0000000800540069006d00650100000000000006b90000026f00fffffffb0000000800540069006d00650100000000000004500000000000000000000003230000034200000004000000040000000800000008fc0000000100000002000000010000000a0054006f006f006c00730100000000ffffffff0000000000000000 + Selection: + collapsed: false + Time: + collapsed: false + Tool Properties: + collapsed: false + Views: + collapsed: false + Width: 1721 + X: 3248 + Y: 331 diff --git a/semantic_mapping/README.md b/semantic_mapping/README.md index 3428c0b..d7f6c88 100644 --- a/semantic_mapping/README.md +++ b/semantic_mapping/README.md @@ -19,7 +19,7 @@ The pipeline stores voxelized geometry, merges observations, and exports planner | Publishers | `utils/map_object_utils.py` | Complete | | Image Storage | `utils/storage.py` | Complete | | Mapping Node | `mapping_node.py` | Complete | -| Launch Files | `launch/` | Pending | +| Launch Files | `launch/` | Complete | ## Messages @@ -104,6 +104,7 @@ ros2 launch semantic_mapping semantic_mapping.launch.py ## Configuration +- `config/params.yaml`: Default node parameters for `semantic_mapping_node`. - `config/sensor_to_camera_tf.yaml`: Static sensor→camera transform for projection. - `config/objects.yaml`: Object taxonomy for detection labels. 
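+
+Example override at launch (arguments are declared in
+`launch/semantic_mapping.launch.py`):
+
+```bash
+ros2 launch semantic_mapping semantic_mapping.launch.py \
+    mode:=bagfile \
+    target_object:=refrigerator \
+    params_file:=/path/to/params.yaml
+```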
diff --git a/semantic_mapping/config/params.yaml b/semantic_mapping/config/params.yaml new file mode 100644 index 0000000..370d9c9 --- /dev/null +++ b/semantic_mapping/config/params.yaml @@ -0,0 +1,55 @@ +semantic_mapping_node: + ros__parameters: + detection_topic: /detection_result + cloud_topic: /registered_scan + odom_topic: /state_estimation + viewpoint_topic: /viewpoint_rep_header + object_type_answer_topic: /object_type_answer + object_type_query_topic: /object_type_query + target_object_instruction_topic: /target_object_instruction + object_nodes_topic: /object_nodes + obj_points_topic: /obj_points + obj_boxes_topic: /obj_boxes + obj_labels_topic: /obj_labels + map_frame: map + + confidence_threshold: 0.35 + voxel_size: 0.03 + distance_threshold: 0.3 + iou_threshold: 0.5 + min_detections_for_permanent: 3 + pending_ttl_s: 10.0 + max_inactive_frames: 20 + num_angle_bins: 15 + dbscan_eps_scale: 2.0 + dbscan_min_points_initial: 6 + dbscan_min_points_mature: 12 + + detection_linear_state_time_bias: 0.0 + detection_angular_state_time_bias: 0.0 + cloud_window_before: 0.5 + cloud_window_after: 0.1 + + projection: equirect + axis_mode: xz + image_width: 1920 + image_height: 640 + depth_filter: true + depth_filter_margin: 0.15 + depth_jump_threshold: 0.3 + sensor_to_camera_tf: config/sensor_to_camera_tf.yaml + mask_erode_kernel: 4 + mask_erode_iterations: 3 + max_cloud_distance: 8.0 + outlier_filter: true + outlier_nb_neighbors: 12 + outlier_std_ratio: 1.6 + outlier_min_points: 20 + visualization_max_objects: 50 + ground_filter: true + ground_radius: 2.0 + ground_z_clearance: 0.05 + + target_object: refrigerator + save_png: True + output_dir: output/object_images diff --git a/semantic_mapping/config/sensor_to_camera_tf.yaml b/semantic_mapping/config/sensor_to_camera_tf.yaml index baa6724..e48c06f 100644 --- a/semantic_mapping/config/sensor_to_camera_tf.yaml +++ b/semantic_mapping/config/sensor_to_camera_tf.yaml @@ -2,8 +2,8 @@ sensor_to_camera: parent_frame: lidar child_frame: camera translation: - x: 0.0 - y: 0.0 + x: -0.12 + y: -0.075 z: 0.265 rotation_rpy: roll: -1.5707963 diff --git a/semantic_mapping/docs/SEMANTIC_MAPPING_MIGRATION.md b/semantic_mapping/docs/SEMANTIC_MAPPING_MIGRATION.md index e02e8d0..07ef2c5 100644 --- a/semantic_mapping/docs/SEMANTIC_MAPPING_MIGRATION.md +++ b/semantic_mapping/docs/SEMANTIC_MAPPING_MIGRATION.md @@ -28,8 +28,8 @@ This document outlines the comprehensive plan for porting the `semantic_mapping` | 6 | Message Publishers (`utils/map_object_utils.py`) | **COMPLETED** | | 7 | Image Storage (`utils/storage.py`) | **COMPLETED** | | 8 | Mapping Node (`mapping_node.py`) | **COMPLETED** | -| 9 | Launch Files & Config | Pending | -| 10 | Integration Testing | Pending | +| 9 | Launch Files & Config | **COMPLETED** | +| 10 | Integration Testing | **COMPLETED** | ### Step 2.1: Refactored map_object.py to use shared utilities @@ -93,7 +93,7 @@ semantic_mapping/ │ └── ViewpointRep.msg ├── config/ │ ├── objects.yaml # Object taxonomy (to be copied from VLM_ROS) -│ └── params.yaml # Default node parameters (to be created) +│ └── params.yaml # Default node parameters ├── launch/ │ └── semantic_mapping.launch.py ├── semantic_mapping/ @@ -416,8 +416,8 @@ DeclareLaunchArgument('target_object', default_value='') - [x] Implement `mapping_node.py` following detect_anything patterns ### Remaining -- [ ] Create launch files and params.yaml -- [ ] Integration testing with bag files +- [x] Create launch files and params.yaml +- [x] Integration testing with bag files --- @@ -437,19 
+437,38 @@ Items to remove: ## Compatibility Checklist -- [ ] `/object_nodes` publishes ObjectNode with exact schema -- [ ] `/obj_points` publishes colored PointCloud2 -- [ ] `/obj_boxes` publishes MarkerArray wireframes -- [ ] `/obj_labels` publishes MarkerArray text -- [ ] `/object_type_query` publishes ObjectType for VLM -- [ ] Subscribes to `/detection_result` from detect_anything -- [ ] Subscribes to `/registered_scan` PointCloud2 -- [ ] Subscribes to `/state_estimation` Odometry -- [ ] Subscribes to `/target_object_instruction` -- [ ] Subscribes to `/viewpoint_rep_header` -- [ ] Subscribes to `/object_type_answer` -- [ ] Object positions, bbox corners, status flags match expected semantics -- [ ] Delete markers properly remove objects from RViz +- [x] `/object_nodes` publishes ObjectNode with exact schema +- [x] `/obj_points` publishes colored PointCloud2 +- [x] `/obj_boxes` publishes MarkerArray wireframes +- [x] `/obj_labels` publishes MarkerArray text +- [x] `/object_type_query` publishes ObjectType for VLM +- [x] Subscribes to `/detection_result` from detect_anything +- [x] Subscribes to `/registered_scan` PointCloud2 +- [x] Subscribes to `/state_estimation` Odometry +- [x] Subscribes to `/target_object_instruction` +- [x] Subscribes to `/viewpoint_rep_header` +- [x] Subscribes to `/object_type_answer` +- [x] Object positions, bbox corners, status flags match expected semantics +- [x] Delete markers properly remove objects from RViz + +--- + +## Recent Updates (Jan 2026) + +- Unified launch to `semantic_mapping.launch.py` with mode handling, detect_anything integration, and bagfile defaults. +- Fixed cloud/image projection transform to match legacy `CloudImageFusion` math. +- Added mask erosion, depth-jump pruning, distance gating, and light outlier filtering for cleaner object clouds. +- Added reprojected observation-angle voting for better multi-view aggregation. +- Fixed bbox ordering and switched `/obj_boxes` to axis-aligned markers (via `get_oriented_bbox(axis_aligned=True)`). +- Updated DBSCAN regularization to v2 logic and exposed DBSCAN tuning params. +- Fixed image saving color order and ensured output dir is cleared on start. +- Set MobileCLIP weights cache to `detect_anything/models` and removed corrupted caches. + +## Temporary Handoff Notes + +- Add ground-height clamp to further suppress ground/background leakage. +- If bagfile sync issues persist, consider switching mapping inputs to `/sensor_scan` + `/state_estimation_at_scan`. +- Verify VLM objects.yaml path in VLM_ROS (`vlm_reasoning_node` still expects old path). 
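+
+A minimal sketch of the reprojected observation-angle voting noted above,
+using `discretize_angles` from `vector_perception_utils.transform_utils`
+(`voxels` and `robot_t` are illustrative, not the mapper's exact code):
+
+```python
+import numpy as np
+from vector_perception_utils.transform_utils import discretize_angles
+
+voxels = np.random.rand(100, 3) * 4.0       # voxel centers (world frame)
+robot_t = np.array([1.0, 2.0, 0.5])         # robot position at observation time
+
+rel = robot_t[:2] - voxels[:, :2]           # voxel -> robot offsets in XY
+yaw = np.arctan2(rel[:, 1], rel[:, 0])      # viewing angle per voxel
+bins = discretize_angles(yaw, num_bins=15)  # matches num_angle_bins
+
+# The bins feed the per-voxel histogram kept by VoxelFeatureManager:
+# voxel_manager.update_observation_angles(np.arange(len(voxels)), bins)
+```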
--- diff --git a/semantic_mapping/launch/semantic_mapping.launch.py b/semantic_mapping/launch/semantic_mapping.launch.py new file mode 100644 index 0000000..0b7d8ab --- /dev/null +++ b/semantic_mapping/launch/semantic_mapping.launch.py @@ -0,0 +1,137 @@ +"""Launch semantic mapping and detect_anything with a mode selector.""" + +import os + +from ament_index_python.packages import get_package_share_directory +from launch import LaunchDescription +from launch.actions import DeclareLaunchArgument, OpaqueFunction +from launch.substitutions import LaunchConfiguration +from launch_ros.actions import Node + + +def _parse_bool(value: str, default: bool) -> bool: + if value == "": + return default + return value.lower() in ("true", "1", "yes") + + +def _launch_nodes(context): + mode = LaunchConfiguration("mode").perform(context).lower() + use_sim_time_arg = LaunchConfiguration("use_sim_time").perform(context) + use_compressed_arg = LaunchConfiguration("use_compressed").perform(context) + odom_topic_arg = LaunchConfiguration("odom_topic").perform(context) + + use_sim_time = _parse_bool(use_sim_time_arg, default=mode == "bagfile") + use_compressed = _parse_bool(use_compressed_arg, default=mode in ("bagfile", "sim")) + + if odom_topic_arg: + odom_topic = odom_topic_arg + else: + odom_topic = "/aft_mapped_to_init_incremental" if mode == "bagfile" else "/state_estimation" + + params_file = LaunchConfiguration("params_file").perform(context) + sensor_to_camera_tf = LaunchConfiguration("sensor_to_camera_tf").perform(context) + detection_topic = LaunchConfiguration("detection_topic").perform(context) + cloud_topic = LaunchConfiguration("cloud_topic").perform(context) + viewpoint_topic = LaunchConfiguration("viewpoint_topic").perform(context) + object_type_answer_topic = LaunchConfiguration("object_type_answer_topic").perform(context) + object_type_query_topic = LaunchConfiguration("object_type_query_topic").perform(context) + target_object_instruction_topic = LaunchConfiguration("target_object_instruction_topic").perform( + context + ) + object_nodes_topic = LaunchConfiguration("object_nodes_topic").perform(context) + obj_points_topic = LaunchConfiguration("obj_points_topic").perform(context) + obj_boxes_topic = LaunchConfiguration("obj_boxes_topic").perform(context) + obj_labels_topic = LaunchConfiguration("obj_labels_topic").perform(context) + map_frame = LaunchConfiguration("map_frame").perform(context) + target_object = LaunchConfiguration("target_object").perform(context) + save_png = _parse_bool(LaunchConfiguration("save_png").perform(context), default=False) + output_dir = LaunchConfiguration("output_dir").perform(context) + image_topic = LaunchConfiguration("image_topic").perform(context) + compressed_image_topic = LaunchConfiguration("compressed_image_topic").perform(context) + conf = float(LaunchConfiguration("conf").perform(context)) + max_area_ratio = float(LaunchConfiguration("max_area_ratio").perform(context)) + + detect_anything_node = Node( + package="detect_anything", + executable="detection_node", + name="detect_anything_detection_node", + output="screen", + parameters=[ + { + "use_sim_time": use_sim_time, + "use_compressed": use_compressed, + "image_topic": image_topic, + "compressed_image_topic": compressed_image_topic, + "conf": conf, + "max_area_ratio": max_area_ratio, + } + ], + ) + + semantic_mapping_node = Node( + package="semantic_mapping", + executable="semantic_mapping_node", + name="semantic_mapping_node", + output="screen", + parameters=[ + params_file, + { + "use_sim_time": 
use_sim_time, + "sensor_to_camera_tf": sensor_to_camera_tf, + "detection_topic": detection_topic, + "cloud_topic": cloud_topic, + "odom_topic": odom_topic, + "viewpoint_topic": viewpoint_topic, + "object_type_answer_topic": object_type_answer_topic, + "object_type_query_topic": object_type_query_topic, + "target_object_instruction_topic": target_object_instruction_topic, + "object_nodes_topic": object_nodes_topic, + "obj_points_topic": obj_points_topic, + "obj_boxes_topic": obj_boxes_topic, + "obj_labels_topic": obj_labels_topic, + "map_frame": map_frame, + "target_object": target_object, + "save_png": save_png, + "output_dir": output_dir, + }, + ], + ) + + return [detect_anything_node, semantic_mapping_node] + + +def generate_launch_description() -> LaunchDescription: + pkg_share = get_package_share_directory("semantic_mapping") + default_params = os.path.join(pkg_share, "config", "params.yaml") + default_sensor_tf = os.path.join(pkg_share, "config", "sensor_to_camera_tf.yaml") + + return LaunchDescription( + [ + DeclareLaunchArgument("mode", default_value="sim"), + DeclareLaunchArgument("use_sim_time", default_value=""), + DeclareLaunchArgument("use_compressed", default_value=""), + DeclareLaunchArgument("params_file", default_value=default_params), + DeclareLaunchArgument("sensor_to_camera_tf", default_value=default_sensor_tf), + DeclareLaunchArgument("detection_topic", default_value="/detection_result"), + DeclareLaunchArgument("cloud_topic", default_value="/registered_scan"), + DeclareLaunchArgument("odom_topic", default_value=""), + DeclareLaunchArgument("viewpoint_topic", default_value="/viewpoint_rep_header"), + DeclareLaunchArgument("object_type_answer_topic", default_value="/object_type_answer"), + DeclareLaunchArgument("object_type_query_topic", default_value="/object_type_query"), + DeclareLaunchArgument("target_object_instruction_topic", default_value="/target_object_instruction"), + DeclareLaunchArgument("object_nodes_topic", default_value="/object_nodes"), + DeclareLaunchArgument("obj_points_topic", default_value="/obj_points"), + DeclareLaunchArgument("obj_boxes_topic", default_value="/obj_boxes"), + DeclareLaunchArgument("obj_labels_topic", default_value="/obj_labels"), + DeclareLaunchArgument("map_frame", default_value="map"), + DeclareLaunchArgument("target_object", default_value="refrigerator"), + DeclareLaunchArgument("save_png", default_value="false"), + DeclareLaunchArgument("output_dir", default_value="output/object_images"), + DeclareLaunchArgument("image_topic", default_value="/camera/image"), + DeclareLaunchArgument("compressed_image_topic", default_value="/camera/image/compressed"), + DeclareLaunchArgument("conf", default_value="0.35"), + DeclareLaunchArgument("max_area_ratio", default_value="0.3"), + OpaqueFunction(function=_launch_nodes), + ] + ) diff --git a/semantic_mapping/scripts/semantic_mapping_node b/semantic_mapping/scripts/semantic_mapping_node old mode 100644 new mode 100755 diff --git a/semantic_mapping/semantic_mapping/map_object.py b/semantic_mapping/semantic_mapping/map_object.py index 8140835..5875d9c 100644 --- a/semantic_mapping/semantic_mapping/map_object.py +++ b/semantic_mapping/semantic_mapping/map_object.py @@ -20,6 +20,7 @@ from vector_perception_utils.pointcloud_utils import ( compute_centroid, create_pointcloud, + remove_statistical_outliers, ) @@ -180,6 +181,12 @@ def update( # Rebuild KDTree self._tree = cKDTree(self.voxels) + def update_observation_angles(self, voxel_indices: np.ndarray, obs_bins: np.ndarray) -> None: + """Update 
observation angle histogram for selected voxels.""" + if voxel_indices.size == 0: + return + self.observation_angles[voxel_indices, obs_bins] = 1 + def merge_from(self, other: VoxelFeatureManager) -> None: """Merge voxels from another VoxelFeatureManager. @@ -368,6 +375,13 @@ class MapObject: _diversity_percentile: float = field(default=0.85, repr=False) _angle_threshold: float = field(default=np.deg2rad(5), repr=False) _distance_threshold: float = field(default=0.3, repr=False) + _dbscan_eps_scale: float = field(default=2.0, repr=False) + _dbscan_min_points_initial: int = field(default=6, repr=False) + _dbscan_min_points_mature: int = field(default=12, repr=False) + _outlier_filter: bool = field(default=True, repr=False) + _outlier_nb_neighbors: int = field(default=10, repr=False) + _outlier_std_ratio: float = field(default=2.0, repr=False) + _outlier_min_points: int = field(default=30, repr=False) @classmethod def from_observation( @@ -376,6 +390,13 @@ def from_observation( obs: Observation, voxel_size: float = 0.03, num_angle_bins: int = 15, + dbscan_eps_scale: float = 2.0, + dbscan_min_points_initial: int = 6, + dbscan_min_points_mature: int = 12, + outlier_filter: bool = True, + outlier_nb_neighbors: int = 10, + outlier_std_ratio: float = 2.0, + outlier_min_points: int = 30, ) -> MapObject: """Create a new MapObject from an initial observation. @@ -384,6 +405,13 @@ def from_observation( obs: Initial observation. voxel_size: Voxel grid cell size in meters. num_angle_bins: Number of angle bins for observation tracking. + dbscan_eps_scale: DBSCAN eps scale (multiplier on voxel size). + dbscan_min_points_initial: Min points for new objects. + dbscan_min_points_mature: Min points for mature objects. + outlier_filter: Whether to run statistical outlier filtering. + outlier_nb_neighbors: Neighbor count for outlier filtering. + outlier_std_ratio: Standard deviation ratio for outlier filtering. + outlier_min_points: Minimum points to apply outlier filtering. Returns: New MapObject instance. @@ -405,6 +433,13 @@ def from_observation( points_count={obs.label: len(obs.points)}, robot_poses=[{"R": obs.robot_R.copy(), "t": obs.robot_t.copy()}], last_update_time=obs.timestamp, + _dbscan_eps_scale=dbscan_eps_scale, + _dbscan_min_points_initial=dbscan_min_points_initial, + _dbscan_min_points_mature=dbscan_min_points_mature, + _outlier_filter=outlier_filter, + _outlier_nb_neighbors=outlier_nb_neighbors, + _outlier_std_ratio=outlier_std_ratio, + _outlier_min_points=outlier_min_points, ) return obj @@ -592,92 +627,131 @@ def _compute_geometry(self) -> None: pcd = create_pointcloud(valid_voxels) obb = pcd.get_minimal_oriented_bounding_box() self._centroid = np.asarray(obb.center) - self._bbox3d_corners = np.asarray(obb.get_box_points()) + half_extent = 0.5 * np.asarray(obb.extent) + local_corners = np.array( + [ + [-half_extent[0], -half_extent[1], -half_extent[2]], + [half_extent[0], -half_extent[1], -half_extent[2]], + [half_extent[0], half_extent[1], -half_extent[2]], + [-half_extent[0], half_extent[1], -half_extent[2]], + [-half_extent[0], -half_extent[1], half_extent[2]], + [half_extent[0], -half_extent[1], half_extent[2]], + [half_extent[0], half_extent[1], half_extent[2]], + [-half_extent[0], half_extent[1], half_extent[2]], + ], + dtype=float, + ) + self._bbox3d_corners = local_corners @ np.asarray(obb.R).T + np.asarray(obb.center) self._needs_recompute = False def regularize_shape(self, percentile: float = 0.85) -> None: - """Apply DBSCAN clustering to filter outlier voxels. 
- - This method clusters the voxels using DBSCAN, then selects clusters - by descending weight (observation angle diversity) until the target - percentile of total weight is reached. Clusters far from the main - cluster are down-weighted. - - Args: - percentile: Target fraction of total weight to include (0-1). - """ + """Apply v2 shape regularization (ported from legacy pipeline).""" voxels = self.voxel_manager.voxels if len(voxels) < 3: self.voxel_manager.regularized_mask = np.ones(len(voxels), dtype=bool) self._invalidate_cache() return - # DBSCAN clustering pcd = create_pointcloud(voxels) - - eps = self.voxel_manager.voxel_size * 2.5 - min_points = 10 if self.detection_count >= 3 else 5 - + eps = self.voxel_manager.voxel_size * self._dbscan_eps_scale + min_points = ( + self._dbscan_min_points_mature + if self.detection_count >= 3 + else self._dbscan_min_points_initial + ) labels = np.array(pcd.cluster_dbscan(eps=eps, min_points=min_points)) unique_labels = np.unique(labels) - # Collect cluster info cluster_masks = [] cluster_weights = [] cluster_centroids = [] - for label in unique_labels: - if label == -1: # Noise + if label == -1: continue mask = labels == label + if not np.any(mask): + continue weight = float(np.sum(self.voxel_manager.observation_angles[mask])) - if weight < 5: # Skip tiny clusters + if weight < 5: continue cluster_masks.append(mask) cluster_weights.append(weight) cluster_centroids.append(compute_centroid(voxels[mask])) if len(cluster_masks) == 0: - self.voxel_manager.regularized_mask = np.zeros(len(voxels), dtype=bool) + self.voxel_manager.regularized_mask = np.zeros( + self.voxel_manager.voxels.shape[0], dtype=bool + ) elif len(cluster_masks) == 1: self.voxel_manager.regularized_mask = cluster_masks[0] else: - weights = np.array(cluster_weights) - centroids = np.array(cluster_centroids) - - # Find main cluster - main_idx = int(np.argmax(weights)) - main_centroid = centroids[main_idx] - - # Distance-based weight adjustment - dists = np.linalg.norm(centroids - main_centroid, axis=1) - dist_base = np.max(dists) if np.max(dists) > 0 else 1.0 - - alpha = 3.0 # Decay rate - scale = np.exp(-alpha * dists / dist_base) - adjusted_weights = weights * scale - - # Select clusters up to percentile - target = percentile * np.sum(adjusted_weights) - order = np.argsort(adjusted_weights)[::-1] - + clustering_weight = np.array(cluster_weights, dtype=float, copy=False) + cluster_centroids = np.array(cluster_centroids, dtype=float, copy=False) + + main_idx = int(np.argmax(clustering_weight)) + main_centroid = cluster_centroids[main_idx] + + dists = np.linalg.norm(cluster_centroids - main_centroid, axis=1) + if len(voxels) >= 4: + try: + obb = pcd.get_minimal_oriented_bounding_box() + dist_base = float(np.linalg.norm(np.asarray(obb.extent))) + except Exception: + dist_base = float(np.max(dists)) + else: + dist_base = float(np.max(dists)) + if dist_base == 0: + adjusted_weight = clustering_weight.copy() + else: + alpha = 3.0 + scale = np.exp(-alpha * dists / dist_base) + adjusted_weight = clustering_weight * scale + + main_adjusted_idx = int(np.argmax(adjusted_weight)) chosen = np.zeros(len(cluster_masks), dtype=bool) - acc = 0.0 - - for i in order: - if acc + adjusted_weights[i] <= target + 1e-8: - chosen[i] = True - acc += adjusted_weights[i] - if acc >= target: - break - # Merge chosen clusters - valid_mask = np.zeros(len(voxels), dtype=bool) + if percentile is not None: + target = percentile * np.sum(adjusted_weight) + acc = adjusted_weight[main_adjusted_idx] + 
chosen[main_adjusted_idx] = True + order = np.argsort(adjusted_weight)[::-1] + for i in order: + if chosen[i]: + continue + if acc + adjusted_weight[i] <= target + 1e-8: + chosen[i] = True + acc += adjusted_weight[i] + if acc >= target: + break + else: + chosen[:] = True + + valid_cluster_mask = np.zeros(self.voxel_manager.voxels.shape[0], dtype=bool) for i, cmask in enumerate(cluster_masks): if chosen[i]: - valid_mask |= cmask - - self.voxel_manager.regularized_mask = valid_mask + valid_cluster_mask |= cmask + self.voxel_manager.regularized_mask = valid_cluster_mask + + if ( + self._outlier_filter + and np.any(self.voxel_manager.regularized_mask) + and np.sum(self.voxel_manager.regularized_mask) >= self._outlier_min_points + ): + keep_indices = np.nonzero(self.voxel_manager.regularized_mask)[0] + keep_points = voxels[keep_indices] + _, _, inlier_indices = remove_statistical_outliers( + keep_points, + nb_neighbors=self._outlier_nb_neighbors, + std_ratio=self._outlier_std_ratio, + ) + if inlier_indices.size > 0: + refined_mask = np.zeros_like(self.voxel_manager.regularized_mask) + refined_mask[keep_indices[inlier_indices]] = True + self.voxel_manager.regularized_mask = refined_mask + else: + self.voxel_manager.regularized_mask = np.zeros_like( + self.voxel_manager.regularized_mask + ) self._invalidate_cache() diff --git a/semantic_mapping/semantic_mapping/mapper.py b/semantic_mapping/semantic_mapping/mapper.py index bcdfa90..8b1b259 100644 --- a/semantic_mapping/semantic_mapping/mapper.py +++ b/semantic_mapping/semantic_mapping/mapper.py @@ -7,7 +7,7 @@ import time import uuid -from .utils.matching import compute_iou_3d_with_ratios, find_best_match +from .utils.matching import compute_bbox, compute_iou_3d_with_ratios, find_best_match from .map_object import MapObject, ObjectStatus, Observation @@ -36,6 +36,13 @@ def __init__( pending_ttl_s: float = 10.0, max_inactive_frames: int = 20, num_angle_bins: int = 15, + dbscan_eps_scale: float = 2.0, + dbscan_min_points_initial: int = 6, + dbscan_min_points_mature: int = 12, + outlier_filter: bool = True, + outlier_nb_neighbors: int = 10, + outlier_std_ratio: float = 2.0, + outlier_min_points: int = 30, ) -> None: """Initialize the object mapper and its thresholds. @@ -48,6 +55,13 @@ def __init__( pending_ttl_s: Time-to-live for pending objects. max_inactive_frames: Frames before pruning inactive objects. num_angle_bins: Observation angle bins for voxel manager. + dbscan_eps_scale: DBSCAN eps scale (multiplier on voxel size). + dbscan_min_points_initial: DBSCAN min points for new objects. + dbscan_min_points_mature: DBSCAN min points for mature objects. + outlier_filter: Whether to run statistical outlier filtering. + outlier_nb_neighbors: Neighbor count for outlier filtering. + outlier_std_ratio: Standard deviation ratio for outlier filtering. + outlier_min_points: Minimum points to apply outlier filtering. 
""" self._pending_objects: dict[str, MapObject] = {} self._permanent_objects: dict[str, MapObject] = {} @@ -62,6 +76,13 @@ def __init__( self._pending_ttl_s = pending_ttl_s self._max_inactive_frames = max_inactive_frames self._num_angle_bins = num_angle_bins + self._dbscan_eps_scale = dbscan_eps_scale + self._dbscan_min_points_initial = dbscan_min_points_initial + self._dbscan_min_points_mature = dbscan_min_points_mature + self._outlier_filter = outlier_filter + self._outlier_nb_neighbors = outlier_nb_neighbors + self._outlier_std_ratio = outlier_std_ratio + self._outlier_min_points = outlier_min_points def update(self, observations: list[Observation], target_object: str | None = None) -> UpdateStats: """Update the map with new observations. @@ -103,6 +124,7 @@ def update(self, observations: list[Observation], target_object: str | None = No for obj in list(self._pending_objects.values()) + list(self._permanent_objects.values()): if obj.object_id not in updated_ids: obj.inactive_frames += 1 + obj.regularize_shape(percentile=obj._diversity_percentile) stats.deleted += len(self._prune_inactive(target_object, now)) stats.merged += self.merge_objects() @@ -134,6 +156,13 @@ def _create_object(self, obs: Observation) -> MapObject: obs=obs, voxel_size=self._voxel_size, num_angle_bins=self._num_angle_bins, + dbscan_eps_scale=self._dbscan_eps_scale, + dbscan_min_points_initial=self._dbscan_min_points_initial, + dbscan_min_points_mature=self._dbscan_min_points_mature, + outlier_filter=self._outlier_filter, + outlier_nb_neighbors=self._outlier_nb_neighbors, + outlier_std_ratio=self._outlier_std_ratio, + outlier_min_points=self._outlier_min_points, ) self._track_id_map[obs.track_id] = obj.object_id return obj @@ -176,18 +205,31 @@ def merge_objects(self) -> int: j += 1 continue - obj_corners = obj.bbox3d_corners - other_corners = other.bbox3d_corners - obj_extent = ( - obj_corners.max(axis=0) - obj_corners.min(axis=0) - if obj_corners is not None and obj_corners.size > 0 - else None + obj_points = obj.voxel_manager.get_valid_voxels( + diversity_percentile=obj._diversity_percentile, + use_regularized=True, ) - other_extent = ( - other_corners.max(axis=0) - other_corners.min(axis=0) - if other_corners is not None and other_corners.size > 0 - else None + if len(obj_points) == 0: + obj_points = obj.voxel_manager.get_valid_voxels( + diversity_percentile=obj._diversity_percentile, + use_regularized=False, + ) + other_points = other.voxel_manager.get_valid_voxels( + diversity_percentile=other._diversity_percentile, + use_regularized=True, ) + if len(other_points) == 0: + other_points = other.voxel_manager.get_valid_voxels( + diversity_percentile=other._diversity_percentile, + use_regularized=False, + ) + + obj_bbox = compute_bbox(obj_points) if len(obj_points) >= 3 else None + other_bbox = compute_bbox(other_points) if len(other_points) >= 3 else None + obj_corners = obj_bbox[0] if obj_bbox is not None else None + other_corners = other_bbox[0] if other_bbox is not None else None + obj_extent = obj_bbox[1] if obj_bbox is not None else None + other_extent = other_bbox[1] if other_bbox is not None else None dist_merge = dist < 0.25 if obj_extent is not None and other_extent is not None: diff --git a/semantic_mapping/semantic_mapping/mapping_node.py b/semantic_mapping/semantic_mapping/mapping_node.py index c33bcc0..0a4cf37 100644 --- a/semantic_mapping/semantic_mapping/mapping_node.py +++ b/semantic_mapping/semantic_mapping/mapping_node.py @@ -9,6 +9,7 @@ import numpy as np import yaml from cv_bridge import 
CvBridge +import cv2 import rclpy from rclpy.node import Node from rclpy.qos import QoSProfile, ReliabilityPolicy @@ -27,6 +28,7 @@ segment_pointclouds_from_projection, transform_segmented_clouds_to_world, ) +from vector_perception_utils.transform_utils import discretize_angles from .map_object import Observation from .utils.map_object_utils import ( @@ -77,6 +79,9 @@ def __init__(self) -> None: self.declare_parameter("pending_ttl_s", 10.0) self.declare_parameter("max_inactive_frames", 20) self.declare_parameter("num_angle_bins", 15) + self.declare_parameter("dbscan_eps_scale", 2.0) + self.declare_parameter("dbscan_min_points_initial", 6) + self.declare_parameter("dbscan_min_points_mature", 12) self.declare_parameter("detection_linear_state_time_bias", 0.0) self.declare_parameter("detection_angular_state_time_bias", 0.0) @@ -91,6 +96,17 @@ def __init__(self) -> None: self.declare_parameter("depth_filter_margin", 0.15) self.declare_parameter("depth_jump_threshold", 0.3) self.declare_parameter("sensor_to_camera_tf", "config/sensor_to_camera_tf.yaml") + self.declare_parameter("mask_erode_kernel", 3) + self.declare_parameter("mask_erode_iterations", 2) + self.declare_parameter("max_cloud_distance", 8.0) + self.declare_parameter("outlier_filter", True) + self.declare_parameter("outlier_nb_neighbors", 10) + self.declare_parameter("outlier_std_ratio", 2.0) + self.declare_parameter("outlier_min_points", 30) + self.declare_parameter("visualization_max_objects", 50) + self.declare_parameter("ground_filter", True) + self.declare_parameter("ground_radius", 2.0) + self.declare_parameter("ground_z_clearance", 0.0) self.declare_parameter("target_object", "") self.declare_parameter("save_png", False) @@ -125,6 +141,25 @@ def __init__(self) -> None: self._sensor_to_camera = self._load_sensor_to_camera( str(self.get_parameter("sensor_to_camera_tf").value) ) + self._mask_erode_kernel_size = max(1, int(self.get_parameter("mask_erode_kernel").value)) + self._mask_erode_iterations = max(0, int(self.get_parameter("mask_erode_iterations").value)) + self._mask_erode_kernel = cv2.getStructuringElement( + cv2.MORPH_RECT, + (self._mask_erode_kernel_size, self._mask_erode_kernel_size), + ) + self._max_cloud_distance = float(self.get_parameter("max_cloud_distance").value) + self._outlier_filter = bool(self.get_parameter("outlier_filter").value) + self._outlier_nb_neighbors = int(self.get_parameter("outlier_nb_neighbors").value) + self._outlier_std_ratio = float(self.get_parameter("outlier_std_ratio").value) + self._outlier_min_points = int(self.get_parameter("outlier_min_points").value) + self._visualization_max_objects = int( + self.get_parameter("visualization_max_objects").value + ) + self._ground_filter = bool(self.get_parameter("ground_filter").value) + self._ground_radius = float(self.get_parameter("ground_radius").value) + self._ground_z_clearance = float(self.get_parameter("ground_z_clearance").value) + self._ground_percentile = 0.05 + self._ground_min_points = 80 self._target_object = str(self.get_parameter("target_object").value) self._save_png = bool(self.get_parameter("save_png").value) @@ -139,6 +174,13 @@ def __init__(self) -> None: pending_ttl_s=float(self.get_parameter("pending_ttl_s").value), max_inactive_frames=int(self.get_parameter("max_inactive_frames").value), num_angle_bins=int(self.get_parameter("num_angle_bins").value), + dbscan_eps_scale=float(self.get_parameter("dbscan_eps_scale").value), + dbscan_min_points_initial=int(self.get_parameter("dbscan_min_points_initial").value), + 
dbscan_min_points_mature=int(self.get_parameter("dbscan_min_points_mature").value), + outlier_filter=self._outlier_filter, + outlier_nb_neighbors=self._outlier_nb_neighbors, + outlier_std_ratio=self._outlier_std_ratio, + outlier_min_points=self._outlier_min_points, ) self._image_saver = ImageSaveQueue( @@ -297,6 +339,7 @@ def _mapping_callback(self) -> None: return self._mapper.update(observations, target_object=self._target_object) + self._update_observation_angles(observations) self._update_best_images(observations, image) self._publish_map(detection_stamp, viewpoint_id) self._publish_object_type_queries() @@ -398,6 +441,31 @@ def _collect_cloud_window(self, detection_stamp: float) -> np.ndarray | None: return None return np.concatenate(clouds, axis=0) + def _compute_ground_z_threshold( + self, + points: np.ndarray, + robot_xy: np.ndarray, + ) -> float | None: + if not self._ground_filter: + return None + if points.shape[0] < self._ground_min_points: + return None + if self._ground_radius <= 0.0: + return None + if not 0.0 < self._ground_percentile < 1.0: + return None + + diffs = points[:, :2] - robot_xy + dist2 = diffs[:, 0] ** 2 + diffs[:, 1] ** 2 + radius2 = self._ground_radius ** 2 + mask = dist2 <= radius2 + if np.count_nonzero(mask) < self._ground_min_points: + return None + + z_vals = points[mask, 2] + z_thresh = float(np.percentile(z_vals, self._ground_percentile * 100.0)) + return z_thresh + self._ground_z_clearance + def _build_observations( self, detection_msg: DetectionResult, @@ -442,8 +510,17 @@ def _build_observations( full_mask = np.zeros(image_shape, dtype=bool) if i < len(detection_msg.masks): mask_msg: CompressedImage = detection_msg.masks[i] - mask_crop = self._bridge.compressed_imgmsg_to_cv2(mask_msg, desired_encoding="mono8") + mask_crop = self._bridge.compressed_imgmsg_to_cv2(mask_msg) + if mask_crop.ndim == 3: + mask_crop = cv2.cvtColor(mask_crop, cv2.COLOR_BGR2GRAY) mask_bool = mask_crop.astype(bool) + if self._mask_erode_iterations > 0: + mask_eroded = cv2.erode( + mask_bool.astype(np.uint8), + self._mask_erode_kernel, + iterations=self._mask_erode_iterations, + ) + mask_bool = mask_eroded.astype(bool) crop_h = min(mask_bool.shape[0], y2 - y1) crop_w = min(mask_bool.shape[1], x2 - x1) full_mask[y1 : y1 + crop_h, x1 : x1 + crop_w] = mask_bool[:crop_h, :crop_w] @@ -485,9 +562,16 @@ def _build_observations( t_b2w=t_b2w, depth_jump_threshold=self._depth_jump_threshold, ) + cloud_world = cloud_body @ R_b2w.T + t_b2w + ground_z_threshold = self._compute_ground_z_threshold(cloud_world, t_b2w[:2]) observations = [] for idx, world_cloud in enumerate(world_clouds): + if self._max_cloud_distance > 0: + dist = np.linalg.norm(world_cloud - t_b2w, axis=1) + world_cloud = world_cloud[dist < self._max_cloud_distance] + if ground_z_threshold is not None: + world_cloud = world_cloud[world_cloud[:, 2] > ground_z_threshold] if world_cloud.shape[0] == 0: continue observations.append( @@ -506,6 +590,62 @@ def _build_observations( return observations + def _update_observation_angles(self, observations: list[Observation]) -> None: + """Reproject object voxels onto the current mask to update angle votes.""" + if len(observations) == 0: + return + + objects = self._mapper.get_objects(include_pending=True) + track_to_obj = {} + for obj in objects: + for track_id in obj.track_ids: + track_to_obj[track_id] = obj + + for obs in observations: + obj = track_to_obj.get(obs.track_id) + if obj is None or obs.mask is None: + continue + voxels = obj.voxel_manager.voxels + if voxels.shape[0] == 0: 
+ continue + + R_b2w = obs.robot_R + t_b2w = obs.robot_t + R_w2b = R_b2w.T + t_w2b = -R_w2b @ t_b2w + voxels_body = voxels @ R_w2b.T + t_w2b + _, pixel_idx = project_pointcloud_to_image( + points=voxels_body, + projection=self._projection, + image_width=self._image_width, + image_height=self._image_height, + axis_mode=self._axis_mode, + intrinsics=None, + depth_filter=self._depth_filter, + depth_filter_margin=self._depth_filter_margin, + transform=self._sensor_to_camera, + ) + pix = pixel_idx.astype(int) + valid = ( + (pix[:, 0] >= 0) + & (pix[:, 0] < obs.mask.shape[1]) + & (pix[:, 1] >= 0) + & (pix[:, 1] < obs.mask.shape[0]) + ) + if not np.any(valid): + continue + pix = pix[valid] + voxel_indices = np.nonzero(valid)[0] + on_mask = obs.mask[pix[:, 1], pix[:, 0]].astype(bool) + if not np.any(on_mask): + continue + voxel_indices = voxel_indices[on_mask] + vectors = voxels[voxel_indices] - t_b2w + obs_angles = np.arctan2(vectors[:, 1], vectors[:, 0]) + obs_bins = discretize_angles(obs_angles, obj.voxel_manager.num_angle_bins) + obj.voxel_manager.update_observation_angles(voxel_indices, obs_bins) + obj._invalidate_cache() + def _update_best_images(self, observations: list[Observation], image: np.ndarray) -> None: """Update best image crops for objects based on mask area. @@ -595,16 +735,22 @@ def _publish_map(self, stamp: float, viewpoint_id: int) -> None: if msg is not None: self._object_nodes_pub.publish(msg) - bbox_markers = create_bbox_markers(objects, header) - label_markers = create_label_markers(objects, header) + viz_objects = objects + if self._visualization_max_objects > 0 and len(objects) > self._visualization_max_objects: + viz_objects = sorted( + objects, key=lambda obj: obj.last_update_time, reverse=True + )[: self._visualization_max_objects] + + bbox_markers = create_bbox_markers(viz_objects, header) + label_markers = create_label_markers(viz_objects, header) self._obj_boxes_pub.publish(bbox_markers) self._obj_labels_pub.publish(label_markers) - obj_cloud = create_object_pointcloud(objects, header) + obj_cloud = create_object_pointcloud(viz_objects, header) if obj_cloud is not None: self._obj_points_pub.publish(obj_cloud) - current_ids = {tid for obj in objects for tid in obj.track_ids} + current_ids = {int(obj.track_ids[0]) if obj.track_ids else 0 for obj in viz_objects} deleted_ids = list(self._published_ids - current_ids) if len(deleted_ids) > 0: delete_markers = create_delete_markers(deleted_ids, header, namespace="bbox") @@ -648,21 +794,46 @@ def _load_sensor_to_camera(self, path: str) -> np.ndarray | None: tf_data = data.get("sensor_to_camera", {}) trans = tf_data.get("translation", {}) rpy = tf_data.get("rotation_rpy", {}) - translation = np.array( + cam_offset = np.array( [ float(trans.get("x", 0.0)), float(trans.get("y", 0.0)), float(trans.get("z", 0.0)), ] ) - rotation = Rotation.from_euler( - "xyz", + roll = float(rpy.get("roll", 0.0)) + pitch = float(rpy.get("pitch", 0.0)) + yaw = float(rpy.get("yaw", 0.0)) + cos_roll = np.cos(roll) + sin_roll = np.sin(roll) + cos_pitch = np.cos(pitch) + sin_pitch = np.sin(pitch) + cos_yaw = np.cos(yaw) + sin_yaw = np.sin(yaw) + r_z = np.array( [ - float(rpy.get("roll", 0.0)), - float(rpy.get("pitch", 0.0)), - float(rpy.get("yaw", 0.0)), - ], - ).as_matrix() + [cos_yaw, -sin_yaw, 0.0], + [sin_yaw, cos_yaw, 0.0], + [0.0, 0.0, 1.0], + ] + ) + r_y = np.array( + [ + [cos_pitch, 0.0, sin_pitch], + [0.0, 1.0, 0.0], + [-sin_pitch, 0.0, cos_pitch], + ] + ) + r_x = np.array( + [ + [1.0, 0.0, 0.0], + [0.0, cos_roll, -sin_roll], + [0.0, 
sin_roll, cos_roll], + ] + ) + cam_rotation = r_z @ r_y @ r_x + rotation = cam_rotation.T + translation = -rotation @ cam_offset transform = np.eye(4) transform[:3, :3] = rotation transform[:3, 3] = translation diff --git a/semantic_mapping/semantic_mapping/utils/map_object_utils.py b/semantic_mapping/semantic_mapping/utils/map_object_utils.py index b098c8c..5ad4163 100644 --- a/semantic_mapping/semantic_mapping/utils/map_object_utils.py +++ b/semantic_mapping/semantic_mapping/utils/map_object_utils.py @@ -17,6 +17,39 @@ from ..map_object import MapObject +def _axis_aligned_corners(points: np.ndarray) -> Optional[np.ndarray]: + if points.size == 0: + return None + min_bound = np.min(points, axis=0) + max_bound = np.max(points, axis=0) + return np.array( + [ + [min_bound[0], min_bound[1], min_bound[2]], + [max_bound[0], min_bound[1], min_bound[2]], + [max_bound[0], max_bound[1], min_bound[2]], + [min_bound[0], max_bound[1], min_bound[2]], + [min_bound[0], min_bound[1], max_bound[2]], + [max_bound[0], min_bound[1], max_bound[2]], + [max_bound[0], max_bound[1], max_bound[2]], + [min_bound[0], max_bound[1], max_bound[2]], + ], + dtype=float, + ) + + +def get_object_seed(obj: MapObject) -> int: + """Generate a deterministic seed from a MapObject for color generation. + + Args: + obj: MapObject instance. + + Returns: + CRC32 hash of the object's identifier. + """ + seed_key = obj.object_id or (str(obj.track_ids[0]) if obj.track_ids else obj.label) + return zlib.crc32(seed_key.encode("utf-8")) + + def map_object_to_object_node( obj: MapObject, header: Header, @@ -66,7 +99,7 @@ def map_object_to_object_node( use_regularized=False, ) if len(voxels) > 0: - seed = zlib.crc32(obj.label.encode("utf-8")) + seed = get_object_seed(obj) color = np.array( [ (seed & 0xFF) / 255.0, @@ -105,7 +138,17 @@ def create_bbox_markers( ] for obj in objects: - if obj.bbox3d_corners is None: + voxels = obj.voxel_manager.get_valid_voxels( + diversity_percentile=obj._diversity_percentile, + use_regularized=True, + ) + if len(voxels) == 0: + voxels = obj.voxel_manager.get_valid_voxels( + diversity_percentile=obj._diversity_percentile, + use_regularized=False, + ) + corners = _axis_aligned_corners(voxels) if len(voxels) > 0 else None + if corners is None: continue marker = Marker() marker.header = header @@ -115,13 +158,12 @@ def create_bbox_markers( marker.action = Marker.ADD marker.scale.x = 0.02 - seed = zlib.crc32(obj.label.encode("utf-8")) + seed = get_object_seed(obj) marker.color.r = (seed & 0xFF) / 255.0 marker.color.g = ((seed >> 8) & 0xFF) / 255.0 marker.color.b = ((seed >> 16) & 0xFF) / 255.0 marker.color.a = 1.0 - corners = obj.bbox3d_corners for i, j in edges: marker.points.append(Point(x=float(corners[i, 0]), y=float(corners[i, 1]), z=float(corners[i, 2]))) marker.points.append(Point(x=float(corners[j, 0]), y=float(corners[j, 1]), z=float(corners[j, 2]))) @@ -155,7 +197,7 @@ def create_label_markers( marker.pose.position.y = float(obj.centroid[1]) marker.pose.position.z = float(obj.centroid[2]) + 0.3 - seed = zlib.crc32(obj.label.encode("utf-8")) + seed = get_object_seed(obj) marker.color.r = (seed & 0xFF) / 255.0 marker.color.g = ((seed >> 8) & 0xFF) / 255.0 marker.color.b = ((seed >> 16) & 0xFF) / 255.0 @@ -184,7 +226,7 @@ def create_object_pointcloud(objects: list[MapObject], header: Header) -> Option if len(voxels) == 0: continue - seed = zlib.crc32(obj.label.encode("utf-8")) + seed = get_object_seed(obj) color = np.array( [ (seed & 0xFF) / 255.0, diff --git 
a/semantic_mapping/semantic_mapping/utils/matching.py b/semantic_mapping/semantic_mapping/utils/matching.py index fcfb7cd..e5aaa19 100644 --- a/semantic_mapping/semantic_mapping/utils/matching.py +++ b/semantic_mapping/semantic_mapping/utils/matching.py @@ -4,50 +4,14 @@ from typing import Optional +import cv2 import numpy as np -from vector_perception_utils.pointcloud_utils import compute_centroid, get_oriented_bbox +from vector_perception_utils.pointcloud_utils import compute_centroid from ..map_object import MapObject, Observation -def _convex_hull_2d(points: np.ndarray) -> np.ndarray: - """Compute 2D convex hull in CCW order (Monotonic chain). - - Args: - points: Input points (N, 2). - - Returns: - Hull vertices in CCW order (M, 2). - """ - if points.shape[0] < 3: - return points - - pts = np.unique(points, axis=0) - if pts.shape[0] < 3: - return pts - - pts = pts[np.lexsort((pts[:, 1], pts[:, 0]))] - - def cross(o: np.ndarray, a: np.ndarray, b: np.ndarray) -> float: - return (a[0] - o[0]) * (b[1] - o[1]) - (a[1] - o[1]) * (b[0] - o[0]) - - lower: list[np.ndarray] = [] - for p in pts: - while len(lower) >= 2 and cross(lower[-2], lower[-1], p) <= 0: - lower.pop() - lower.append(p) - - upper: list[np.ndarray] = [] - for p in reversed(pts): - while len(upper) >= 2 and cross(upper[-2], upper[-1], p) <= 0: - upper.pop() - upper.append(p) - - hull = np.array(lower[:-1] + upper[:-1]) - return hull - - def _polygon_area(poly: np.ndarray) -> float: """Compute polygon area using the shoelace formula. @@ -64,6 +28,48 @@ def _polygon_area(poly: np.ndarray) -> float: return 0.5 * abs(float(np.dot(x, np.roll(y, -1)) - np.dot(y, np.roll(x, -1)))) +def _ensure_ccw(poly: np.ndarray) -> np.ndarray: + """Ensure polygon vertices are ordered counter-clockwise.""" + if poly.shape[0] < 3: + return poly + x = poly[:, 0] + y = poly[:, 1] + signed = float(np.dot(x, np.roll(y, -1)) - np.dot(y, np.roll(x, -1))) + if signed < 0: + return poly[::-1] + return poly + + +def compute_bbox(points: np.ndarray) -> Optional[tuple[np.ndarray, np.ndarray]]: + """Compute yaw-only oriented bbox corners + extents.""" + if points.shape[0] < 3: + return None + + xy = points[:, :2].astype(np.float32) + unique_xy = np.unique(xy, axis=0) + if unique_xy.shape[0] < 3: + return None + + rect = cv2.minAreaRect(unique_xy) + (width, height) = rect[1] + if width == 0.0 or height == 0.0: + return None + + box = cv2.boxPoints(rect).astype(np.float64) + rect_xy = _ensure_ccw(box) + + zmin = float(np.min(points[:, 2])) + zmax = float(np.max(points[:, 2])) + extent = np.array([width, height, zmax - zmin], dtype=float) + z_center = zmax - extent[2] / 2.0 + half_z = extent[2] / 2.0 + + bottom = np.hstack([rect_xy, np.full((4, 1), z_center - half_z)]) + top = np.hstack([rect_xy, np.full((4, 1), z_center + half_z)]) + corners = np.vstack([bottom, top]) + return corners, extent + + def _line_intersection(p1: np.ndarray, p2: np.ndarray, cp1: np.ndarray, cp2: np.ndarray) -> np.ndarray: """Intersection of line p1->p2 with line cp1->cp2. 
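
A quick sanity check of the `compute_bbox` helper added above, on synthetic points (not part of the patch; assumes `compute_bbox` is importable from `semantic_mapping.utils.matching`):

```python
import numpy as np

# synthetic point cloud with distinct x/y/z spreads
pts = np.random.default_rng(0).random((100, 3)) * np.array([2.0, 1.0, 0.5])
result = compute_bbox(pts)
assert result is not None
corners, extent = result  # corners: (8, 3), bottom face then top face; extent: (3,)
```
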
@@ -113,7 +119,7 @@ def inside(p: np.ndarray, cp1: np.ndarray, cp2: np.ndarray) -> bool: input_list = output output = [] if len(input_list) == 0: - break + return np.empty((0, 2), dtype=np.float64) s = input_list[-1] for e in input_list: if inside(e, cp1, cp2): @@ -124,13 +130,13 @@ def inside(p: np.ndarray, cp1: np.ndarray, cp2: np.ndarray) -> bool: output.append(_line_intersection(s, e, cp1, cp2)) s = e output = np.array(output, dtype=np.float64) - return output + return np.array(output, dtype=np.float64) def compute_iou_3d_with_ratios( bbox1: np.ndarray, bbox2: np.ndarray ) -> tuple[float, float, float]: - """Compute IoU and intersection ratios using 2D polygon overlap + Z overlap. + """Compute IoU and intersection ratios using yaw-only bbox overlap. Args: bbox1: Corner points for bbox1 (N, 3). @@ -142,12 +148,12 @@ def compute_iou_3d_with_ratios( if bbox1.shape[0] < 4 or bbox2.shape[0] < 4: return 0.0, 0.0, 0.0 - hull1 = _convex_hull_2d(bbox1[:, :2]) - hull2 = _convex_hull_2d(bbox2[:, :2]) - if hull1.shape[0] < 3 or hull2.shape[0] < 3: + rect1 = _ensure_ccw(bbox1[:4, :2]) + rect2 = _ensure_ccw(bbox2[:4, :2]) + if rect1.shape[0] < 3 or rect2.shape[0] < 3: return 0.0, 0.0, 0.0 - poly_inter = _polygon_clip(hull1, hull2) + poly_inter = _polygon_clip(rect1, rect2) inter_area = _polygon_area(poly_inter) if poly_inter.shape[0] >= 3 else 0.0 zmin1, zmax1 = float(np.min(bbox1[:, 2])), float(np.max(bbox1[:, 2])) @@ -155,8 +161,8 @@ def compute_iou_3d_with_ratios( height = max(0.0, min(zmax1, zmax2) - max(zmin1, zmin2)) inter_vol = inter_area * height - area1 = _polygon_area(hull1) - area2 = _polygon_area(hull2) + area1 = _polygon_area(rect1) + area2 = _polygon_area(rect2) vol1 = area1 * (zmax1 - zmin1) vol2 = area2 * (zmax2 - zmin2) denom = vol1 + vol2 - inter_vol @@ -192,13 +198,9 @@ def find_best_match( return None obs_center = compute_centroid(obs.points) - obs_bbox = get_oriented_bbox(obs.points) - obs_corners = obs_bbox[1] if obs_bbox is not None else None - obs_extent = ( - np.max(obs_corners, axis=0) - np.min(obs_corners, axis=0) - if obs_corners is not None and obs_corners.size > 0 - else None - ) + obs_bbox = compute_bbox(obs.points) + obs_corners = obs_bbox[0] if obs_bbox is not None else None + obs_extent = obs_bbox[1] if obs_bbox is not None else None best_candidate: Optional[MapObject] = None best_dist = float("inf") @@ -215,12 +217,18 @@ def find_best_match( if dist > distance_threshold: continue - cand_corners = candidate.bbox3d_corners - cand_extent = ( - np.max(cand_corners, axis=0) - np.min(cand_corners, axis=0) - if cand_corners is not None and cand_corners.size > 0 - else None + cand_points = candidate.voxel_manager.get_valid_voxels( + diversity_percentile=candidate._diversity_percentile, + use_regularized=True, ) + if len(cand_points) == 0: + cand_points = candidate.voxel_manager.get_valid_voxels( + diversity_percentile=candidate._diversity_percentile, + use_regularized=False, + ) + cand_bbox = compute_bbox(cand_points) if len(cand_points) >= 3 else None + cand_corners = cand_bbox[0] if cand_bbox is not None else None + cand_extent = cand_bbox[1] if cand_bbox is not None else None dist_merge = dist < min_distance_merge if obs_extent is not None and cand_extent is not None: diff --git a/semantic_mapping/semantic_mapping/utils/storage.py b/semantic_mapping/semantic_mapping/utils/storage.py index 6699373..d2be3cc 100644 --- a/semantic_mapping/semantic_mapping/utils/storage.py +++ b/semantic_mapping/semantic_mapping/utils/storage.py @@ -3,6 +3,7 @@ from __future__ import 
annotations from pathlib import Path +import shutil import queue import threading import time @@ -23,6 +24,8 @@ def __init__(self, output_dir: Path, max_queue_size: int = 100, save_png: bool = save_png: If True, also save a .png copy of the image. """ self._output_dir = Path(output_dir) + if self._output_dir.exists() and any(self._output_dir.iterdir()): + shutil.rmtree(self._output_dir) self._output_dir.mkdir(parents=True, exist_ok=True) self._save_png = save_png self._queue: queue.Queue = queue.Queue(maxsize=max_queue_size) @@ -81,11 +84,7 @@ def _run(self) -> None: np.save(mask_path, mask) if self._save_png: png_path = Path(image_path).with_suffix(".png") - if image.ndim == 3 and image.shape[2] == 3: - bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) - cv2.imwrite(str(png_path), bgr) - else: - cv2.imwrite(str(png_path), image) + cv2.imwrite(str(png_path), image) except Exception: pass self._queue.task_done() diff --git a/vector_perception_utils/vector_perception_utils/pointcloud_utils.py b/vector_perception_utils/vector_perception_utils/pointcloud_utils.py index c27a23b..e86678d 100644 --- a/vector_perception_utils/vector_perception_utils/pointcloud_utils.py +++ b/vector_perception_utils/vector_perception_utils/pointcloud_utils.py @@ -148,6 +148,75 @@ def remove_radius_outliers( return points_filtered, colors_filtered, inlier_indices +def fit_plane( + points: np.ndarray, + distance_threshold: float = 0.05, + ransac_n: int = 3, + num_iterations: int = 50, + max_points: int = 8000, + min_inlier_ratio: float = 0.0, + min_normal_z: float = 0.0, + min_above_ratio: float = 0.0, + prefer_up: bool = True, +) -> Optional[np.ndarray]: + """Fit a plane to a point cloud using RANSAC. + + Args: + points: Point coordinates (N, 3). + distance_threshold: Max distance for inliers during RANSAC. + ransac_n: Number of points to sample per RANSAC iteration. + num_iterations: RANSAC iterations. + max_points: Max points to sample for fitting (0 keeps all). + min_inlier_ratio: Minimum inlier ratio to accept the plane. + min_normal_z: Minimum absolute Z component of the plane normal. + min_above_ratio: Minimum ratio of points above the plane. + prefer_up: If True, flip normal to have positive Z. + + Returns: + Plane coefficients [a, b, c, d] for ax + by + cz + d = 0, or None. 
+ """ + if points.shape[0] < 3: + return None + + sample_points = points + if max_points > 0 and points.shape[0] > max_points: + indices = np.random.choice(points.shape[0], max_points, replace=False) + sample_points = points[indices] + + pcd = create_pointcloud(sample_points) + plane_model, inliers = pcd.segment_plane( + distance_threshold=distance_threshold, + ransac_n=ransac_n, + num_iterations=num_iterations, + ) + if len(inliers) == 0: + return None + if min_inlier_ratio > 0.0 and len(inliers) / sample_points.shape[0] < min_inlier_ratio: + return None + + normal = np.array(plane_model[:3], dtype=float) + norm = float(np.linalg.norm(normal)) + if norm == 0.0: + return None + normal /= norm + d = float(plane_model[3]) / norm + + if min_normal_z > 0.0 and abs(normal[2]) < min_normal_z: + return None + if prefer_up and normal[2] < 0: + normal = -normal + d = -d + + if min_above_ratio > 0.0: + check_points = sample_points + distances = check_points @ normal + d + above_ratio = float(np.mean(distances > 0.0)) + if above_ratio < min_above_ratio: + return None + + return np.array([normal[0], normal[1], normal[2], d], dtype=float) + + def transform_pointcloud( points: np.ndarray, transform: np.ndarray @@ -208,14 +277,66 @@ def compute_centroid(points: np.ndarray) -> np.ndarray: """ return np.mean(points, axis=0) +def get_bbox_center(bbox) -> np.ndarray: + """Return bounding box center for Open3D AABB/OBB variants.""" + if hasattr(bbox, "center"): + return np.asarray(bbox.center) + if hasattr(bbox, "get_center"): + return np.asarray(bbox.get_center()) + if hasattr(bbox, "get_min_bound") and hasattr(bbox, "get_max_bound"): + min_bound = np.asarray(bbox.get_min_bound()) + max_bound = np.asarray(bbox.get_max_bound()) + return (min_bound + max_bound) * 0.5 + return np.zeros(3, dtype=float) + + +def get_bbox_extent(bbox) -> np.ndarray: + """Return bounding box extent for Open3D AABB/OBB variants.""" + if hasattr(bbox, "extent"): + return np.asarray(bbox.extent) + if hasattr(bbox, "get_extent"): + return np.asarray(bbox.get_extent()) + if hasattr(bbox, "get_min_bound") and hasattr(bbox, "get_max_bound"): + min_bound = np.asarray(bbox.get_min_bound()) + max_bound = np.asarray(bbox.get_max_bound()) + return max_bound - min_bound + return np.zeros(3, dtype=float) + + +def get_bbox_corners(bbox) -> np.ndarray: + """Return 8 bbox corners for Open3D AABB/OBB variants.""" + if hasattr(bbox, "get_box_points"): + return np.asarray(bbox.get_box_points()) + if hasattr(bbox, "get_min_bound") and hasattr(bbox, "get_max_bound"): + min_bound = np.asarray(bbox.get_min_bound()) + max_bound = np.asarray(bbox.get_max_bound()) + return np.array( + [ + [min_bound[0], min_bound[1], min_bound[2]], + [max_bound[0], min_bound[1], min_bound[2]], + [max_bound[0], max_bound[1], min_bound[2]], + [min_bound[0], max_bound[1], min_bound[2]], + [min_bound[0], min_bound[1], max_bound[2]], + [max_bound[0], min_bound[1], max_bound[2]], + [max_bound[0], max_bound[1], max_bound[2]], + [min_bound[0], max_bound[1], max_bound[2]], + ], + dtype=float, + ) + return np.zeros((0, 3), dtype=float) + + def get_oriented_bbox( points: np.ndarray, + axis_aligned: bool = True, ) -> Optional[Tuple[np.ndarray, np.ndarray]]: """ - Compute minimal oriented bounding box from point cloud. + Compute bounding box from point cloud. Args: points: Point coordinates (N, 3) + axis_aligned: If True, compute axis-aligned bounding box. + If False, compute minimal oriented bounding box. 
Returns: Tuple of (center, corners) where center is (3,) and corners is (8, 3), @@ -227,12 +348,15 @@ def get_oriented_bbox( pcd = create_pointcloud(points) try: - obb = pcd.get_minimal_oriented_bounding_box() + if axis_aligned: + bbox = pcd.get_axis_aligned_bounding_box() + else: + bbox = pcd.get_minimal_oriented_bounding_box() except Exception: return None - center = np.asarray(obb.center) - corners = np.asarray(obb.get_box_points()) + center = get_bbox_center(bbox) + corners = get_bbox_corners(bbox) return center, corners @@ -257,9 +381,12 @@ def pointcloud_to_bbox3d(points: np.ndarray, header=None): obb = pcd.get_minimal_oriented_bounding_box() except: return None - center = obb.center - extent = obb.extent - quat = Rotation.from_matrix(obb.R).as_quat() + center = get_bbox_center(obb) + extent = get_bbox_extent(obb) + if hasattr(obb, "R"): + quat = Rotation.from_matrix(obb.R).as_quat() + else: + quat = np.array([0.0, 0.0, 0.0, 1.0], dtype=float) bbox = BoundingBox3D() bbox.center.position = Point( From 8c4b248be3f17f2b7ed3b86b49a28f61e7f7bcac Mon Sep 17 00:00:00 2001 From: alexlin2 Date: Sun, 25 Jan 2026 19:00:04 -0500 Subject: [PATCH 6/6] Changed DBSCAN to voxel-map connected components for more computationally efficient deduplication and pointcloud filtering --- semantic_mapping/README.md | 13 ++ semantic_mapping/config/params.yaml | 25 +-- .../docs/SEMANTIC_MAPPING_MIGRATION.md | 10 +- .../semantic_mapping/map_object.py | 208 +++++++----------- semantic_mapping/semantic_mapping/mapper.py | 62 ++---- .../semantic_mapping/mapping_node.py | 46 ++-- 6 files changed, 152 insertions(+), 212 deletions(-) diff --git a/semantic_mapping/README.md b/semantic_mapping/README.md index d7f6c88..62b78e5 100644 --- a/semantic_mapping/README.md +++ b/semantic_mapping/README.md @@ -108,6 +108,11 @@ ros2 launch semantic_mapping semantic_mapping.launch.py - `config/sensor_to_camera_tf.yaml`: Static sensor→camera transform for projection. - `config/objects.yaml`: Object taxonomy for detection labels. +Key runtime params: +- `regularize_voxel_size`: Connectivity voxel size used in shape regularization. +- `ground_filter`, `ground_radius`, `ground_z_clearance`: Simple ground clamp around robot. +- `visualization_max_objects`: Limits RViz markers/pointcloud to most recent objects. + ## Topics ### Subscriptions @@ -137,3 +142,11 @@ ros2 launch semantic_mapping semantic_mapping.launch.py - External Gemini API for VLM reasoning - Detection: `detect_anything` package (decoupled) - dimos patterns: `dimos/perception/detection/objectDB.py` + +## Handoff Notes + +- Regularization uses vote-weighted filtering + voxel connected components; adjust + `regularize_voxel_size` and the regularization percentile (in `MapObject`) if + thin structures are dropped. +- Mapping node logs summary stats and a single mapping loop time per cycle. +- Permanent objects persist; only pending objects time out via `pending_ttl_s`.
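
The connectivity step behind `regularize_voxel_size` is a plain 26-connected flood fill over occupied voxel cells, as the `regularize_shape` rewrite below implements. A condensed sketch of that step (standalone helper for illustration):

```python
import numpy as np

def voxel_components(points: np.ndarray, voxel_size: float = 0.2) -> list[np.ndarray]:
    """Group point indices into 26-connected voxel components."""
    cells: dict[tuple[int, int, int], list[int]] = {}
    for i, key in enumerate(map(tuple, np.floor(points / voxel_size).astype(int))):
        cells.setdefault(key, []).append(i)
    offsets = [(dx, dy, dz) for dx in (-1, 0, 1) for dy in (-1, 0, 1)
               for dz in (-1, 0, 1) if (dx, dy, dz) != (0, 0, 0)]
    seen: set[tuple[int, int, int]] = set()
    components = []
    for start in cells:
        if start in seen:
            continue
        stack, members = [start], []
        seen.add(start)
        while stack:  # depth-first flood fill over neighboring cells
            cur = stack.pop()
            members.extend(cells[cur])
            for dx, dy, dz in offsets:
                nb = (cur[0] + dx, cur[1] + dy, cur[2] + dz)
                if nb in cells and nb not in seen:
                    seen.add(nb)
                    stack.append(nb)
        components.append(np.asarray(members))
    return components
```

A larger `regularize_voxel_size` merges sparse parts (e.g. chair legs) into the main component, at the cost of occasionally bridging to nearby clutter.
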
diff --git a/semantic_mapping/config/params.yaml b/semantic_mapping/config/params.yaml index 370d9c9..0749d09 100644 --- a/semantic_mapping/config/params.yaml +++ b/semantic_mapping/config/params.yaml @@ -14,16 +14,13 @@ semantic_mapping_node: map_frame: map confidence_threshold: 0.35 - voxel_size: 0.03 + voxel_size: 0.05 + regularize_voxel_size: 0.2 distance_threshold: 0.3 - iou_threshold: 0.5 - min_detections_for_permanent: 3 - pending_ttl_s: 10.0 - max_inactive_frames: 20 + iou_threshold: 0.3 + min_detections_for_permanent: 4 + pending_ttl_s: 5.0 num_angle_bins: 15 - dbscan_eps_scale: 2.0 - dbscan_min_points_initial: 6 - dbscan_min_points_mature: 12 detection_linear_state_time_bias: 0.0 detection_angular_state_time_bias: 0.0 @@ -38,17 +35,13 @@ semantic_mapping_node: depth_filter_margin: 0.15 depth_jump_threshold: 0.3 sensor_to_camera_tf: config/sensor_to_camera_tf.yaml - mask_erode_kernel: 4 - mask_erode_iterations: 3 + mask_erode_kernel: 5 + mask_erode_iterations: 2 max_cloud_distance: 8.0 - outlier_filter: true - outlier_nb_neighbors: 12 - outlier_std_ratio: 1.6 - outlier_min_points: 20 visualization_max_objects: 50 ground_filter: true - ground_radius: 2.0 - ground_z_clearance: 0.05 + ground_radius: 2.5 + ground_z_clearance: 0.0 target_object: refrigerator save_png: True diff --git a/semantic_mapping/docs/SEMANTIC_MAPPING_MIGRATION.md b/semantic_mapping/docs/SEMANTIC_MAPPING_MIGRATION.md index 07ef2c5..3f22372 100644 --- a/semantic_mapping/docs/SEMANTIC_MAPPING_MIGRATION.md +++ b/semantic_mapping/docs/SEMANTIC_MAPPING_MIGRATION.md @@ -460,13 +460,19 @@ Items to remove: - Added mask erosion, depth-jump pruning, distance gating, and light outlier filtering for cleaner object clouds. - Added reprojected observation-angle voting for better multi-view aggregation. - Fixed bbox ordering and switched `/obj_boxes` to axis-aligned markers (via `get_oriented_bbox(axis_aligned=True)`). -- Updated DBSCAN regularization to v2 logic and exposed DBSCAN tuning params. +- Replaced DBSCAN regularization with vote-weighted filtering + voxel connected components. +- Added `regularize_voxel_size` to grow connectivity for thin structures. +- Simplified ground filtering to radius-based Z clamp around robot. +- Added `visualization_max_objects` to keep recent objects visible in RViz. +- Logging: mapping stats (total/total_deleted/total_permanent) + mapping loop time. - Fixed image saving color order and ensured output dir is cleared on start. - Set MobileCLIP weights cache to `detect_anything/models` and removed corrupted caches. ## Temporary Handoff Notes -- Add ground-height clamp to further suppress ground/background leakage. +- Regularization is now connected-components; tune `regularize_voxel_size` and keep ratio + (`MapObject.regularize_shape` percentile) if legs/sparse parts are dropped. +- Ground filter uses `ground_filter`, `ground_radius`, `ground_z_clearance`. - If bagfile sync issues persist, consider switching mapping inputs to `/sensor_scan` + `/state_estimation_at_scan`. - Verify VLM objects.yaml path in VLM_ROS (`vlm_reasoning_node` still expects old path). 
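
For reference, the vote-weighted filtering half of the new regularization reduces to a quantile cutoff over per-voxel weights (observation-angle diversity times votes). A sketch of the logic that `regularize_shape` applies below, including its keep-everything fallback:

```python
import numpy as np

def vote_weighted_keep(weights: np.ndarray, keep_ratio: float = 0.85) -> np.ndarray:
    """Boolean mask keeping roughly the top `keep_ratio` of voxels by weight."""
    if keep_ratio >= 1.0:
        return np.ones(weights.shape[0], dtype=bool)
    cutoff = float(np.quantile(weights, 1.0 - keep_ratio))
    mask = weights >= cutoff
    # never drop every voxel; fall back to keeping all
    return mask if mask.any() else np.ones(weights.shape[0], dtype=bool)
```
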
diff --git a/semantic_mapping/semantic_mapping/map_object.py b/semantic_mapping/semantic_mapping/map_object.py index 5875d9c..a9e3633 100644 --- a/semantic_mapping/semantic_mapping/map_object.py +++ b/semantic_mapping/semantic_mapping/map_object.py @@ -20,7 +20,6 @@ from vector_perception_utils.pointcloud_utils import ( compute_centroid, create_pointcloud, - remove_statistical_outliers, ) @@ -375,13 +374,8 @@ class MapObject: _diversity_percentile: float = field(default=0.85, repr=False) _angle_threshold: float = field(default=np.deg2rad(5), repr=False) _distance_threshold: float = field(default=0.3, repr=False) - _dbscan_eps_scale: float = field(default=2.0, repr=False) - _dbscan_min_points_initial: int = field(default=6, repr=False) - _dbscan_min_points_mature: int = field(default=12, repr=False) - _outlier_filter: bool = field(default=True, repr=False) - _outlier_nb_neighbors: int = field(default=10, repr=False) - _outlier_std_ratio: float = field(default=2.0, repr=False) - _outlier_min_points: int = field(default=30, repr=False) + _needs_regularization: bool = field(default=True, repr=False) + _regularize_voxel_size: float = field(default=0.06, repr=False) @classmethod def from_observation( @@ -389,14 +383,8 @@ def from_observation( object_id: str, obs: Observation, voxel_size: float = 0.03, + regularize_voxel_size: float = 0.06, num_angle_bins: int = 15, - dbscan_eps_scale: float = 2.0, - dbscan_min_points_initial: int = 6, - dbscan_min_points_mature: int = 12, - outlier_filter: bool = True, - outlier_nb_neighbors: int = 10, - outlier_std_ratio: float = 2.0, - outlier_min_points: int = 30, ) -> MapObject: """Create a new MapObject from an initial observation. @@ -404,14 +392,8 @@ def from_observation( object_id: Unique identifier for this object. obs: Initial observation. voxel_size: Voxel grid cell size in meters. + regularize_voxel_size: Voxel size for regularization clustering. num_angle_bins: Number of angle bins for observation tracking. - dbscan_eps_scale: DBSCAN eps scale (multiplier on voxel size). - dbscan_min_points_initial: Min points for new objects. - dbscan_min_points_mature: Min points for mature objects. - outlier_filter: Whether to run statistical outlier filtering. - outlier_nb_neighbors: Neighbor count for outlier filtering. - outlier_std_ratio: Standard deviation ratio for outlier filtering. - outlier_min_points: Minimum points to apply outlier filtering. Returns: New MapObject instance. 
@@ -433,13 +415,7 @@ def from_observation( points_count={obs.label: len(obs.points)}, robot_poses=[{"R": obs.robot_R.copy(), "t": obs.robot_t.copy()}], last_update_time=obs.timestamp, - _dbscan_eps_scale=dbscan_eps_scale, - _dbscan_min_points_initial=dbscan_min_points_initial, - _dbscan_min_points_mature=dbscan_min_points_mature, - _outlier_filter=outlier_filter, - _outlier_nb_neighbors=outlier_nb_neighbors, - _outlier_std_ratio=outlier_std_ratio, - _outlier_min_points=outlier_min_points, + _regularize_voxel_size=regularize_voxel_size, ) return obj @@ -497,6 +473,7 @@ def update(self, obs: Observation) -> None: self.detection_count += 1 self.inactive_frames = 0 self.last_update_time = obs.timestamp + self._needs_regularization = True # Check if pose is new (not similar to existing) is_similar = self._is_similar_pose(obs.robot_R, obs.robot_t) @@ -533,6 +510,7 @@ def merge(self, other: MapObject) -> None: # Merge voxel managers self.voxel_manager.merge_from(other.voxel_manager) + self._needs_regularization = True # Merge class statistics for label, count in other.class_votes.items(): @@ -646,112 +624,82 @@ def _compute_geometry(self) -> None: self._needs_recompute = False def regularize_shape(self, percentile: float = 0.85) -> None: - """Apply v2 shape regularization (ported from legacy pipeline).""" + """Apply vote-weighted regularization with light outlier removal.""" + if not self._needs_regularization: + return voxels = self.voxel_manager.voxels if len(voxels) < 3: self.voxel_manager.regularized_mask = np.ones(len(voxels), dtype=bool) + self._needs_regularization = False self._invalidate_cache() return - pcd = create_pointcloud(voxels) - eps = self.voxel_manager.voxel_size * self._dbscan_eps_scale - min_points = ( - self._dbscan_min_points_mature - if self.detection_count >= 3 - else self._dbscan_min_points_initial - ) - labels = np.array(pcd.cluster_dbscan(eps=eps, min_points=min_points)) - unique_labels = np.unique(labels) - - cluster_masks = [] - cluster_weights = [] - cluster_centroids = [] - for label in unique_labels: - if label == -1: - continue - mask = labels == label - if not np.any(mask): - continue - weight = float(np.sum(self.voxel_manager.observation_angles[mask])) - if weight < 5: - continue - cluster_masks.append(mask) - cluster_weights.append(weight) - cluster_centroids.append(compute_centroid(voxels[mask])) - - if len(cluster_masks) == 0: - self.voxel_manager.regularized_mask = np.zeros( - self.voxel_manager.voxels.shape[0], dtype=bool - ) - elif len(cluster_masks) == 1: - self.voxel_manager.regularized_mask = cluster_masks[0] + weights = np.sum(self.voxel_manager.observation_angles, axis=1) * self.voxel_manager.votes + keep_ratio = percentile if percentile is not None else 1.0 + if keep_ratio >= 1.0: + keep_mask = np.ones(voxels.shape[0], dtype=bool) else: - clustering_weight = np.array(cluster_weights, dtype=float, copy=False) - cluster_centroids = np.array(cluster_centroids, dtype=float, copy=False) - - main_idx = int(np.argmax(clustering_weight)) - main_centroid = cluster_centroids[main_idx] - - dists = np.linalg.norm(cluster_centroids - main_centroid, axis=1) - if len(voxels) >= 4: - try: - obb = pcd.get_minimal_oriented_bounding_box() - dist_base = float(np.linalg.norm(np.asarray(obb.extent))) - except Exception: - dist_base = float(np.max(dists)) - else: - dist_base = float(np.max(dists)) - if dist_base == 0: - adjusted_weight = clustering_weight.copy() - else: - alpha = 3.0 - scale = np.exp(-alpha * dists / dist_base) - adjusted_weight = clustering_weight * 
scale - - main_adjusted_idx = int(np.argmax(adjusted_weight)) - chosen = np.zeros(len(cluster_masks), dtype=bool) - - if percentile is not None: - target = percentile * np.sum(adjusted_weight) - acc = adjusted_weight[main_adjusted_idx] - chosen[main_adjusted_idx] = True - order = np.argsort(adjusted_weight)[::-1] - for i in order: - if chosen[i]: - continue - if acc + adjusted_weight[i] <= target + 1e-8: - chosen[i] = True - acc += adjusted_weight[i] - if acc >= target: - break - else: - chosen[:] = True - - valid_cluster_mask = np.zeros(self.voxel_manager.voxels.shape[0], dtype=bool) - for i, cmask in enumerate(cluster_masks): - if chosen[i]: - valid_cluster_mask |= cmask - self.voxel_manager.regularized_mask = valid_cluster_mask - - if ( - self._outlier_filter - and np.any(self.voxel_manager.regularized_mask) - and np.sum(self.voxel_manager.regularized_mask) >= self._outlier_min_points - ): - keep_indices = np.nonzero(self.voxel_manager.regularized_mask)[0] - keep_points = voxels[keep_indices] - _, _, inlier_indices = remove_statistical_outliers( - keep_points, - nb_neighbors=self._outlier_nb_neighbors, - std_ratio=self._outlier_std_ratio, - ) - if inlier_indices.size > 0: - refined_mask = np.zeros_like(self.voxel_manager.regularized_mask) - refined_mask[keep_indices[inlier_indices]] = True - self.voxel_manager.regularized_mask = refined_mask - else: - self.voxel_manager.regularized_mask = np.zeros_like( - self.voxel_manager.regularized_mask - ) - + cutoff = float(np.quantile(weights, 1.0 - keep_ratio)) + keep_mask = weights >= cutoff + + if not np.any(keep_mask): + keep_mask = np.ones(voxels.shape[0], dtype=bool) + + selected_indices = np.nonzero(keep_mask)[0] + points_sel = voxels[selected_indices] + weights_sel = weights[selected_indices] + if points_sel.shape[0] >= 3: + voxel_size = self._regularize_voxel_size + keys = np.floor(points_sel / voxel_size).astype(np.int32) + voxel_map: dict[tuple[int, int, int], list[int]] = {} + for idx, key in enumerate(map(tuple, keys)): + voxel_map.setdefault(key, []).append(idx) + + neighbor_offsets = [ + (dx, dy, dz) + for dx in (-1, 0, 1) + for dy in (-1, 0, 1) + for dz in (-1, 0, 1) + if not (dx == 0 and dy == 0 and dz == 0) + ] + visited: set[tuple[int, int, int]] = set() + components: list[tuple[np.ndarray, float, np.ndarray]] = [] + for key in voxel_map: + if key in visited: + continue + stack = [key] + visited.add(key) + comp_indices: list[int] = [] + while stack: + current = stack.pop() + comp_indices.extend(voxel_map[current]) + for dx, dy, dz in neighbor_offsets: + neighbor = (current[0] + dx, current[1] + dy, current[2] + dz) + if neighbor in voxel_map and neighbor not in visited: + visited.add(neighbor) + stack.append(neighbor) + + comp_idx = np.array(comp_indices, dtype=int) + comp_weight = float(np.sum(weights_sel[comp_idx])) + comp_centroid = compute_centroid(points_sel[comp_idx]) + components.append((comp_idx, comp_weight, comp_centroid)) + + if components: + main_idx = int(np.argmax([c[1] for c in components])) + main_points = points_sel[components[main_idx][0]] + main_centroid = components[main_idx][2] + extent = np.max(main_points, axis=0) - np.min(main_points, axis=0) + dist_thresh = max(0.3, 0.5 * float(np.max(extent))) + + keep_sel_mask = np.zeros(points_sel.shape[0], dtype=bool) + for comp_idx, _, comp_centroid in components: + if np.linalg.norm(comp_centroid - main_centroid) <= dist_thresh: + keep_sel_mask[comp_idx] = True + if np.any(keep_sel_mask): + keep_mask = np.zeros(voxels.shape[0], dtype=bool) + 
+
+        self.voxel_manager.regularized_mask = keep_mask
+
+        self._needs_regularization = False
         self._invalidate_cache()
diff --git a/semantic_mapping/semantic_mapping/mapper.py b/semantic_mapping/semantic_mapping/mapper.py
index 8b1b259..cc67cf1 100644
--- a/semantic_mapping/semantic_mapping/mapper.py
+++ b/semantic_mapping/semantic_mapping/mapper.py
@@ -29,39 +29,25 @@ class ObjectMapper:
     def __init__(
         self,
         voxel_size: float = 0.03,
+        regularize_voxel_size: float = 0.06,
         confidence_threshold: float = 0.30,
         distance_threshold: float = 0.5,
         iou_threshold: float = 0.2,
         min_detections_for_permanent: int = 3,
         pending_ttl_s: float = 10.0,
-        max_inactive_frames: int = 20,
         num_angle_bins: int = 15,
-        dbscan_eps_scale: float = 2.0,
-        dbscan_min_points_initial: int = 6,
-        dbscan_min_points_mature: int = 12,
-        outlier_filter: bool = True,
-        outlier_nb_neighbors: int = 10,
-        outlier_std_ratio: float = 2.0,
-        outlier_min_points: int = 30,
     ) -> None:
         """Initialize the object mapper and its thresholds.

         Args:
             voxel_size: Voxel grid cell size in meters.
+            regularize_voxel_size: Voxel grid cell size in meters for connected-component regularization.
             confidence_threshold: Minimum detection confidence to accept.
             distance_threshold: Max centroid distance for spatial matching.
             iou_threshold: IoU threshold for merge checks.
             min_detections_for_permanent: Detections before promotion.
             pending_ttl_s: Time-to-live for pending objects.
-            max_inactive_frames: Frames before pruning inactive objects.
             num_angle_bins: Observation angle bins for voxel manager.
-            dbscan_eps_scale: DBSCAN eps scale (multiplier on voxel size).
-            dbscan_min_points_initial: DBSCAN min points for new objects.
-            dbscan_min_points_mature: DBSCAN min points for mature objects.
-            outlier_filter: Whether to run statistical outlier filtering.
-            outlier_nb_neighbors: Neighbor count for outlier filtering.
-            outlier_std_ratio: Standard deviation ratio for outlier filtering.
-            outlier_min_points: Minimum points to apply outlier filtering.
         """
         self._pending_objects: dict[str, MapObject] = {}
         self._permanent_objects: dict[str, MapObject] = {}
@@ -69,20 +55,13 @@ def __init__(
         self._lock = threading.RLock()

         self._voxel_size = voxel_size
+        self._regularize_voxel_size = regularize_voxel_size
         self._confidence_threshold = confidence_threshold
         self._distance_threshold = distance_threshold
         self._iou_threshold = iou_threshold
         self._min_detections_for_permanent = min_detections_for_permanent
         self._pending_ttl_s = pending_ttl_s
-        self._max_inactive_frames = max_inactive_frames
         self._num_angle_bins = num_angle_bins
-        self._dbscan_eps_scale = dbscan_eps_scale
-        self._dbscan_min_points_initial = dbscan_min_points_initial
-        self._dbscan_min_points_mature = dbscan_min_points_mature
-        self._outlier_filter = outlier_filter
-        self._outlier_nb_neighbors = outlier_nb_neighbors
-        self._outlier_std_ratio = outlier_std_ratio
-        self._outlier_min_points = outlier_min_points

     def update(self, observations: list[Observation], target_object: str | None = None) -> UpdateStats:
         """Update the map with new observations.
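Note on the `regularize_shape` rewrite above: it reduces to a quantile cut that keeps the heaviest `percentile` fraction of voxels by vote weight, followed by a 26-connected flood fill over a voxel hash that discards clumps far from the dominant component. A minimal standalone sketch of that logic under toy inputs; `weighted_keep_mask` and `connected_components` are illustrative names, not part of this package:

```python
import numpy as np


def weighted_keep_mask(weights: np.ndarray, percentile: float = 0.85) -> np.ndarray:
    """Keep the heaviest `percentile` fraction of points by weight."""
    cutoff = float(np.quantile(weights, 1.0 - percentile))
    mask = weights >= cutoff
    return mask if mask.any() else np.ones_like(mask, dtype=bool)


def connected_components(points: np.ndarray, voxel_size: float) -> list[np.ndarray]:
    """Flood fill over a voxel hash with 26-connectivity; returns index groups."""
    keys = np.floor(points / voxel_size).astype(np.int64)
    cells: dict[tuple, list[int]] = {}
    for i, key in enumerate(map(tuple, keys)):
        cells.setdefault(key, []).append(i)
    offsets = [
        (dx, dy, dz)
        for dx in (-1, 0, 1) for dy in (-1, 0, 1) for dz in (-1, 0, 1)
        if (dx, dy, dz) != (0, 0, 0)
    ]
    seen: set[tuple] = set()
    groups: list[np.ndarray] = []
    for start in cells:
        if start in seen:
            continue
        seen.add(start)
        stack, members = [start], []
        while stack:
            cur = stack.pop()
            members.extend(cells[cur])
            for dx, dy, dz in offsets:
                nb = (cur[0] + dx, cur[1] + dy, cur[2] + dz)
                if nb in cells and nb not in seen:
                    seen.add(nb)
                    stack.append(nb)
        groups.append(np.asarray(members))
    return groups


# Two tight clumps 2 m apart: the quantile cut trims low-weight points,
# then the component walk isolates the stray clump so a distance test
# like the one in regularize_shape() can reject it.
rng = np.random.default_rng(0)
pts = np.vstack([rng.normal(0.0, 0.02, (50, 3)), rng.normal(2.0, 0.02, (8, 3))])
w = rng.uniform(1.0, 5.0, len(pts))
kept = pts[weighted_keep_mask(w)]
print(sorted(len(g) for g in connected_components(kept, voxel_size=0.06)))
```

Hashing points into integer cells keeps the component walk roughly linear in the number of occupied voxels, which is the motivation for replacing the per-call DBSCAN of the legacy path.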
@@ -97,6 +76,7 @@ def update(self, observations: list[Observation], target_object: str | None = No stats = UpdateStats() now = max((obs.timestamp for obs in observations), default=time.time()) updated_ids: set[str] = set() + regularized_ids: set[str] = set() with self._lock: for obs in observations: @@ -117,18 +97,21 @@ def update(self, observations: list[Observation], target_object: str | None = No self._track_id_map[track_id] = obj.object_id if obj.status == ObjectStatus.PENDING: - self._check_promotion(obj) - if obj.status == ObjectStatus.PERMANENT: + if self._check_promotion(obj): stats.promoted += 1 + if obj.object_id not in regularized_ids and obj.inactive_frames == 0: + obj.regularize_shape(percentile=obj._diversity_percentile) + regularized_ids.add(obj.object_id) + for obj in list(self._pending_objects.values()) + list(self._permanent_objects.values()): if obj.object_id not in updated_ids: obj.inactive_frames += 1 - obj.regularize_shape(percentile=obj._diversity_percentile) - stats.deleted += len(self._prune_inactive(target_object, now)) + deleted = self._prune_inactive(target_object, now) + stats.deleted = len(deleted) stats.merged += self.merge_objects() - stats.total = len(self._pending_objects) + len(self._permanent_objects) + stats.total = len(self._permanent_objects) return stats @@ -155,28 +138,23 @@ def _create_object(self, obs: Observation) -> MapObject: object_id=str(uuid.uuid4()), obs=obs, voxel_size=self._voxel_size, + regularize_voxel_size=self._regularize_voxel_size, num_angle_bins=self._num_angle_bins, - dbscan_eps_scale=self._dbscan_eps_scale, - dbscan_min_points_initial=self._dbscan_min_points_initial, - dbscan_min_points_mature=self._dbscan_min_points_mature, - outlier_filter=self._outlier_filter, - outlier_nb_neighbors=self._outlier_nb_neighbors, - outlier_std_ratio=self._outlier_std_ratio, - outlier_min_points=self._outlier_min_points, ) self._track_id_map[obs.track_id] = obj.object_id return obj - def _check_promotion(self, obj: MapObject) -> None: + def _check_promotion(self, obj: MapObject) -> bool: """Promote object from pending to permanent if threshold met.""" if obj.status != ObjectStatus.PENDING: - return + return False if obj.detection_count < self._min_detections_for_permanent: - return + return False if obj.object_id in self._pending_objects: self._pending_objects.pop(obj.object_id) obj.status = ObjectStatus.PERMANENT self._permanent_objects[obj.object_id] = obj + return True def merge_objects(self) -> int: """Merge overlapping permanent objects. 
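`_check_promotion` now returns whether a promotion actually occurred, so `update` can increment `stats.promoted` directly instead of re-reading `obj.status` after the call. A minimal sketch of that pattern with toy stand-ins (`Obj` and `check_promotion` are illustrative, not the package API):

```python
from dataclasses import dataclass


@dataclass
class Obj:
    detections: int = 0
    permanent: bool = False


def check_promotion(obj: Obj, threshold: int = 3) -> bool:
    """Promote once the detection count reaches the threshold; report it."""
    if obj.permanent or obj.detections < threshold:
        return False
    obj.permanent = True
    return True


objs = [Obj(detections=n) for n in (1, 3, 5)]
promoted = sum(check_promotion(o) for o in objs)
print(promoted)  # 2: only the objects with >= 3 detections are promoted
```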
@@ -267,15 +245,13 @@ def _prune_inactive(self, target_object: str | None, now: float) -> list[MapObje deleted: list[MapObject] = [] for obj_id, obj in list(self._pending_objects.items()): - if (now - obj.last_update_time) > self._pending_ttl_s or ( - obj.inactive_frames > self._max_inactive_frames - ): + if (now - obj.last_update_time) > self._pending_ttl_s: deleted.append(self._pending_objects.pop(obj_id)) for obj_id, obj in list(self._permanent_objects.items()): if target_object and obj.label == target_object: continue - if obj.inactive_frames > self._max_inactive_frames: + if obj.status == ObjectStatus.DELETED: deleted.append(self._permanent_objects.pop(obj_id)) for obj in deleted: diff --git a/semantic_mapping/semantic_mapping/mapping_node.py b/semantic_mapping/semantic_mapping/mapping_node.py index 0a4cf37..16b5649 100644 --- a/semantic_mapping/semantic_mapping/mapping_node.py +++ b/semantic_mapping/semantic_mapping/mapping_node.py @@ -73,15 +73,12 @@ def __init__(self) -> None: self.declare_parameter("confidence_threshold", 0.3) self.declare_parameter("voxel_size", 0.03) + self.declare_parameter("regularize_voxel_size", 0.06) self.declare_parameter("distance_threshold", 0.5) self.declare_parameter("iou_threshold", 0.2) self.declare_parameter("min_detections_for_permanent", 3) self.declare_parameter("pending_ttl_s", 10.0) - self.declare_parameter("max_inactive_frames", 20) self.declare_parameter("num_angle_bins", 15) - self.declare_parameter("dbscan_eps_scale", 2.0) - self.declare_parameter("dbscan_min_points_initial", 6) - self.declare_parameter("dbscan_min_points_mature", 12) self.declare_parameter("detection_linear_state_time_bias", 0.0) self.declare_parameter("detection_angular_state_time_bias", 0.0) @@ -99,10 +96,6 @@ def __init__(self) -> None: self.declare_parameter("mask_erode_kernel", 3) self.declare_parameter("mask_erode_iterations", 2) self.declare_parameter("max_cloud_distance", 8.0) - self.declare_parameter("outlier_filter", True) - self.declare_parameter("outlier_nb_neighbors", 10) - self.declare_parameter("outlier_std_ratio", 2.0) - self.declare_parameter("outlier_min_points", 30) self.declare_parameter("visualization_max_objects", 50) self.declare_parameter("ground_filter", True) self.declare_parameter("ground_radius", 2.0) @@ -148,10 +141,6 @@ def __init__(self) -> None: (self._mask_erode_kernel_size, self._mask_erode_kernel_size), ) self._max_cloud_distance = float(self.get_parameter("max_cloud_distance").value) - self._outlier_filter = bool(self.get_parameter("outlier_filter").value) - self._outlier_nb_neighbors = int(self.get_parameter("outlier_nb_neighbors").value) - self._outlier_std_ratio = float(self.get_parameter("outlier_std_ratio").value) - self._outlier_min_points = int(self.get_parameter("outlier_min_points").value) self._visualization_max_objects = int( self.get_parameter("visualization_max_objects").value ) @@ -167,20 +156,13 @@ def __init__(self) -> None: self._mapper = ObjectMapper( voxel_size=float(self.get_parameter("voxel_size").value), + regularize_voxel_size=float(self.get_parameter("regularize_voxel_size").value), confidence_threshold=self._confidence_threshold, distance_threshold=float(self.get_parameter("distance_threshold").value), iou_threshold=float(self.get_parameter("iou_threshold").value), min_detections_for_permanent=int(self.get_parameter("min_detections_for_permanent").value), pending_ttl_s=float(self.get_parameter("pending_ttl_s").value), - max_inactive_frames=int(self.get_parameter("max_inactive_frames").value), 
num_angle_bins=int(self.get_parameter("num_angle_bins").value), - dbscan_eps_scale=float(self.get_parameter("dbscan_eps_scale").value), - dbscan_min_points_initial=int(self.get_parameter("dbscan_min_points_initial").value), - dbscan_min_points_mature=int(self.get_parameter("dbscan_min_points_mature").value), - outlier_filter=self._outlier_filter, - outlier_nb_neighbors=self._outlier_nb_neighbors, - outlier_std_ratio=self._outlier_std_ratio, - outlier_min_points=self._outlier_min_points, ) self._image_saver = ImageSaveQueue( @@ -313,6 +295,7 @@ def _mapping_callback(self) -> None: return self._mapping_busy = True + start_time = time.perf_counter() try: detection_msg, viewpoint_stamp, viewpoint_id = self._select_detection() if detection_msg is None: @@ -338,7 +321,12 @@ def _mapping_callback(self) -> None: if len(observations) == 0: return - self._mapper.update(observations, target_object=self._target_object) + stats = self._mapper.update(observations, target_object=self._target_object) + self.get_logger().info( + "Mapping stats:\n" + f" total: {stats.total}\n" + f" deleted: {stats.deleted}" + ) self._update_observation_angles(observations) self._update_best_images(observations, image) self._publish_map(detection_stamp, viewpoint_id) @@ -347,6 +335,8 @@ def _mapping_callback(self) -> None: if viewpoint_stamp is not None: self._processed_viewpoints.add(viewpoint_stamp) finally: + elapsed_ms = (time.perf_counter() - start_time) * 1000.0 + self.get_logger().info(f"Mapping loop time: {elapsed_ms:.2f} ms") self._mapping_busy = False def _select_detection(self) -> tuple[DetectionResult | None, float | None, int]: @@ -555,6 +545,20 @@ def _build_observations( masks=masks, pixel_idx=pixel_idx, ) + if self._depth_filter: + filtered_clouds = [] + filtered_depths = [] + for cloud, depth in zip(seg_clouds, seg_depths): + if depth.shape[0] == 0: + filtered_clouds.append(cloud) + filtered_depths.append(depth) + continue + min_depth = float(np.min(depth)) + keep = depth[:, 0] <= (min_depth + self._depth_filter_margin) + filtered_clouds.append(cloud[keep]) + filtered_depths.append(depth[keep]) + seg_clouds = filtered_clouds + seg_depths = filtered_depths world_clouds = transform_segmented_clouds_to_world( clouds=seg_clouds, depths=seg_depths,