diff --git a/tools/auto_labeling_3d/Dockerfile b/tools/auto_labeling_3d/Dockerfile index 288a5f8f8..ab60fc9d4 100644 --- a/tools/auto_labeling_3d/Dockerfile +++ b/tools/auto_labeling_3d/Dockerfile @@ -15,6 +15,9 @@ RUN git clone -b ${perception_dataset_version} ${perception_dataset_url} \ WORKDIR /workspace COPY projects projects +## Setup Auto Labeling 3D +RUN pip install polars + ## Setup CenterPoint ENV CUBLAS_WORKSPACE_CONFIG=:4096:8 diff --git a/tools/auto_labeling_3d/attach_tracking_id/mot.py b/tools/auto_labeling_3d/attach_tracking_id/mot.py index 099600690..da1e7ad6a 100644 --- a/tools/auto_labeling_3d/attach_tracking_id/mot.py +++ b/tools/auto_labeling_3d/attach_tracking_id/mot.py @@ -166,6 +166,7 @@ def _transform_pred_instance_to_global_t4box( # [vx, vy, vz] velocity: Tuple[float] = (*velocity, np.float64(0.0)) + # T4Box3D validates unix_time as int; the tracker keeps the precise float timestamp separately. box = T4Box3D( unix_time=int(timestamp), frame_id="base_link", @@ -231,7 +232,7 @@ class KalmanBoxTracker: count: int = 0 - def __init__(self, t4box3d: T4Box3D, class_params=CLASS_PARAMS): + def __init__(self, t4box3d: T4Box3D, timestamp: float, class_params=CLASS_PARAMS): """ Initialize Kalman filter with class-specific parameters. @@ -246,8 +247,8 @@ def __init__(self, t4box3d: T4Box3D, class_params=CLASS_PARAMS): """ self.kf = KalmanFilter(dim_x=4, dim_z=4) - self.prev_timestamp: float = t4box3d.unix_time - self.latest_timestamp: float = t4box3d.unix_time + self.prev_timestamp: float = float(timestamp) + self.latest_timestamp: float = float(timestamp) self.id: str = t4box3d.uuid # Measurement matrix (identity matrix for direct state observation). States are x,y,vx,vy @@ -298,7 +299,7 @@ def predict(self, timestamp: float) -> TrackedBox2D: tracked_box = TrackedBox2D(position_xy=self.kf.x[:2].reshape((2,)), class_name=self.class_name) return tracked_box - def update(self, t4box3d: T4Box3D): + def update(self, t4box3d: T4Box3D, timestamp: float): """ Update state with new measurement. @@ -313,7 +314,7 @@ def update(self, t4box3d: T4Box3D): # Update Kalman filter with new measurement self.kf.update(np.concatenate([t4box3d.position[:2], t4box3d.velocity[:2]]).reshape((4, 1))) - self.prev_timestamp = t4box3d.unix_time + self.prev_timestamp = float(timestamp) def end_of_life(self): """Check if tracker should be terminated. @@ -483,7 +484,12 @@ def _convert_pred_instance_to_global_t4boxes( return instance_ids, bbox_list - def _add_new_tracker(self, bbox_list: List[T4Box3D], det_ids: Optional[Iterable[int]] = None): + def _add_new_tracker( + self, + bbox_list: List[T4Box3D], + timestamp: float, + det_ids: Optional[Iterable[int]] = None, + ): """ Add new trackers for specified detections. @@ -495,7 +501,7 @@ def _add_new_tracker(self, bbox_list: List[T4Box3D], det_ids: Optional[Iterable[ if det_ids is None: det_ids = range(len(bbox_list)) for i in det_ids: - self.trackers.append(KalmanBoxTracker(bbox_list[i])) + self.trackers.append(KalmanBoxTracker(bbox_list[i], timestamp=timestamp)) def _remove_old_tracker(self): """Remove trackers that have exceeded their maximum age. @@ -528,7 +534,7 @@ def frame_mot(self, pred_instances_3d: List[Dict[str, Any]], ego2global: NDArray # Initialize new trackers for all detections if no existing trackers if not len(self.trackers): - self._add_new_tracker(bbox_list) + self._add_new_tracker(bbox_list, timestamp=timestamp) return instance_ids # Get tracked_box_list from active trackers @@ -538,13 +544,13 @@ def frame_mot(self, pred_instances_3d: List[Dict[str, Any]], ego2global: NDArray valid_matches, unmatched_dets = associate_detections_to_tracks(bbox_list, tracked_box_list) # For unmatched detections, add new trackes - self._add_new_tracker(bbox_list, unmatched_dets) + self._add_new_tracker(bbox_list, timestamp=timestamp, det_ids=unmatched_dets) # Update matched trackers and instance IDs for det_idx, trk_idx in valid_matches: # Update matched tracker trk = self.trackers[trk_idx] - trk.update(bbox_list[det_idx]) + trk.update(bbox_list[det_idx], timestamp=timestamp) # Update instance id instance_ids[det_idx] = trk.id diff --git a/tools/auto_labeling_3d/entrypoint/configs/example.yaml b/tools/auto_labeling_3d/entrypoint/configs/example.yaml index b4e407e3d..a749aa4dc 100644 --- a/tools/auto_labeling_3d/entrypoint/configs/example.yaml +++ b/tools/auto_labeling_3d/entrypoint/configs/example.yaml @@ -27,25 +27,25 @@ create_info: # model_zoo_url: https://url/to/download/checkpoint.pth - name: streampetr model_config: - config_path: /workspace/projects/StreamPETR/configs/t4dataset/t4_base_vov_flash_480x640_baseline.py - model_zoo_url: https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/streampetr/streampetr-vov99/t4base/v2.5/t4_base_vov_flash_480x640_baseline.py + config_path: /workspace/work_dirs/checkpoints/streampetr_2.5.0_config.py + model_zoo_url: https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/streampetr/streampetr-vov99/t4base/v2.5/streampetr_2.5.0_auto_labeling_3d_config.py checkpoint: + checkpoint_path: /workspace/work_dirs/checkpoints/streampetr_2.5.0_model.pth model_zoo_url: https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/streampetr/streampetr-vov99/t4base/v2.5/best_NuScenesmetric_T4Metric_mAP_epoch_34.pth - checkpoint_path: /workspace/work_dirs/checkpoints/example_model_streampetr.pth - name: centerpoint model_config: - config_path: /workspace/projects/CenterPoint/configs/t4dataset/Centerpoint/second_secfpn_4xb16_121m_j6gen2_base_amp_t4metric_v2.py - model_zoo_url: + config_path: /workspace/work_dirs/checkpoints/centerpoint_2.5.1_config.py + model_zoo_url: https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/centerpoint/centerpoint/j6gen2/v2.5.1/centerpoint_2.5.1_auto_labeling_3d_config.py checkpoint: - checkpoint_path: /workspace/work_dirs/checkpoints/example_model.pth - model_zoo_url: https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/centerpoint/centerpoint/j6gen2/v2.3.1/best_NuScenes_metric_T4Metric_mAP_epoch_29.pth + checkpoint_path: /workspace/work_dirs/checkpoints/centerpoint_2.5.1_model.pth + model_zoo_url: https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/centerpoint/centerpoint/j6gen2/v2.5.1/best_NuScenes%20metric_T4Metric_mAP_epoch_28.pth - name: bevfusion model_config: - config_path: /workspace/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_4xb8_gen2_base.py - model_zoo_url: https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-cl/t4base/v2.0.0/bevfusion_camera_lidar_voxel_second_secfpn_4xb8_base.py + config_path: /workspace/work_dirs/checkpoints/bevfusion_L_2.5.1_config.py + model_zoo_url: https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/j6gen2/2.5.1/bevfusion_L_2.5.1_auto_labeling_3d_config.py checkpoint: - checkpoint_path: /workspace/work_dirs/checkpoints/example_model_bevfusion.pth - model_zoo_url: https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-cl/t4base/v2.0.0/best_NuScenes_metric_T4Metric_mAP_epoch_48.pth + checkpoint_path: /workspace/work_dirs/checkpoints/bevfusion_L_2.5.1_model.pth + model_zoo_url: https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/j6gen2/2.5.1/best_NuScenes%20metric_T4Metric_mAP_epoch_30.pth # ensemble_infos configuration # Please update the config file for ensemble_infos when changing the latest model_config in create_info diff --git a/tools/auto_labeling_3d/entrypoint/configs/example_deepen.yaml b/tools/auto_labeling_3d/entrypoint/configs/example_deepen.yaml index bcefd8cd6..54cd7dada 100644 --- a/tools/auto_labeling_3d/entrypoint/configs/example_deepen.yaml +++ b/tools/auto_labeling_3d/entrypoint/configs/example_deepen.yaml @@ -21,16 +21,27 @@ create_info: # checkpoint: # model_zoo_url: https://url/to/download/checkpoint.pth # checkpoint_path: /path/to/save/checkpoint.pth + - name: streampetr + model_config: + config_path: /workspace/work_dirs/checkpoints/streampetr_2.5.0_config.py + model_zoo_url: https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/streampetr/streampetr-vov99/t4base/v2.5/streampetr_2.5.0_auto_labeling_3d_config.py + checkpoint: + checkpoint_path: /workspace/work_dirs/checkpoints/streampetr_2.5.0_model.pth + model_zoo_url: https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/streampetr/streampetr-vov99/t4base/v2.5/best_NuScenesmetric_T4Metric_mAP_epoch_34.pth - name: centerpoint - model_config: /workspace/projects/CenterPoint/configs/t4dataset/Centerpoint/second_secfpn_4xb16_121m_j6gen2_base_t4metric_v2.py + model_config: + config_path: /workspace/work_dirs/checkpoints/centerpoint_2.5.1_config.py + model_zoo_url: https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/centerpoint/centerpoint/j6gen2/v2.5.1/centerpoint_2.5.1_auto_labeling_3d_config.py checkpoint: - model_zoo_url: https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/centerpoint/centerpoint/j6gen2/v2.3.1/best_NuScenes_metric_T4Metric_mAP_epoch_29.pth - checkpoint_path: /workspace/work_dirs/checkpoints/example_model.pth + checkpoint_path: /workspace/work_dirs/checkpoints/centerpoint_2.5.1_model.pth + model_zoo_url: https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/centerpoint/centerpoint/j6gen2/v2.5.1/best_NuScenes%20metric_T4Metric_mAP_epoch_28.pth - name: bevfusion - model_config: /workspace/projects/BEVFusion/configs/t4dataset/BEVFusion-CL/bevfusion_camera_lidar_voxel_second_secfpn_4xb8_gen2_base.py + model_config: + config_path: /workspace/work_dirs/checkpoints/bevfusion_L_2.5.1_config.py + model_zoo_url: https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/j6gen2/2.5.1/bevfusion_L_2.5.1_auto_labeling_3d_config.py checkpoint: - model_zoo_url: https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-cl/t4base/v2.0.0/best_NuScenes_metric_T4Metric_mAP_epoch_48.pth - checkpoint_path: /workspace/work_dirs/checkpoints/example_model_bevfusion.pth + checkpoint_path: /workspace/work_dirs/checkpoints/bevfusion_L_2.5.1_model.pth + model_zoo_url: https://download.autoware-ml-model-zoo.tier4.jp/autoware-ml/models/bevfusion/bevfusion-l/j6gen2/2.5.1/best_NuScenes%20metric_T4Metric_mAP_epoch_30.pth # ensemble_infos configuration # Please update the config file for ensemble_infos when changing the latest model_config in create_info diff --git a/tools/auto_labeling_3d/filter_objects/config/ensemble_base.py b/tools/auto_labeling_3d/filter_objects/config/ensemble_base.py index 86b945b0e..1032d50b2 100644 --- a/tools/auto_labeling_3d/filter_objects/config/ensemble_base.py +++ b/tools/auto_labeling_3d/filter_objects/config/ensemble_base.py @@ -8,10 +8,10 @@ type="ThresholdFilter", confidence_thresholds={ "car": 0.35, - "truck": 0.35, - "bus": 0.35, - "bicycle": 0.35, - "pedestrian": 0.35, + "truck": 0.4, + "bus": 0.4, + "bicycle": 0.4, + "pedestrian": 0.4, }, use_label=["car", "truck", "bus", "bicycle", "pedestrian"], ), @@ -22,10 +22,10 @@ type="ThresholdFilter", confidence_thresholds={ "car": 0.35, - "truck": 0.35, - "bus": 0.35, - "bicycle": 0.35, - "pedestrian": 0.35, + "truck": 0.4, + "bus": 0.4, + "bicycle": 0.4, + "pedestrian": 0.4, }, use_label=["car", "truck", "bus", "bicycle", "pedestrian"], ), @@ -50,33 +50,31 @@ config=dict( type="NMSEnsembleModel", ensemble_setting=dict( - weights=[1.0, 1.0, 1.0], - iou_threshold=0.55, + weights=[0.9, 1.0, 1.0], + iou_threshold=0.20, # Ensemble label groups. Each group is processed as one ensemble unit. ensemble_label_groups=[ - ["car"], - ["truck"], - ["bus"], + ["car", "truck", "bus"], ["pedestrian"], ["bicycle"], ], ), ), inputs=[ + dict( + name="streampetr", + info_path="/workspace/data/t4dataset/info/pseudo_infos_raw_streampetr.pkl", + filter_pipeline=streampetr_pipeline, + ), dict( name="centerpoint", - info_path="./data/t4dataset/info/pseudo_infos_raw_centerpoint.pkl", + info_path="/workspace/data/t4dataset/info/pseudo_infos_raw_centerpoint.pkl", filter_pipeline=centerpoint_pipeline, ), dict( name="bevfusion", - info_path="./data/t4dataset/info/pseudo_infos_raw_bevfusion.pkl", + info_path="/workspace/data/t4dataset/info/pseudo_infos_raw_bevfusion.pkl", filter_pipeline=bevfusion_pipeline, ), - dict( - name="streampetr", - info_path="./data/t4dataset/info/pseudo_infos_raw_streampetr.pkl", - filter_pipeline=streampetr_pipeline, - ), ], ) diff --git a/tools/auto_labeling_3d/filter_objects/ensemble/base_ensemble_model.py b/tools/auto_labeling_3d/filter_objects/ensemble/base_ensemble_model.py index 3ccbf8b8c..129a9ddd2 100644 --- a/tools/auto_labeling_3d/filter_objects/ensemble/base_ensemble_model.py +++ b/tools/auto_labeling_3d/filter_objects/ensemble/base_ensemble_model.py @@ -79,28 +79,148 @@ def ensemble(self, results: List[Dict[str, Any]]) -> Dict[str, Any]: if len(results) == 1: return results[0] - # Check if the number of weights matches the number of results + self._validate_weight_count(results) + aligned_results = align_label_spaces(results) + class_name_to_id = self._build_class_name_to_id(aligned_results[0]["metainfo"]) + merged_data_list = self._ensemble_data_lists(aligned_results, class_name_to_id) + + return {"metainfo": aligned_results[0]["metainfo"], "data_list": merged_data_list} + + def _validate_weight_count(self, results: List[Dict[str, Any]]) -> None: + """Validate that each model result has a matching ensemble weight.""" assert len(self.settings["weights"]) == len(results), "Number of weights must match number of models" - # Align label spaces across multiple models - aligned_results = align_label_spaces(results) + def _build_class_name_to_id(self, metainfo: Dict[str, Any]) -> Dict[str, int]: + """Build the unified class-name to class-id mapping for ensemble.""" + return {class_name: class_id for class_id, class_name in enumerate(metainfo["classes"])} - # Merge data_list from all models - all_data_list: List[List[Dict[str, Any]]] = [r["data_list"] for r in aligned_results] - class_name_to_id: Dict[str, int] = { - class_name: class_id for class_id, class_name in enumerate(aligned_results[0]["metainfo"]["classes"]) - } + def _ensemble_data_lists( + self, + aligned_results: List[Dict[str, Any]], + class_name_to_id: Dict[str, int], + ) -> List[Dict[str, Any]]: + """Ensemble per-frame predictions using token alignment when available.""" + if self._can_align_by_token(aligned_results): + return self._ensemble_token_aligned_results(aligned_results, class_name_to_id) + return self._ensemble_positional_results(aligned_results, class_name_to_id) + + def _ensemble_token_aligned_results( + self, + aligned_results: List[Dict[str, Any]], + class_name_to_id: Dict[str, int], + ) -> List[Dict[str, Any]]: + """Ensemble frames by token so missing model outputs do not shift later frames.""" + anchor_index, anchor_data_list = self._get_anchor_result(aligned_results) + anchor_tokens = [self._get_frame_token(frame) for frame in anchor_data_list] + frame_lookups = [self._build_frame_lookup(result["data_list"]) for result in aligned_results] + + self._log_token_alignment_mismatches(anchor_tokens, frame_lookups, anchor_index) + + merged_data_list: List[Dict[str, Any]] = [] + for anchor_frame in anchor_data_list: + frame_data = self._build_token_aligned_frame_data(anchor_frame, frame_lookups) + merged_data_list.append(self._merge_frame_data(frame_data, class_name_to_id)) + + return merged_data_list + + def _ensemble_positional_results( + self, + aligned_results: List[Dict[str, Any]], + class_name_to_id: Dict[str, int], + ) -> List[Dict[str, Any]]: + """Fallback to positional frame alignment when frame tokens are unavailable.""" merged_data_list: List[Dict[str, Any]] = [] + all_data_list: List[List[Dict[str, Any]]] = [result["data_list"] for result in aligned_results] for frame_data in zip(*all_data_list): - merged_frame = self._ensemble_frame( - frame_data, - ensemble_function=self.ensemble_function, - ensemble_label_groups=self.settings["ensemble_label_groups"], - class_name_to_id=class_name_to_id, - ) - merged_data_list.append(merged_frame) + merged_data_list.append(self._merge_frame_data(frame_data, class_name_to_id)) + return merged_data_list + + def _get_anchor_result(self, aligned_results: List[Dict[str, Any]]) -> tuple[int, List[Dict[str, Any]]]: + """Return the index and data list of the longest result stream anchor.""" + anchor_index, anchor_result = max( + enumerate(aligned_results), + key=lambda item: len(item[1]["data_list"]), + ) + return anchor_index, anchor_result["data_list"] + + def _log_token_alignment_mismatches( + self, + anchor_tokens: List[str], + frame_lookups: List[Dict[str, Dict[str, Any]]], + anchor_index: int, + ) -> None: + """Log frames missing from or extra to the anchor sequence for non-anchor models.""" + anchor_token_set = set(anchor_tokens) + for model_idx, frame_lookup in enumerate(frame_lookups): + if model_idx == anchor_index: + continue + missing_tokens = [token for token in anchor_tokens if token not in frame_lookup] + if missing_tokens: + self.logger.info( + "Model %d is missing %d frames during ensemble; those frames will use empty predictions.", + model_idx, + len(missing_tokens), + ) - return {"metainfo": aligned_results[0]["metainfo"], "data_list": merged_data_list} + extra_tokens = set(frame_lookup) - anchor_token_set + if extra_tokens: + self.logger.warning( + "Model %d has %d extra frames not present in the anchor result; they will be ignored.", + model_idx, + len(extra_tokens), + ) + + def _build_token_aligned_frame_data( + self, + anchor_frame: Dict[str, Any], + frame_lookups: List[Dict[str, Dict[str, Any]]], + ) -> List[Dict[str, Any]]: + """Build one aligned multi-model frame tuple using the anchor frame token.""" + token = self._get_frame_token(anchor_frame) + return [ + frame_lookup[token] if token in frame_lookup else self._generate_empty_frame(anchor_frame) + for frame_lookup in frame_lookups + ] + + def _merge_frame_data( + self, + frame_data, + class_name_to_id: Dict[str, int], + ) -> Dict[str, Any]: + """Merge one frame across all models using the configured ensemble function.""" + return self._ensemble_frame( + frame_data, + ensemble_function=self.ensemble_function, + ensemble_label_groups=self.settings["ensemble_label_groups"], + class_name_to_id=class_name_to_id, + ) + + def _can_align_by_token(self, results: List[Dict[str, Any]]) -> bool: + """Return whether all frames carry tokens and can be aligned by token.""" + return all("token" in frame for result in results for frame in result["data_list"]) + + def _get_frame_token(self, frame: Dict[str, Any]) -> str: + """Extract a frame token and raise if token-based alignment is impossible.""" + token = frame.get("token", None) + if token is None: + raise ValueError("Each frame must contain a token for ensemble alignment") + return token + + def _build_frame_lookup(self, data_list: List[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]: + """Build a token-to-frame lookup for one model result stream.""" + frame_lookup: Dict[str, Dict[str, Any]] = {} + for frame in data_list: + token = self._get_frame_token(frame) + if token in frame_lookup: + raise ValueError(f"Duplicate frame token found during ensemble alignment: {token}") + frame_lookup[token] = frame + return frame_lookup + + def _generate_empty_frame(self, reference_frame: Dict[str, Any]) -> Dict[str, Any]: + """Generate a metadata-preserving empty frame for a missing model prediction.""" + empty_frame = reference_frame.copy() + empty_frame["pred_instances_3d"] = [] + return empty_frame def _ensemble_frame( self, frame_results, ensemble_function, ensemble_label_groups, class_name_to_id diff --git a/tools/auto_labeling_3d/filter_objects/ensemble/nms_ensemble_model.py b/tools/auto_labeling_3d/filter_objects/ensemble/nms_ensemble_model.py index ac9496db5..814767d1b 100644 --- a/tools/auto_labeling_3d/filter_objects/ensemble/nms_ensemble_model.py +++ b/tools/auto_labeling_3d/filter_objects/ensemble/nms_ensemble_model.py @@ -2,6 +2,7 @@ from dataclasses import dataclass from typing import Any, Dict, List, Tuple, Type +import cv2 import numpy as np from mmengine.registry import TASK_UTILS @@ -84,6 +85,7 @@ class NMSEnsembleModel(BaseEnsembleModel): @property def model_instances_type(self) -> Type[NMSModelInstances]: + """Return the per-model container used by the NMS ensemble.""" return NMSModelInstances def ensemble_function( @@ -176,7 +178,8 @@ def _nms_indices(boxes: np.ndarray, scores: np.ndarray, iou_threshold: float) -> def _calculate_iou(box: np.ndarray, boxes: np.ndarray) -> np.ndarray: - """Calculate IoU between a single box and an array of boxes in BEV (Bird's Eye View). + """Calculate rotated IoU between a single box and an array of boxes in BEV. + TODO: GPU and/or C++ implementation to improve speed Args: box: Single bounding box [x, y, z, dx, dy, dz, yaw]. @@ -185,23 +188,39 @@ def _calculate_iou(box: np.ndarray, boxes: np.ndarray) -> np.ndarray: Returns: Array of IoU values. """ - x1, y1 = box[0], box[1] - dx1, dy1 = box[3], box[4] - - x2 = boxes[:, 0] - y2 = boxes[:, 1] - dx2 = boxes[:, 3] - dy2 = boxes[:, 4] - - # Calculate overlapping area in BEV - x_min = np.maximum(x1 - dx1 / 2, x2 - dx2 / 2) - y_min = np.maximum(y1 - dy1 / 2, y2 - dy2 / 2) - x_max = np.minimum(x1 + dx1 / 2, x2 + dx2 / 2) - y_max = np.minimum(y1 + dy1 / 2, y2 + dy2 / 2) - - intersection = np.maximum(0, x_max - x_min) * np.maximum(0, y_max - y_min) - area1 = dx1 * dy1 - area2 = dx2 * dy2 - union = area1 + area2 - intersection - - return intersection / union + if boxes.size == 0: + return np.array([]) + + reference_rect = _to_rotated_rect(box) + reference_area = float(box[3] * box[4]) + ious = [] + for candidate_box in boxes: + candidate_rect = _to_rotated_rect(candidate_box) + candidate_area = float(candidate_box[3] * candidate_box[4]) + intersection = _rotated_intersection_area(reference_rect, candidate_rect) + union = reference_area + candidate_area - intersection + ious.append(intersection / union if union > 0 else 0.0) + + return np.array(ious) + + +def _to_rotated_rect(box: np.ndarray) -> Tuple[Tuple[float, float], Tuple[float, float], float]: + """Convert a 3D box in `[x, y, z, dx, dy, dz, yaw]` format to an OpenCV BEV rectangle.""" + return ( + (float(box[0]), float(box[1])), + (float(box[3]), float(box[4])), + float(np.degrees(box[6])), + ) + + +def _rotated_intersection_area( + rect1: Tuple[Tuple[float, float], Tuple[float, float], float], + rect2: Tuple[Tuple[float, float], Tuple[float, float], float], +) -> float: + """Return the BEV intersection area between two OpenCV rotated rectangles.""" + _, intersection_points = cv2.rotatedRectangleIntersection(rect1, rect2) + if intersection_points is None: + return 0.0 + + hull = cv2.convexHull(intersection_points, returnPoints=True) + return float(abs(cv2.contourArea(hull))) diff --git a/tools/auto_labeling_3d/filter_objects/test/test_nms_ensemble_model.py b/tools/auto_labeling_3d/filter_objects/test/test_nms_ensemble_model.py index e89b814f8..0a0481baf 100644 --- a/tools/auto_labeling_3d/filter_objects/test/test_nms_ensemble_model.py +++ b/tools/auto_labeling_3d/filter_objects/test/test_nms_ensemble_model.py @@ -429,6 +429,64 @@ def test_ensemble_partial_empty_predictions(self): instances = result["data_list"][0]["pred_instances_3d"] self.assertGreater(len(instances), 0) + def test_ensemble_skips_missing_model_frame_by_token(self): + """ + Test case for token-based alignment when one model is missing a frame. + + This verifies that a missing frame from one model is treated as an empty + contribution for that token instead of shifting later frames forward. + """ + token_aligned_results = [ + { + "metainfo": {"classes": self.sample_classes}, + "data_list": [ + { + "token": "frame_0", + "pred_instances_3d": [ + {"bbox_label_3d": 0, "bbox_score_3d": 0.8, "bbox_3d": [1.0, 2.0, 0.0, 4.0, 2.0, 1.5, 0.1]} + ], + }, + { + "token": "frame_1", + "pred_instances_3d": [ + { + "bbox_label_3d": 1, + "bbox_score_3d": 0.7, + "bbox_3d": [10.0, 11.0, 0.0, 0.6, 0.6, 1.8, 0.0], + } + ], + }, + ], + }, + { + "metainfo": {"classes": self.sample_classes}, + "data_list": [ + { + "token": "frame_1", + "pred_instances_3d": [ + { + "bbox_label_3d": 2, + "bbox_score_3d": 0.6, + "bbox_3d": [20.0, 21.0, 0.0, 1.8, 0.8, 1.2, 0.2], + } + ], + } + ], + }, + ] + model = NMSEnsembleModel(ensemble_setting=self.ensemble_settings, logger=self.mock_logger) + result = model.ensemble(token_aligned_results) + + self.assertEqual(len(result["data_list"]), 2) + self.assertEqual(result["data_list"][0]["token"], "frame_0") + self.assertEqual(result["data_list"][1]["token"], "frame_1") + self.assertEqual(len(result["data_list"][0]["pred_instances_3d"]), 1) + self.assertEqual(result["data_list"][0]["pred_instances_3d"][0]["bbox_label_3d"], 0) + self.assertEqual( + {instance["bbox_label_3d"] for instance in result["data_list"][1]["pred_instances_3d"]}, + {1, 2}, + ) + def test_weight_mismatch_assertion(self): """ Test case for mismatched ensemble weights and model count. (2 models, 1 weight) @@ -556,6 +614,47 @@ def test_calculate_iou_partial_overlap(self): self.assertEqual(len(iou), 1) self.assertAlmostEqual(iou[0], expected_iou, places=6) + def test_calculate_iou_uses_yaw_for_rotated_boxes(self): + """ + Ensure `_calculate_iou` uses box yaw instead of axis-aligned overlap. + + The pair below is adapted from a real output example where the + axis-aligned IoU is below 0.3, but the rotated IoU is above 0.3. + """ + + def axis_aligned_iou(box1: np.ndarray, box2: np.ndarray) -> float: + x1, y1, dx1, dy1 = box1[0], box1[1], box1[3], box1[4] + x2, y2, dx2, dy2 = box2[0], box2[1], box2[3], box2[4] + x_min = max(x1 - dx1 / 2, x2 - dx2 / 2) + y_min = max(y1 - dy1 / 2, y2 - dy2 / 2) + x_max = min(x1 + dx1 / 2, x2 + dx2 / 2) + y_max = min(y1 + dy1 / 2, y2 + dy2 / 2) + intersection = max(0, x_max - x_min) * max(0, y_max - y_min) + area1 = dx1 * dy1 + area2 = dx2 * dy2 + union = area1 + area2 - intersection + return intersection / union if union > 0 else 0.0 + + box1 = np.array([0.0, 0.0, 0.0, 4.25, 1.7265625, 1.5, np.deg2rad(72.135684475929)]) + box2 = np.array( + [ + [ + -0.44700223403, + -1.72870231061, + 0.0, + 4.039058208465576, + 1.8613028526306152, + 1.5, + np.deg2rad(73.58854567046951), + ] + ] + ) + + rotated_iou = _calculate_iou(box1, box2) + self.assertEqual(len(rotated_iou), 1) + self.assertLess(axis_aligned_iou(box1, box2[0]), 0.3) + self.assertGreater(rotated_iou[0], 0.3) + def test_calculate_iou_multiple_boxes(self): """ Ensure `_calculate_iou` evaluates IoU against a batch of boxes. @@ -606,6 +705,29 @@ def test_calculate_iou_partial_overlap_diagonal_offset(self): self.assertIn(2, keep_indices) self.assertNotIn(0, keep_indices) + def test_nms_indices_suppresses_rotated_overlap(self): + """ + Ensure `_nms_indices` suppresses rotated overlaps that axis-aligned IoU + would miss. + """ + boxes = np.array( + [ + [0.0, 0.0, 0.0, 4.25, 1.7265625, 1.5, np.deg2rad(72.135684475929)], + [ + -0.44700223403, + -1.72870231061, + 0.0, + 4.039058208465576, + 1.8613028526306152, + 1.5, + np.deg2rad(73.58854567046951), + ], + ] + ) + scores = np.array([0.9, 0.8]) + keep_indices = _nms_indices(boxes, scores, iou_threshold=0.3) + self.assertEqual(keep_indices, [0]) + def test_nms_indices_no_overlap(self): """ Ensure `_nms_indices` keeps all boxes when none overlap. diff --git a/tools/auto_labeling_3d/utils/dataset/annotation_tool_dataset.py b/tools/auto_labeling_3d/utils/dataset/annotation_tool_dataset.py index de5eef1fb..9de0cf66a 100644 --- a/tools/auto_labeling_3d/utils/dataset/annotation_tool_dataset.py +++ b/tools/auto_labeling_3d/utils/dataset/annotation_tool_dataset.py @@ -199,12 +199,12 @@ def _create_annotations_from_info(info: AWML3DInfo, tool_id: str) -> list[dict]: file_id: str = f"{idx}.pcd" for box_global in boxes_in_frame_global: - unique_label_id: str = id_generator.assign_id(box_global.uuid, str(box_global.semantic_label)) + unique_label_id: str = id_generator.assign_id(box_global.uuid, str(box_global.semantic_label.name)) annotation_fields = DeepenAnnotationFields( dataset_id=tool_id, file_id=file_id, - label_category_id=str(box_global.semantic_label), + label_category_id=str(box_global.semantic_label.name), label_id=unique_label_id, label_type="3d_bbox", attributes={"pseudo-label": "auto-labeled"},