From 3bae850e4529b6b1d930b50c6586be4471288f53 Mon Sep 17 00:00:00 2001 From: Amadeusz Szymko Date: Sun, 15 Mar 2026 23:38:58 +0900 Subject: [PATCH 01/23] feat(autoware_ml): add t4segmetric Signed-off-by: Amadeusz Szymko --- .../evaluation/functional/__init__.py | 34 ++ .../evaluation/functional/t4_seg_eval.py | 393 ++++++++++++++++++ .../evaluation/metrics/__init__.py | 4 + .../evaluation/metrics/t4_seg_metric.py | 341 +++++++++++++++ .../frnet_1xb8_t4dataset-ot128-seg.py | 18 +- .../frnet_1xb8_t4dataset-qt128-seg.py | 18 +- .../configs/semseg-pt-v3m1-0-t4dataset.py | 2 + projects/PTv3/engines/hooks/evaluator.py | 171 +++++--- projects/PTv3/engines/test.py | 156 ++++--- 9 files changed, 1001 insertions(+), 136 deletions(-) create mode 100644 autoware_ml/segmentation3d/evaluation/functional/__init__.py create mode 100644 autoware_ml/segmentation3d/evaluation/functional/t4_seg_eval.py create mode 100644 autoware_ml/segmentation3d/evaluation/metrics/__init__.py create mode 100644 autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py diff --git a/autoware_ml/segmentation3d/evaluation/functional/__init__.py b/autoware_ml/segmentation3d/evaluation/functional/__init__.py new file mode 100644 index 000000000..2d95736f4 --- /dev/null +++ b/autoware_ml/segmentation3d/evaluation/functional/__init__.py @@ -0,0 +1,34 @@ +# Copyright (c) TIER IV, Inc. All rights reserved. 
_EPS = 1e-10


def _safe_divide(numerator: np.ndarray, denominator: np.ndarray) -> np.ndarray:
    """Element-wise ``numerator / denominator`` with NaN where the denominator is not positive."""
    numerator = np.asarray(numerator, dtype=np.float64)
    denominator = np.asarray(denominator, dtype=np.float64)
    out = np.full(denominator.shape, np.nan, dtype=np.float64)
    np.divide(numerator, denominator, out=out, where=denominator > 0)
    return out


def fast_hist(preds: np.ndarray, labels: np.ndarray, num_classes: int) -> np.ndarray:
    """Confusion matrix for one sample.

    ``hist[gt_class, pred_class]`` = number of points. For inputs whose
    values all lie in ``[0, num_classes)`` the result equals mmdet3d's
    ``fast_hist``.

    Points are dropped when either the label or the prediction falls outside
    ``[0, num_classes)``. Without the prediction check a negative or too-large
    prediction would be counted in an unrelated cell (or make ``np.bincount``
    raise), silently corrupting the matrix.

    Args:
        preds: Predicted label array, shape ``(N,)``.
        labels: Ground-truth label array, shape ``(N,)``.
        num_classes: Number of classes.

    Returns:
        ``np.ndarray`` of shape ``(num_classes, num_classes)``.
    """
    # Cast both arrays: np.bincount rejects float input.
    labels = np.asarray(labels).astype(np.int64)
    preds = np.asarray(preds).astype(np.int64)
    valid = (labels >= 0) & (labels < num_classes) & (preds >= 0) & (preds < num_classes)
    flat_index = num_classes * labels[valid] + preds[valid]
    counts = np.bincount(flat_index, minlength=num_classes**2)
    return counts.reshape(num_classes, num_classes)


def per_class_iou(hist: np.ndarray) -> np.ndarray:
    """Per-class IoU from a cumulative confusion matrix.

    Classes that appear neither in the ground truth nor in the predictions
    (empty union) yield NaN so that ``np.nanmean`` skips them, as mmdet3d does.
    """
    tp = np.diag(hist)
    union = hist.sum(axis=1) + hist.sum(axis=0) - tp
    return _safe_divide(tp, union)


def get_acc(hist: np.ndarray) -> float:
    """Overall point-level accuracy (0.0 for an empty matrix)."""
    return float(np.diag(hist).sum() / (hist.sum() + _EPS))


def get_acc_cls(hist: np.ndarray) -> float:
    """Class-average accuracy (macro recall over classes that have GT points).

    Returns 0.0 when no class has any ground-truth point.
    """
    recall = per_class_recall(hist)
    present = ~np.isnan(recall)
    return float(recall[present].mean()) if present.any() else 0.0


def per_class_precision(hist: np.ndarray) -> np.ndarray:
    """Per-class precision: TP / (TP + FP); NaN for classes never predicted."""
    return _safe_divide(np.diag(hist), hist.sum(axis=0))  # column sums = predicted-as-class


def per_class_recall(hist: np.ndarray) -> np.ndarray:
    """Per-class recall: TP / (TP + FN); NaN for classes without GT points."""
    return _safe_divide(np.diag(hist), hist.sum(axis=1))  # row sums = actual-class count


def per_class_f1(hist: np.ndarray) -> np.ndarray:
    """Per-class F1 score, computed as ``2*TP / (2*TP + FP + FN)``.

    This equals ``2 * precision * recall / (precision + recall)`` whenever
    both are defined and non-zero. A class that occurs (in GT or predictions)
    but has no true positive scores 0 instead of NaN, so it is not dropped
    from the mean. Only classes absent from both GT and predictions are NaN.
    """
    tp = np.diag(hist)
    return _safe_divide(2.0 * tp, hist.sum(axis=1) + hist.sum(axis=0))


def normalize_confusion_matrix(cm: np.ndarray) -> np.ndarray:
    """Row-normalise so each row sums to 1 (GT-class perspective).

    Rows without any GT sample are set to 0 rather than NaN so the result
    can be safely passed to matplotlib's ``imshow``.
    """
    row_sums = cm.sum(axis=1, keepdims=True)
    safe = np.where(row_sums > 0, row_sums, 1.0)
    return cm / safe


def plot_confusion_matrix(
    cm: np.ndarray,
    class_names: List[str],
    normalize: bool = True,
    label: str = "",
) -> "matplotlib.figure.Figure":  # type: ignore[name-defined]
    """Render a confusion matrix as a matplotlib Figure.

    * Y-axis = "True label", X-axis = "Predicted label".
    * With ``normalize=True`` the color scale is fixed at ``[0, 1]`` so plots
      from different evaluations are directly comparable. With
      ``normalize=False`` the scale spans ``[0, max(cm)]`` so raw counts do
      not saturate the colormap.
    * Numeric value annotated in every cell.

    Args:
        cm: ``(num_classes, num_classes)`` confusion matrix, ``cm[gt][pred]``.
        class_names: Human-readable class names.
        normalize: If ``True`` (default), row-normalise before plotting.
        label: Optional range label appended to the figure title.

    Returns:
        ``matplotlib.figure.Figure`` - caller is responsible for closing it.
    """
    import matplotlib

    matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    from matplotlib.colors import Normalize as MplNormalize

    nc = cm.shape[0]
    if normalize:
        cm_plot = normalize_confusion_matrix(cm)
        vmax = 1.0
    else:
        cm_plot = np.asarray(cm, dtype=float)
        vmax = max(float(cm_plot.max()), 1.0) if cm_plot.size else 1.0
    # Fractions get two decimals; raw counts are shown as whole numbers.
    value_format = ".2f" if vmax <= 1.0 else ".0f"

    fig, ax = plt.subplots(figsize=(max(10, nc * 0.6), max(8, nc * 0.55)))
    im = ax.imshow(
        cm_plot,
        interpolation="nearest",
        cmap="Blues",
        norm=MplNormalize(vmin=0.0, vmax=vmax),
    )
    fig.colorbar(im, ax=ax, shrink=0.8)

    font_size = max(4, 7 - nc // 10)
    for i in range(nc):
        for j in range(nc):
            val = cm_plot[i, j]
            # White text on the dark half of the colormap, black otherwise.
            color = "white" if val > 0.5 * vmax else "black"
            ax.text(j, i, format(val, value_format), ha="center", va="center", fontsize=font_size, color=color)

    title = "Confusion Matrix"
    if label:
        title += f" [{label}]"
    ax.set_title(title, fontsize=12)
    ax.set_ylabel("True label", fontsize=11)
    ax.set_xlabel("Predicted label", fontsize=11)

    tick_marks = np.arange(nc)
    ax.set_xticks(tick_marks)
    ax.set_yticks(tick_marks)
    ax.set_xticklabels(class_names, rotation=45, ha="right", fontsize=7)
    ax.set_yticklabels(class_names, fontsize=7)
    fig.tight_layout()
    return fig
def figure_to_numpy(fig) -> np.ndarray:
    """Convert a matplotlib Figure to a uint8 HWC NumPy array (RGB).

    Renders the figure to an in-memory PNG and decodes it with PIL. The
    buffer and the PIL image are both closed before returning.
    """
    from PIL import Image  # lazy import so the module loads without PIL

    with io.BytesIO() as buf:
        fig.savefig(buf, format="png", bbox_inches="tight")
        buf.seek(0)
        with Image.open(buf) as img:
            # convert() decodes the pixels, so the array is complete before
            # the buffer is released.
            return np.array(img.convert("RGB"))


def compute_bev_distance(coords: np.ndarray) -> np.ndarray:
    """BEV distance from the origin: ``sqrt(x^2 + y^2)`` for each point.

    Uses ``np.hypot``, which avoids the overflow of squaring large values.

    Args:
        coords: ``(N, >=2)`` array; the first two columns are used as X, Y.

    Returns:
        ``(N,)`` array of distances in the same unit as the input.
    """
    coords = np.asarray(coords)
    return np.hypot(coords[:, 0], coords[:, 1])


def range_label(lo: float, hi: float) -> str:
    """Human-readable range label, e.g. ``'0-20m'``."""
    return f"{lo:g}-{hi:g}m"


@dataclass
class SegEvalResult:
    """Evaluation result with scalar metrics and raw confusion matrices.

    Attributes:
        metrics: Flat dict of scalar metrics keyed in mmdetection3d style:
            ``miou``, ``acc``, ``acc_cls``, per-class IoU by name,
            ``mprecision``, ``mrecall``, ``mf1``,
            ``precision/{class}``, ``recall/{class}``, ``f1/{class}``; and
            for each range bucket, the same keys prefixed with
            ``{range_label}/`` (e.g. ``0-20m/miou``).
        cm: Total confusion matrix ``(num_classes, num_classes)``.
        range_cms: Per-range confusion matrices keyed by range label.
    """

    metrics: Dict[str, float] = field(default_factory=dict)
    cm: np.ndarray = field(default_factory=lambda: np.zeros((0, 0)))
    range_cms: Dict[str, np.ndarray] = field(default_factory=dict)
def _masked_class_metrics(hist: np.ndarray, ignore_index: int) -> Dict[str, np.ndarray]:
    """Return per-class IoU/precision/recall/F1 with the ignore class set to NaN."""
    per_class = {
        "iou": np.array(per_class_iou(hist), dtype=np.float64),
        "precision": np.array(per_class_precision(hist), dtype=np.float64),
        "recall": np.array(per_class_recall(hist), dtype=np.float64),
        "f1": np.array(per_class_f1(hist), dtype=np.float64),
    }
    if 0 <= ignore_index < hist.shape[0]:
        for values in per_class.values():
            values[ignore_index] = np.nan
    return per_class


def _mean_ignoring_nan(values: np.ndarray) -> float:
    """Mean over the non-NaN entries; NaN (without a warning) if there are none."""
    defined = values[~np.isnan(values)]
    return float(defined.mean()) if defined.size else float("nan")


def _compute_bucket_metrics(
    hist: np.ndarray,
    label2cat: Dict[int, str],
    ignore_index: int,
    prefix: str,
) -> Dict[str, float]:
    """Derive all scalar metrics from a confusion histogram.

    Args:
        hist: ``(num_classes, num_classes)`` cumulative confusion matrix.
        label2cat: ``{index: class_name}`` mapping.
        ignore_index: Class index to exclude from averages.
        prefix: String prepended to every key (e.g. ``'0-20m/'``).

    Returns:
        Flat dict of scalar metrics for this bucket. Per-class values that
        are undefined (NaN) are reported as ``0.0``; the means skip them.
    """
    num_classes = hist.shape[0]
    per_class = _masked_class_metrics(hist, ignore_index)

    out: Dict[str, float] = {
        f"{prefix}miou": _mean_ignoring_nan(per_class["iou"]),
        f"{prefix}acc": get_acc(hist),
        f"{prefix}acc_cls": get_acc_cls(hist),
        f"{prefix}mprecision": _mean_ignoring_nan(per_class["precision"]),
        f"{prefix}mrecall": _mean_ignoring_nan(per_class["recall"]),
        f"{prefix}mf1": _mean_ignoring_nan(per_class["f1"]),
    }

    for idx in range(num_classes):
        if idx == ignore_index:
            continue
        name = label2cat.get(idx, str(idx))
        # The per-class IoU key is the bare class name (mmdet3d convention);
        # the other metrics are namespaced.
        for key, metric in (
            (f"{prefix}{name}", "iou"),
            (f"{prefix}precision/{name}", "precision"),
            (f"{prefix}recall/{name}", "recall"),
            (f"{prefix}f1/{name}", "f1"),
        ):
            value = per_class[metric][idx]
            out[key] = 0.0 if np.isnan(value) else float(value)

    return out


def _print_bucket_table(
    hist: np.ndarray,
    label2cat: Dict[int, str],
    ignore_index: int,
    title: str,
    logger=None,
) -> None:
    """Print an AsciiTable for one evaluation bucket.

    The "mean" row excludes the ignore class for every column, so it agrees
    with the ``m*`` values returned by :func:`_compute_bucket_metrics`.
    """
    num_classes = hist.shape[0]
    per_class = _masked_class_metrics(hist, ignore_index)
    columns = ("iou", "precision", "recall", "f1")

    def _cell(value: float) -> str:
        return "N/A" if np.isnan(value) else f"{value:.4f}"

    rows = [["class", "IoU", "Prec", "Rec", "F1"]]
    for idx in range(num_classes):
        if idx == ignore_index:
            continue
        name = label2cat.get(idx, str(idx))
        rows.append([name] + [_cell(per_class[col][idx]) for col in columns])

    rows.append(["mean"] + [f"{_mean_ignoring_nan(per_class[col]):.4f}" for col in columns])
    rows.append(["acc", f"{get_acc(hist):.4f}", "-", "-", "-"])
    rows.append(["acc_cls", f"{get_acc_cls(hist):.4f}", "-", "-", "-"])

    table = AsciiTable(rows, title=title)
    table.inner_footing_row_border = True
    print_log("\n" + table.table, logger=logger)
def t4_seg_eval(
    gt_labels: List[np.ndarray],
    seg_preds: List[np.ndarray],
    label2cat: Dict[int, str],
    ignore_index: int,
    coords_list: Optional[List[Optional[np.ndarray]]] = None,
    distance_ranges: Optional[List[Tuple[float, float]]] = None,
    logger=None,
) -> SegEvalResult:
    """Semantic segmentation evaluation with optional range-based breakdown.

    Produces the same top-level keys as ``mmdet3d.evaluation.seg_eval``
    (``miou``, ``acc``, ``acc_cls``, per-class IoU by name) and additionally
    adds precision / recall / F1 metrics and optional per-range variants.

    Points whose ground-truth label equals ``ignore_index`` are removed
    before any histogram is accumulated, so they contribute neither to the
    confusion matrices nor to ``acc`` / ``acc_cls``.

    Args:
        gt_labels: Ground-truth label arrays, one per sample.
        seg_preds: Predicted label arrays, one per sample.
        label2cat: ``{output_index: class_name}`` mapping; its length is used
            as the number of classes.
        ignore_index: Label to exclude from metric computation.
        coords_list: Optional per-sample coordinate arrays ``(N, >=2)``.
            When provided together with ``distance_ranges``, range-based
            metrics are computed; otherwise only total metrics are returned.
            Samples whose entry is ``None``, missing, or not aligned with the
            labels are skipped for the range breakdown only.
        distance_ranges: List of ``(lo, hi)`` pairs, e.g.
            ``[(0, 20), (20, 40), ..., (100, 120)]``; a point belongs to a
            bucket when ``lo <= distance < hi``.
        logger: Optional logger for tabular output.

    Returns:
        :class:`SegEvalResult` with scalar metrics dict, total CM, and
        per-range CMs.

    Raises:
        ValueError: If ``gt_labels`` and ``seg_preds`` differ in length.
    """
    if len(gt_labels) != len(seg_preds):
        raise ValueError(f"gt and pred lists must have the same length ({len(gt_labels)} vs {len(seg_preds)})")

    num_classes = len(label2cat)
    ranges = list(distance_ranges) if (distance_ranges and coords_list is not None) else []

    total_hist = np.zeros((num_classes, num_classes), dtype=np.float64)
    range_hists: Dict[str, np.ndarray] = {
        range_label(lo, hi): np.zeros((num_classes, num_classes), dtype=np.float64) for lo, hi in ranges
    }

    for i, (gt_raw, pred_raw) in enumerate(zip(gt_labels, seg_preds)):
        gt = np.asarray(gt_raw).astype(np.int64).reshape(-1)
        pred = np.asarray(pred_raw).astype(np.int64).reshape(-1)

        # Drop ignored points explicitly. Re-labelling them with ignore_index
        # would count them as correct predictions of the ignore class whenever
        # ignore_index < num_classes.
        keep = gt != ignore_index
        total_hist += fast_hist(pred[keep], gt[keep], num_classes)

        if not ranges:
            continue
        coord = coords_list[i] if i < len(coords_list) else None  # type: ignore[arg-type,index]
        if coord is None:
            continue
        coord = np.asarray(coord)
        if coord.ndim != 2 or coord.shape[1] < 2 or coord.shape[0] != gt.size:
            # Coordinates not aligned with the labels: skip the range split.
            continue
        dist = compute_bev_distance(coord)
        for lo, hi in ranges:
            in_bucket = keep & (dist >= lo) & (dist < hi)
            if not np.any(in_bucket):
                continue
            range_hists[range_label(lo, hi)] += fast_hist(pred[in_bucket], gt[in_bucket], num_classes)

    _print_bucket_table(total_hist, label2cat, ignore_index, title="Total", logger=logger)
    metrics = _compute_bucket_metrics(total_hist, label2cat, ignore_index, prefix="")

    for lbl, hist_r in range_hists.items():
        if hist_r.sum() == 0:
            # Empty bucket: keep its (zero) matrix but emit no metrics.
            continue
        _print_bucket_table(hist_r, label2cat, ignore_index, title=lbl, logger=logger)
        metrics.update(_compute_bucket_metrics(hist_r, label2cat, ignore_index, prefix=f"{lbl}/"))

    return SegEvalResult(metrics=metrics, cm=total_hist, range_cms=range_hists)
@METRICS.register_module()
class T4SegMetric(BaseMetric):
    """3D semantic segmentation evaluation metric for T4 datasets.

    Parameters
    ----------
    num_classes:
        Number of output classes. Used as a fallback when ``dataset_meta``
        carries no class information.
    ignore_index:
        Label value to skip during evaluation. The explicit argument takes
        priority; otherwise ``dataset_meta['ignore_index']`` is used
        (``-1`` if absent).
    distance_ranges:
        Optional list of ``(lo, hi)`` metre pairs for range-based breakdown,
        e.g. ``[(0, 20), (20, 40), (40, 60), (60, 80), (80, 100), (100, 120)]``.
    collect_device:
        Device used for collecting results across ranks.
        ``'cpu'`` or ``'gpu'``.
    prefix:
        Optional metric-name prefix.
    pklfile_prefix:
        Stored on the instance; no code in this class reads it.
    submission_prefix:
        If set, predictions are exported as one TXT file per sample to this
        directory instead of computing metrics.
    **kwargs:
        Accepted and ignored.
    """

    default_prefix: Optional[str] = None

    def __init__(
        self,
        num_classes: Optional[int] = None,
        ignore_index: Optional[int] = None,
        distance_ranges: Optional[List[Tuple[float, float]]] = None,
        collect_device: str = "cpu",
        prefix: Optional[str] = None,
        pklfile_prefix: Optional[str] = None,
        submission_prefix: Optional[str] = None,
        **kwargs,
    ):
        super().__init__(prefix=prefix, collect_device=collect_device)
        self._num_classes = num_classes
        self._ignore_index = ignore_index
        self.distance_ranges = distance_ranges or []
        self.pklfile_prefix = pklfile_prefix
        self.submission_prefix = submission_prefix
        # Counter used as the global step for confusion-matrix images.
        self._eval_step: int = 0
        # Keeps a fallback temporary export directory alive (see format_results).
        self._tmp_dir = None

    def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None:
        """Collect one batch of model outputs for later aggregation.

        Samples without a prediction or ground truth, or whose prediction and
        ground truth differ in size, are skipped.
        """
        batch_coords = self._extract_batch_coords(data_batch, data_samples)

        for i, data_sample in enumerate(data_samples):
            pred_field = data_sample.get("pred_pts_seg", {})
            ann_field = data_sample.get("eval_ann_info", {})

            pred = self._to_numpy(pred_field.get("pts_semantic_mask"))
            gt = self._to_numpy(ann_field.get("pts_semantic_mask"))

            if pred is None or gt is None or pred.size != gt.size:
                continue

            # Guard the index: extraction may yield fewer entries than samples.
            coord_i = None
            if batch_coords and i < len(batch_coords):
                coord_i = batch_coords[i]
            if coord_i is not None:
                if coord_i.shape[0] > gt.size:
                    # NOTE(review): assumes surplus points are trailing padding.
                    coord_i = coord_i[: gt.size]
                elif coord_i.shape[0] < gt.size:
                    coord_i = None

            self.results.append(
                dict(
                    pred=pred,
                    gt=gt,
                    coord=coord_i,
                    # Keep original annotation info for submission export.
                    eval_ann_info=ann_field,
                )
            )

    def compute_metrics(self, results: list) -> Dict[str, float]:
        """Aggregate per-batch results and return the full metrics dict.

        Returns an empty dict when ``submission_prefix`` is set (export only)
        or when there is nothing to evaluate.
        """
        logger: MMLogger = MMLogger.get_current_instance()

        if self.submission_prefix:
            self.format_results(results)
            return {}

        if not results:
            logger.warning("T4SegMetric: no results to evaluate.")
            return {}

        ignore_index = self._get_ignore_index()
        label2cat = self._get_label2cat()

        # Make sure every index up to the class count (and the ignore index)
        # has a name, because the evaluator sizes its matrices by len(label2cat).
        target_num_classes = self._num_classes or len(label2cat)
        target_num_classes = max(target_num_classes, ignore_index + 1)
        for idx in range(target_num_classes):
            if idx not in label2cat:
                label2cat[idx] = "ignore" if idx == ignore_index else str(idx)

        gt_labels = [r["gt"] for r in results]
        seg_preds = [r["pred"] for r in results]
        coords_list = [r.get("coord") for r in results] if self.distance_ranges else None
        if self.distance_ranges and (not coords_list or all(c is None for c in coords_list)):
            logger.warning(
                "T4SegMetric: distance_ranges is configured but no coordinates "
                "were extracted from data_batch. Range-based confusion matrices "
                "will be empty."
            )

        eval_result = t4_seg_eval(
            gt_labels,
            seg_preds,
            label2cat,
            ignore_index,
            coords_list=coords_list,
            distance_ranges=self.distance_ranges if self.distance_ranges else None,
            logger=logger,
        )

        if self.distance_ranges and eval_result.cm.sum() > 0:
            covered = sum(cm.sum() for cm in eval_result.range_cms.values())
            if covered == 0:
                logger.warning(
                    "T4SegMetric: total confusion matrix is non-empty but all "
                    "range-based confusion matrices are empty. This usually "
                    "means distance_ranges do not cover observed distances or "
                    "coordinate extraction is still misaligned."
                )

        self._log_confusion_matrix_images(eval_result, label2cat)
        self._eval_step += 1

        return eval_result.metrics

    def format_results(self, results: list) -> None:
        """Export predictions to one TXT file per sample.

        Files are written to ``submission_prefix``. When it is ``None`` a
        temporary directory is used and kept alive on the instance.
        """
        import os  # local import: the module only imports ``os.path``

        submission_prefix = self.submission_prefix
        if submission_prefix is None:
            # Hold a reference so the directory is not removed on return.
            self._tmp_dir = tempfile.TemporaryDirectory()
            submission_prefix = osp.join(self._tmp_dir.name, "results")
        os.makedirs(submission_prefix, exist_ok=True)

        ignore_index_val = self._get_ignore_index()
        label2cat_map = self._get_label2cat()
        # Size the lookup table so every output index fits, even if sparse.
        table_size = max(len(label2cat_map), max(label2cat_map, default=-1) + 1)

        cat2label = np.zeros(table_size, dtype=np.int64)
        for out_idx in label2cat_map:
            if out_idx != ignore_index_val and 0 <= out_idx < table_size:
                cat2label[out_idx] = out_idx

        meta = getattr(self, "dataset_meta", {}) or {}
        if "label2cat" in meta:
            # NOTE(review): only takes effect when the mapping's values are
            # integer output indices (original_label -> output_idx).
            for original_label, output_idx in meta["label2cat"].items():
                if (
                    isinstance(output_idx, int)
                    and output_idx != ignore_index_val
                    and 0 <= output_idx < table_size
                ):
                    cat2label[output_idx] = original_label

        for r in results:
            ann = r.get("eval_ann_info", {})
            sample_idx = (ann.get("point_cloud") or {}).get("lidar_idx", "unknown")
            pred_sem = r["pred"].astype(np.int64)
            pred_label = cat2label[pred_sem]
            curr_file = f"{submission_prefix}/{sample_idx}.txt"
            np.savetxt(curr_file, pred_label, fmt="%d")

    @staticmethod
    def _to_numpy(v) -> Optional[np.ndarray]:
        """Convert tensor / array-like to a flat int64 numpy array (``None`` passes through)."""
        if v is None:
            return None
        if hasattr(v, "detach"):
            # Tensors that require grad cannot be converted directly.
            v = v.detach()
        if hasattr(v, "cpu"):
            v = v.cpu().numpy()
        return np.asarray(v, dtype=np.int64).ravel()

    @staticmethod
    def _extract_batch_coords(data_batch: dict, data_samples: Sequence[dict]) -> Optional[List]:
        """Try to extract XY coordinates from ``data_batch['inputs']['points']``.

        Best effort: any failure yields ``None`` so evaluation continues
        without range-based metrics.

        Returns a list of length ``len(data_samples)`` where each entry is either a
        ``(N, 2)`` float32 array or ``None``; or ``None`` when no points are
        available at all.
        """
        try:
            n_samples = len(data_samples)

            def _unwrap_points_tensor(obj):
                """Best-effort unwrapping for collate/data wrappers."""
                cur = obj
                for _ in range(8):
                    if cur is None:
                        return None
                    if hasattr(cur, "tensor"):
                        cur = cur.tensor
                        continue
                    if hasattr(cur, "data") and not isinstance(cur, np.ndarray):
                        nxt = getattr(cur, "data")
                        if nxt is cur:
                            break
                        cur = nxt
                        continue
                    if isinstance(cur, (list, tuple)) and len(cur) == 1:
                        cur = cur[0]
                        continue
                    break
                return cur

            def _num_points(sample) -> Optional[int]:
                """Read ``num_points`` from a sample given as a dict or an object."""
                if isinstance(sample, dict):
                    n = sample.get("num_points")
                    if n is None:
                        n = (sample.get("metainfo") or {}).get("num_points")
                else:
                    n = (getattr(sample, "metainfo", {}) or {}).get("num_points")
                return int(n) if isinstance(n, (int, np.integer)) else None

            inputs = data_batch.get("inputs") or {}
            if not isinstance(inputs, dict):
                inputs = {}
            points_data = inputs.get("points")
            if points_data is None:
                return None

            if not isinstance(points_data, (list, tuple)):
                # A single packed array: split it by per-sample point counts.
                num_points_list = [_num_points(ds) for ds in data_samples]
                raw = _unwrap_points_tensor(points_data)
                if raw is not None and hasattr(raw, "cpu"):
                    raw = raw.cpu().numpy()
                raw_arr = np.asarray(raw) if raw is not None else None
                can_split = (
                    raw_arr is not None
                    and raw_arr.ndim >= 2
                    and all(v is not None for v in num_points_list)
                    and sum(num_points_list) <= raw_arr.shape[0]
                )
                if can_split:
                    split = []
                    st = 0
                    for n in num_points_list:
                        ed = st + int(n)
                        split.append(raw_arr[st:ed])
                        st = ed
                    points_data = split
                elif n_samples == 1:
                    points_data = [points_data]
                else:
                    # The per-sample split is unknown. Handing the whole batch
                    # to every sample would misalign coordinates and labels.
                    return [None] * n_samples

            coords: List = []
            for pts in points_data[:n_samples]:
                tens = _unwrap_points_tensor(pts) if pts is not None else None
                if tens is None:
                    coords.append(None)
                    continue
                if hasattr(tens, "cpu"):
                    tens = tens.cpu().numpy()
                arr = np.asarray(tens, dtype=np.float32)
                coords.append(arr[:, :2] if arr.ndim >= 2 and arr.shape[1] >= 2 else None)
            # Pad so callers can index by sample position.
            coords.extend([None] * (n_samples - len(coords)))
            return coords
        except Exception:
            return None

    def _get_label2cat(self) -> Dict[int, str]:
        """Resolve ``{output_index: class_name}`` from ``dataset_meta`` or constructor args."""
        meta = getattr(self, "dataset_meta", {}) or {}
        label2cat = meta.get("label2cat")
        if isinstance(label2cat, dict):
            return {int(k): str(v) for k, v in label2cat.items()}
        # Fallback: use class_names list if available
        class_names = meta.get("classes") or meta.get("class_names")
        if isinstance(class_names, (list, tuple)):
            return {i: str(name) for i, name in enumerate(class_names)}
        # Last resort: numeric class names
        nc = self._num_classes or 1
        return {i: str(i) for i in range(nc)}

    def _get_ignore_index(self) -> int:
        """Return the explicit ignore index, else ``dataset_meta['ignore_index']`` (default ``-1``)."""
        if self._ignore_index is not None:
            return self._ignore_index
        meta = getattr(self, "dataset_meta", {}) or {}
        return int(meta.get("ignore_index", -1))

    @staticmethod
    def _add_cm_image(vis, tag: str, cm: np.ndarray, class_names: List[str], label: str, step: int) -> None:
        """Render one confusion matrix and send it to ``vis``; always close the figure."""
        import matplotlib.pyplot as plt

        fig = plot_confusion_matrix(cm, class_names, label=label)
        try:
            img = figure_to_numpy(fig)
            try:
                vis.add_image(tag, img, step=step)
            except Exception as exc:
                # Image logging is auxiliary: report the failure and carry on.
                MMLogger.get_current_instance().warning(
                    f"T4SegMetric: failed to log image '{tag}': {exc}"
                )
        finally:
            plt.close(fig)

    def _log_confusion_matrix_images(self, eval_result, label2cat: Dict[int, str]) -> None:
        """Log normalised confusion-matrix images through the current Visualizer.

        Does nothing when no Visualizer instance can be obtained.
        """
        try:
            from mmengine.visualization import Visualizer

            vis = Visualizer.get_current_instance()
        except Exception:
            return

        num_classes = int(eval_result.cm.shape[0]) if eval_result.cm is not None else len(label2cat)
        class_names = [label2cat.get(i, str(i)) for i in range(num_classes)]
        step = self._eval_step
        tag_prefix = f"{self.prefix}/" if self.prefix else ""

        if eval_result.cm is not None:
            cm_label = "" if eval_result.cm.sum() > 0 else "empty"
            self._add_cm_image(vis, f"{tag_prefix}confusion_matrix", eval_result.cm, class_names, cm_label, step)

        for lbl, rcm in eval_result.range_cms.items():
            if rcm is None:
                continue
            cm_label = lbl if rcm.sum() > 0 else f"{lbl} (empty)"
            tag = f"confusion_matrix_{lbl.replace('-', '_').replace(' ', '_')}"
            self._add_cm_image(vis, f"{tag_prefix}{tag}", rcm, class_names, cm_label, step)
num_classes=num_classes, + ignore_index=ignore_index, + distance_ranges=distance_ranges, + prefix="val", +) +test_evaluator = dict( + type="T4SegMetric", + num_classes=num_classes, + ignore_index=ignore_index, + distance_ranges=distance_ranges, + prefix="test", +) vis_backends = [dict(type="LocalVisBackend"), dict(type="TensorboardVisBackend")] diff --git a/projects/PTv3/configs/semseg-pt-v3m1-0-t4dataset.py b/projects/PTv3/configs/semseg-pt-v3m1-0-t4dataset.py index 756fd4c32..e000357c3 100644 --- a/projects/PTv3/configs/semseg-pt-v3m1-0-t4dataset.py +++ b/projects/PTv3/configs/semseg-pt-v3m1-0-t4dataset.py @@ -54,6 +54,8 @@ "unpainted": ignore_index, } num_classes = 26 +distance_ranges = [(0, 20), (20, 40), (40, 60), (60, 80), (80, 100.0), (100.0, 120.0)] +metric_options = dict(distance_ranges=distance_ranges) # model settings model = dict( diff --git a/projects/PTv3/engines/hooks/evaluator.py b/projects/PTv3/engines/hooks/evaluator.py index 8732447bd..c23fd65c6 100644 --- a/projects/PTv3/engines/hooks/evaluator.py +++ b/projects/PTv3/engines/hooks/evaluator.py @@ -7,11 +7,14 @@ import numpy as np import torch -import torch.distributed as dist import utils.comm as comm -from utils.misc import intersection_and_union_gpu from autoware_ml.segmentation3d.datasets.utils import class_mapping_to_names +from autoware_ml.segmentation3d.evaluation import ( + SegEvalResult, + plot_confusion_matrix, + t4_seg_eval, +) from .builder import HOOKS from .default import HookBase @@ -26,6 +29,16 @@ def after_epoch(self): def eval(self): self.trainer.logger.info(">>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>") self.trainer.model.eval() + cfg = self.trainer.cfg + num_classes = cfg.data.num_classes + ignore_index = cfg.data.ignore_index + metric_options = getattr(cfg, "metric_options", None) or {} + distance_ranges = metric_options.get("distance_ranges") or [] + + local_results = [] + loss_sum = 0.0 + loss_count = 0 + for i, input_dict in enumerate(self.trainer.val_loader): for 
key in input_dict.keys(): if isinstance(input_dict[key], torch.Tensor): @@ -34,73 +47,99 @@ def eval(self): output_dict = self.trainer.model(input_dict) output = output_dict["seg_logits"] loss = output_dict["loss"] - pred = output.max(1)[1] - segment = input_dict["segment"] - intersection, union, target = intersection_and_union_gpu( - pred, - segment, - self.trainer.cfg.data.num_classes, - self.trainer.cfg.data.ignore_index, - ) - if comm.get_world_size() > 1: - dist.all_reduce(intersection), dist.all_reduce(union), dist.all_reduce(target) - intersection, union, target = ( - intersection.cpu().numpy(), - union.cpu().numpy(), - target.cpu().numpy(), - ) - # Here there is no need to sync since sync happened in dist.all_reduce - self.trainer.storage.put_scalar("val_intersection", intersection) - self.trainer.storage.put_scalar("val_union", union) - self.trainer.storage.put_scalar("val_target", target) - self.trainer.storage.put_scalar("val_loss", loss.item()) - info = "Test: [{iter}/{max_iter}] ".format(iter=i + 1, max_iter=len(self.trainer.val_loader)) - if "origin_coord" in input_dict.keys(): + pred = output.max(1)[1].detach().cpu().numpy() + segment = input_dict["segment"].detach().cpu().numpy() + + # Extract BEV coordinate for range-based metrics. + coord_np = None + if "coord" in input_dict: + coord = input_dict["coord"] + if isinstance(coord, torch.Tensor): + coord_np = coord.detach().cpu().numpy() + if coord_np.ndim != 2 or coord_np.shape[1] < 2: + coord_np = None + + local_results.append(dict(pred=pred, gt=segment, coord=coord_np)) + loss_sum += float(loss.item()) + loss_count += 1 + + info = f"Test: [{i + 1}/{len(self.trainer.val_loader)}] " + if "origin_coord" in input_dict: info = "Interp. 
" + info - self.trainer.logger.info( - info + "Loss {loss:.4f} ".format(iter=i + 1, max_iter=len(self.trainer.val_loader), loss=loss.item()) - ) - loss_avg = self.trainer.storage.history("val_loss").avg - intersection = self.trainer.storage.history("val_intersection").total - union = self.trainer.storage.history("val_union").total - target = self.trainer.storage.history("val_target").total - iou_class = intersection / (union + 1e-10) - acc_class = intersection / (target + 1e-10) - m_iou = np.mean(iou_class) - m_acc = np.mean(acc_class) - all_acc = sum(intersection) / (sum(target) + 1e-10) - self.trainer.logger.info("Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.".format(m_iou, m_acc, all_acc)) - - mapped_class_names = class_mapping_to_names( - self.trainer.cfg.class_mapping, - self.trainer.cfg.data.ignore_index, + self.trainer.logger.info(info + f"Loss {loss.item():.4f}") + + comm.synchronize() + gathered = comm.gather( + dict(results=local_results, loss_sum=loss_sum, loss_count=loss_count), + dst=0, ) - assert len(mapped_class_names) == self.trainer.cfg.data.num_classes, ( - "class_mapping_to_names length must match num_classes: " - f"{len(mapped_class_names)} vs {self.trainer.cfg.data.num_classes}" + if not comm.is_main_process(): + return + + merged_results = [] + total_loss_sum = 0.0 + total_loss_count = 0 + for item in gathered: + merged_results.extend(item["results"]) + total_loss_sum += float(item["loss_sum"]) + total_loss_count += int(item["loss_count"]) + loss_avg = total_loss_sum / max(total_loss_count, 1) + + mapped_class_names = class_mapping_to_names(cfg.class_mapping, ignore_index) + assert len(mapped_class_names) == num_classes, ( + "class_mapping_to_names length must match num_classes: " f"{len(mapped_class_names)} vs {num_classes}" + ) + label2cat = {i: mapped_class_names[i] for i in range(num_classes)} + + eval_result: SegEvalResult = t4_seg_eval( + gt_labels=[r["gt"] for r in merged_results], + seg_preds=[r["pred"] for r in merged_results], + 
label2cat=label2cat, + ignore_index=ignore_index, + coords_list=[r.get("coord") for r in merged_results] if distance_ranges else None, + distance_ranges=distance_ranges if distance_ranges else None, + logger=self.trainer.logger, ) - for i in range(self.trainer.cfg.data.num_classes): - self.trainer.logger.info( - "Class_{idx}-{name} Result: iou/accuracy {iou:.4f}/{accuracy:.4f}".format( - idx=i, - name=mapped_class_names[i], - iou=iou_class[i], - accuracy=acc_class[i], - ) - ) - current_epoch = self.trainer.epoch + 1 - if self.trainer.writer is not None: - self.trainer.writer.add_scalar("val/loss", loss_avg, current_epoch) - self.trainer.writer.add_scalar("val/mIoU", m_iou, current_epoch) - self.trainer.writer.add_scalar("val/mAcc", m_acc, current_epoch) - self.trainer.writer.add_scalar("val/allAcc", all_acc, current_epoch) - for i in range(self.trainer.cfg.data.num_classes): - name = mapped_class_names[i] - self.trainer.writer.add_scalar(f"val_class_iou/{name}", iou_class[i], current_epoch) - self.trainer.writer.add_scalar(f"val_class_acc/{name}", acc_class[i], current_epoch) + + epoch = self.trainer.epoch + 1 + writer = self.trainer.writer + if writer is not None: + writer.add_scalar("val/loss", loss_avg, epoch) + m = eval_result.metrics + writer.add_scalar("val/miou", m.get("miou", 0.0), epoch) + writer.add_scalar("val/acc", m.get("acc", 0.0), epoch) + writer.add_scalar("val/acc_cls", m.get("acc_cls", 0.0), epoch) + writer.add_scalar("val/mprecision", m.get("mprecision", 0.0), epoch) + writer.add_scalar("val/mrecall", m.get("mrecall", 0.0), epoch) + writer.add_scalar("val/mf1", m.get("mf1", 0.0), epoch) + for name in mapped_class_names: + writer.add_scalar(f"val/class_iou/{name}", m.get(name, 0.0), epoch) + writer.add_scalar(f"val/class_precision/{name}", m.get(f"precision/{name}", 0.0), epoch) + writer.add_scalar(f"val/class_recall/{name}", m.get(f"recall/{name}", 0.0), epoch) + writer.add_scalar(f"val/class_f1/{name}", m.get(f"f1/{name}", 0.0), epoch) + for lo, 
hi in distance_ranges: + lbl = f"{lo:g}-{hi:g}m" + writer.add_scalar(f"val/range/{lbl}/miou", m.get(f"{lbl}/miou", 0.0), epoch) + writer.add_scalar(f"val/range/{lbl}/acc", m.get(f"{lbl}/acc", 0.0), epoch) + writer.add_scalar(f"val/range/{lbl}/mprecision", m.get(f"{lbl}/mprecision", 0.0), epoch) + writer.add_scalar(f"val/range/{lbl}/mrecall", m.get(f"{lbl}/mrecall", 0.0), epoch) + writer.add_scalar(f"val/range/{lbl}/mf1", m.get(f"{lbl}/mf1", 0.0), epoch) + import matplotlib.pyplot as plt + + if eval_result.cm is not None and eval_result.cm.sum() > 0: + fig = plot_confusion_matrix(eval_result.cm, mapped_class_names) + writer.add_figure("val/confusion_matrix", fig, epoch) + plt.close(fig) + for lbl, rcm in eval_result.range_cms.items(): + if rcm is not None and rcm.sum() > 0: + fig = plot_confusion_matrix(rcm, mapped_class_names, label=lbl) + tag = f"val/confusion_matrix_{lbl.replace('-', '_').replace(' ', '_')}" + writer.add_figure(tag, fig, epoch) + plt.close(fig) + self.trainer.logger.info("<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<") - self.trainer.comm_info["current_metric_value"] = m_iou # save for saver - self.trainer.comm_info["current_metric_name"] = "mIoU" # save for saver + self.trainer.comm_info["current_metric_value"] = eval_result.metrics.get("miou", 0.0) + self.trainer.comm_info["current_metric_name"] = "miou" def after_train(self): - self.trainer.logger.info("Best {}: {:.4f}".format("mIoU", self.trainer.best_metric_value)) + self.trainer.logger.info("Best {}: {:.4f}".format("miou", self.trainer.best_metric_value)) diff --git a/projects/PTv3/engines/test.py b/projects/PTv3/engines/test.py index f06e989bc..05f7ca8a2 100644 --- a/projects/PTv3/engines/test.py +++ b/projects/PTv3/engines/test.py @@ -16,16 +16,21 @@ import utils.comm as comm from datasets import build_dataset, collate_fn from models import build_model +from tensorboardX import SummaryWriter from utils.logger import get_root_logger from utils.misc import ( AverageMeter, - 
intersection_and_union, make_dirs, ) from utils.registry import Registry from utils.visualization import get_segmentation_colors, visualize_point_cloud from autoware_ml.segmentation3d.datasets.utils import class_mapping_to_names +from autoware_ml.segmentation3d.evaluation import ( + SegEvalResult, + plot_confusion_matrix, + t4_seg_eval, +) from .defaults import create_ddp_model @@ -42,6 +47,7 @@ def __init__(self, cfg, model=None, test_loader=None, verbose=False) -> None: self.logger.info("=> Loading config ...") self.cfg = cfg self.verbose = verbose + self.writer = self.build_writer() if self.verbose: self.logger.info(f"Save path: {cfg.save_path}") self.logger.info(f"Config:\n{cfg.pretty_text}") @@ -100,6 +106,12 @@ def build_test_loader(self): ) return test_loader + def build_writer(self): + if not comm.is_main_process(): + return None + self.logger.info(f"Tensorboard writer logging dir: {self.cfg.save_path}") + return SummaryWriter(self.cfg.save_path) + def test(self): raise NotImplementedError @@ -128,14 +140,15 @@ def test(self): logger.info(">>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>") batch_time = AverageMeter() - intersection_meter = AverageMeter() - union_meter = AverageMeter() - target_meter = AverageMeter() + num_classes = self.cfg.data.num_classes + ignore_index = self.cfg.data.ignore_index + metric_options = getattr(self.cfg, "metric_options", None) or {} + distance_ranges = metric_options.get("distance_ranges") or [] + local_results = [] self.model.eval() save_path = os.path.join(self.cfg.save_path, "result") make_dirs(save_path) - # create submit folder only on main process if self.cfg.data.test.type == "NuScenesDataset" and comm.is_main_process(): import json @@ -153,8 +166,6 @@ def test(self): with open(os.path.join(save_path, "submit", "test", "submission.json"), "w") as f: json.dump(submission, f, indent=4) comm.synchronize() - record = {} - # fragment inference for idx, data_dict in enumerate(self.test_loader): end = time.time() 
data_dict = data_dict[0] # current assume batch size is 1 @@ -162,15 +173,15 @@ def test(self): segment = data_dict.pop("segment") data_name = data_dict.pop("name") pred_save_path = os.path.join(save_path, "{}_pred.npy".format(data_name)) - feat_save_path = os.path.join(save_path, "{}_feat.npy".format(data_name)) result_save_path = os.path.join(save_path, "{}_{}_pred.npz".format(idx, data_name)) if os.path.isfile(pred_save_path): logger.info("{}/{}: {}, loaded pred and label.".format(idx + 1, len(self.test_loader), data_name)) pred = np.load(pred_save_path) + feat_np = None if "origin_segment" in data_dict.keys(): segment = data_dict["origin_segment"] else: - pred = torch.zeros((segment.size, self.cfg.data.num_classes)).cuda() + pred = torch.zeros((segment.size, num_classes)).cuda() feat = torch.zeros((segment.size, 4)).cuda() for i in range(len(fragment_list)): fragment_batch_size = 1 @@ -201,17 +212,15 @@ def test(self): ) ) pred = pred.max(1)[1].data.cpu().numpy() + feat_np = feat.cpu().numpy() if "origin_segment" in data_dict.keys(): assert "inverse" in data_dict.keys() pred = pred[data_dict["inverse"]] - feat = feat[data_dict["inverse"]] + feat_np = feat_np[data_dict["inverse"]] segment = data_dict["origin_segment"] - # np.save(pred_save_path, pred) - # np.save(feat_save_path, feat.cpu().numpy()) - np.savez_compressed(result_save_path, pred=pred, feat=feat.cpu().numpy()) + np.savez_compressed(result_save_path, pred=pred, feat=feat_np) - # Call visualization if self.cfg.show: outputs = {"pred": pred, "segment": segment, "result_path": result_save_path} self.visualize_results(outputs, result_save_path) @@ -227,85 +236,100 @@ def test(self): ) ) - intersection, union, target = intersection_and_union( - pred, segment, self.cfg.data.num_classes, self.cfg.data.ignore_index - ) - intersection_meter.update(intersection) - union_meter.update(union) - target_meter.update(target) - record[data_name] = dict(intersection=intersection, union=union, target=target) - - mask = 
union != 0 - iou_class = intersection / (union + 1e-10) - iou = np.mean(iou_class[mask]) - acc = sum(intersection) / (sum(target) + 1e-10) - - m_iou = np.mean(intersection_meter.sum / (union_meter.sum + 1e-10)) - m_acc = np.mean(intersection_meter.sum / (target_meter.sum + 1e-10)) + coord_np = feat_np[:, :3] if feat_np is not None and feat_np.ndim == 2 and feat_np.shape[1] >= 3 else None + local_results.append(dict(pred=pred, gt=segment, coord=coord_np)) batch_time.update(time.time() - end) logger.info( "Test: {} [{}/{}]-{} " - "Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) " - "Accuracy {acc:.4f} ({m_acc:.4f}) " - "mIoU {iou:.4f} ({m_iou:.4f})".format( + "Batch {batch_time.val:.3f} ({batch_time.avg:.3f})".format( data_name, idx + 1, len(self.test_loader), segment.size, batch_time=batch_time, - acc=acc, - m_acc=m_acc, - iou=iou, - m_iou=m_iou, ) ) logger.info("Syncing ...") comm.synchronize() - record_sync = comm.gather(record, dst=0) + record_sync = comm.gather(local_results, dst=0) if comm.is_main_process(): - record = {} + merged_results = [] for _ in range(len(record_sync)): r = record_sync.pop() - record.update(r) + merged_results.extend(r) del r - intersection = np.sum([meters["intersection"] for _, meters in record.items()], axis=0) - union = np.sum([meters["union"] for _, meters in record.items()], axis=0) - target = np.sum([meters["target"] for _, meters in record.items()], axis=0) + + mapped_class_names = class_mapping_to_names(self.cfg.class_mapping, ignore_index) + assert len(mapped_class_names) == num_classes, ( + "class_mapping_to_names length must match num_classes: " f"{len(mapped_class_names)} vs {num_classes}" + ) + label2cat = {i: mapped_class_names[i] for i in range(num_classes)} if self.cfg.data.test.type == "S3DISDataset": + from autoware_ml.segmentation3d.evaluation.functional.t4_seg_eval import ( + fast_hist, + per_class_iou, + ) + + total_hist = sum(fast_hist(r["pred"].ravel(), r["gt"].ravel(), num_classes) for r in merged_results) + 
iou = per_class_iou(total_hist) + intersection = np.diag(total_hist) + union = total_hist.sum(1) + total_hist.sum(0) - np.diag(total_hist) + target = total_hist.sum(1) torch.save( dict(intersection=intersection, union=union, target=target), os.path.join(save_path, f"{self.test_loader.dataset.split}.pth"), ) - iou_class = intersection / (union + 1e-10) - accuracy_class = intersection / (target + 1e-10) - mIoU = np.mean(iou_class) - mAcc = np.mean(accuracy_class) - allAcc = sum(intersection) / (sum(target) + 1e-10) - - logger.info("Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}".format(mIoU, mAcc, allAcc)) - mapped_class_names = class_mapping_to_names( - self.cfg.class_mapping, - self.cfg.data.ignore_index, - ) - assert len(mapped_class_names) == self.cfg.data.num_classes, ( - "class_mapping_to_names length must match num_classes: " - f"{len(mapped_class_names)} vs {self.cfg.data.num_classes}" + eval_result: SegEvalResult = t4_seg_eval( + gt_labels=[r["gt"] for r in merged_results], + seg_preds=[r["pred"] for r in merged_results], + label2cat=label2cat, + ignore_index=ignore_index, + coords_list=[r.get("coord") for r in merged_results] if distance_ranges else None, + distance_ranges=distance_ranges if distance_ranges else None, + logger=logger, ) - for i in range(self.cfg.data.num_classes): - logger.info( - "Class_{idx} - {name} Result: iou/accuracy {iou:.4f}/{accuracy:.4f}".format( - idx=i, - name=mapped_class_names[i], - iou=iou_class[i], - accuracy=accuracy_class[i], - ) - ) - logger.info("<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<") + + if self.writer is not None: + import matplotlib.pyplot as plt + + m = eval_result.metrics + self.writer.add_scalar("test/miou", m.get("miou", 0.0), 0) + self.writer.add_scalar("test/acc", m.get("acc", 0.0), 0) + self.writer.add_scalar("test/acc_cls", m.get("acc_cls", 0.0), 0) + self.writer.add_scalar("test/mprecision", m.get("mprecision", 0.0), 0) + self.writer.add_scalar("test/mrecall", m.get("mrecall", 0.0), 0) + 
self.writer.add_scalar("test/mf1", m.get("mf1", 0.0), 0) + for name in mapped_class_names: + self.writer.add_scalar(f"test/class_iou/{name}", m.get(name, 0.0), 0) + self.writer.add_scalar(f"test/class_precision/{name}", m.get(f"precision/{name}", 0.0), 0) + self.writer.add_scalar(f"test/class_recall/{name}", m.get(f"recall/{name}", 0.0), 0) + self.writer.add_scalar(f"test/class_f1/{name}", m.get(f"f1/{name}", 0.0), 0) + for lo, hi in distance_ranges: + lbl = f"{lo:g}-{hi:g}m" + self.writer.add_scalar(f"test/range/{lbl}/miou", m.get(f"{lbl}/miou", 0.0), 0) + self.writer.add_scalar(f"test/range/{lbl}/mprecision", m.get(f"{lbl}/mprecision", 0.0), 0) + self.writer.add_scalar(f"test/range/{lbl}/mrecall", m.get(f"{lbl}/mrecall", 0.0), 0) + self.writer.add_scalar(f"test/range/{lbl}/mf1", m.get(f"{lbl}/mf1", 0.0), 0) + if eval_result.cm is not None and eval_result.cm.sum() > 0: + fig = plot_confusion_matrix(eval_result.cm, mapped_class_names) + self.writer.add_figure("test/confusion_matrix", fig, 0) + plt.close(fig) + for lbl, rcm in eval_result.range_cms.items(): + if rcm is not None and rcm.sum() > 0: + fig = plot_confusion_matrix(rcm, mapped_class_names, label=lbl) + tag = f"test/confusion_matrix_{lbl.replace('-', '_').replace(' ', '_')}" + self.writer.add_figure(tag, fig, 0) + plt.close(fig) + self.writer.flush() + + if self.writer is not None: + self.writer.close() + logger.info("<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<") @staticmethod def collate_fn(batch): From 719a19dd01feea520774b65c26f1678b91eb492e Mon Sep 17 00:00:00 2001 From: Amadeusz Szymko Date: Mon, 16 Mar 2026 00:04:38 +0900 Subject: [PATCH 02/23] fix(autoware_ml): missing init file Signed-off-by: Amadeusz Szymko --- .../segmentation3d/evaluation/__init__.py | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 autoware_ml/segmentation3d/evaluation/__init__.py diff --git a/autoware_ml/segmentation3d/evaluation/__init__.py b/autoware_ml/segmentation3d/evaluation/__init__.py 
new file mode 100644 index 000000000..4d61bbed0 --- /dev/null +++ b/autoware_ml/segmentation3d/evaluation/__init__.py @@ -0,0 +1,38 @@ +# Copyright (c) TIER IV, Inc. All rights reserved. +"""Segmentation evaluation: functional helpers + MMEngine metric adapter.""" + +from .functional.t4_seg_eval import ( + SegEvalResult, + compute_bev_distance, + fast_hist, + figure_to_numpy, + get_acc, + get_acc_cls, + normalize_confusion_matrix, + per_class_f1, + per_class_iou, + per_class_precision, + per_class_recall, + plot_confusion_matrix, + range_label, + t4_seg_eval, +) +from .metrics.t4_seg_metric import T4SegMetric + +__all__ = [ + "SegEvalResult", + "T4SegMetric", + "compute_bev_distance", + "fast_hist", + "figure_to_numpy", + "get_acc", + "get_acc_cls", + "normalize_confusion_matrix", + "per_class_f1", + "per_class_iou", + "per_class_precision", + "per_class_recall", + "plot_confusion_matrix", + "range_label", + "t4_seg_eval", +] From ba93aeb428054c448f110d58bfddad434d08b9d8 Mon Sep 17 00:00:00 2001 From: Amadeusz Szymko Date: Mon, 16 Mar 2026 00:48:02 +0900 Subject: [PATCH 03/23] fix(FRNet): remove metric's prefix Signed-off-by: Amadeusz Szymko --- .../FRNet/configs/t4dataset/frnet_1xb8_t4dataset-ot128-seg.py | 2 -- .../FRNet/configs/t4dataset/frnet_1xb8_t4dataset-qt128-seg.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-ot128-seg.py b/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-ot128-seg.py index 794f30363..a5df649cf 100644 --- a/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-ot128-seg.py +++ b/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-ot128-seg.py @@ -297,14 +297,12 @@ num_classes=num_classes, ignore_index=ignore_index, distance_ranges=distance_ranges, - prefix="val", ) test_evaluator = dict( type="T4SegMetric", num_classes=num_classes, ignore_index=ignore_index, distance_ranges=distance_ranges, - prefix="test", ) vis_backends = [dict(type="LocalVisBackend"), 
dict(type="TensorboardVisBackend")] diff --git a/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-qt128-seg.py b/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-qt128-seg.py index 1d7d3616d..959e7e0a4 100644 --- a/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-qt128-seg.py +++ b/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-qt128-seg.py @@ -297,14 +297,12 @@ num_classes=num_classes, ignore_index=ignore_index, distance_ranges=distance_ranges, - prefix="val", ) test_evaluator = dict( type="T4SegMetric", num_classes=num_classes, ignore_index=ignore_index, distance_ranges=distance_ranges, - prefix="test", ) vis_backends = [dict(type="LocalVisBackend"), dict(type="TensorboardVisBackend")] From 069646e8205c4313ac6928a4ebf615177d4da645 Mon Sep 17 00:00:00 2001 From: Amadeusz Szymko Date: Mon, 16 Mar 2026 09:40:54 +0900 Subject: [PATCH 04/23] style(autoware_ml): cleanup Signed-off-by: Amadeusz Szymko --- .../evaluation/functional/t4_seg_eval.py | 36 ++++++++----------- .../evaluation/metrics/t4_seg_metric.py | 9 ++--- projects/PTv3/engines/hooks/evaluator.py | 24 +++++-------- projects/PTv3/engines/test.py | 19 ++++------ 4 files changed, 32 insertions(+), 56 deletions(-) diff --git a/autoware_ml/segmentation3d/evaluation/functional/t4_seg_eval.py b/autoware_ml/segmentation3d/evaluation/functional/t4_seg_eval.py index edd2ee395..ae766833c 100644 --- a/autoware_ml/segmentation3d/evaluation/functional/t4_seg_eval.py +++ b/autoware_ml/segmentation3d/evaluation/functional/t4_seg_eval.py @@ -1,15 +1,11 @@ # Copyright (c) TIER IV, Inc. All rights reserved. -"""Pure-Python helpers for 3D semantic segmentation evaluation. - -This module is shared by FRNet and PTv3 and does not depend on mmdet3d -runner internals. 
-""" +"""Helpers for 3D semantic segmentation evaluation.""" from __future__ import annotations import io from dataclasses import dataclass, field -from typing import Dict, List, Optional, Sequence, Tuple, Union +from typing import Dict, List, Optional, Tuple import numpy as np from mmengine.logging import print_log @@ -31,7 +27,7 @@ def fast_hist(preds: np.ndarray, labels: np.ndarray, num_classes: int) -> np.nda Returns: ``np.ndarray`` of shape ``(num_classes, num_classes)``. """ - k = (labels >= 0) & (labels < num_classes) + k = (labels >= 0) & (labels < num_classes) & (preds >= 0) & (preds < num_classes) bin_count = np.bincount( num_classes * labels[k].astype(int) + preds[k], minlength=num_classes**2, @@ -41,7 +37,9 @@ def fast_hist(preds: np.ndarray, labels: np.ndarray, num_classes: int) -> np.nda def per_class_iou(hist: np.ndarray) -> np.ndarray: """Per-class IoU from cumulative confusion matrix.""" - return np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist) + _EPS) + tp = np.diag(hist) + denom = hist.sum(1) + hist.sum(0) - tp + return np.where(denom > _EPS, tp / (denom + _EPS), np.nan) def get_acc(hist: np.ndarray) -> float: @@ -110,11 +108,12 @@ def plot_confusion_matrix( ``matplotlib.figure.Figure`` - caller is responsible for closing it. 
""" import matplotlib - - matplotlib.use("Agg") import matplotlib.pyplot as plt from matplotlib.colors import Normalize as MplNormalize + if matplotlib.get_backend().lower() != "agg": + matplotlib.use("Agg") + nc = cm.shape[0] cm_plot = normalize_confusion_matrix(cm) if normalize else cm.astype(float) @@ -247,10 +246,10 @@ def _compute_bucket_metrics( if idx == ignore_index: continue name = label2cat.get(idx, str(idx)) - out[f"{prefix}{name}"] = float(iou[idx]) if not np.isnan(iou[idx]) else 0.0 - out[f"{prefix}precision/{name}"] = float(prec[idx]) if not np.isnan(prec[idx]) else 0.0 - out[f"{prefix}recall/{name}"] = float(rec[idx]) if not np.isnan(rec[idx]) else 0.0 - out[f"{prefix}f1/{name}"] = float(f1[idx]) if not np.isnan(f1[idx]) else 0.0 + out[f"{prefix}{name}"] = float(iou[idx]) + out[f"{prefix}precision/{name}"] = float(prec[idx]) + out[f"{prefix}recall/{name}"] = float(rec[idx]) + out[f"{prefix}f1/{name}"] = float(f1[idx]) return out @@ -338,19 +337,14 @@ def t4_seg_eval( use_ranges = bool(distance_ranges and coords_list is not None) total_hist = np.zeros((num_classes, num_classes), dtype=np.float64) - total_cm = np.zeros((num_classes, num_classes), dtype=np.float64) # same thing if use_ranges: range_hists: Dict[str, np.ndarray] = { range_label(lo, hi): np.zeros((num_classes, num_classes), dtype=np.float64) for lo, hi in distance_ranges # type: ignore[union-attr] } - range_cms: Dict[str, np.ndarray] = { - lbl: np.zeros((num_classes, num_classes), dtype=np.float64) for lbl in range_hists - } else: range_hists = {} - range_cms = {} for i in range(len(gt_labels)): gt = gt_labels[i].astype(np.int64) @@ -361,7 +355,6 @@ def t4_seg_eval( h = fast_hist(pred, gt, num_classes) total_hist += h - total_cm += h if use_ranges: assert coords_list is not None @@ -379,7 +372,6 @@ def t4_seg_eval( continue h_r = fast_hist(pred[mask], gt[mask], num_classes) range_hists[lbl] += h_r - range_cms[lbl] += h_r _print_bucket_table(total_hist, label2cat, ignore_index, 
title="Total", logger=logger) metrics = _compute_bucket_metrics(total_hist, label2cat, ignore_index, prefix="") @@ -390,4 +382,4 @@ def t4_seg_eval( _print_bucket_table(hist_r, label2cat, ignore_index, title=lbl, logger=logger) metrics.update(_compute_bucket_metrics(hist_r, label2cat, ignore_index, prefix=f"{lbl}/")) - return SegEvalResult(metrics=metrics, cm=total_cm, range_cms=range_cms) + return SegEvalResult(metrics=metrics, cm=total_hist, range_cms=range_hists) diff --git a/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py b/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py index 8db6fc4df..a8e9a4115 100644 --- a/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py +++ b/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py @@ -157,8 +157,7 @@ def format_results(self, results: list) -> None: """Export predictions to TXT files for submission (ScanNet format).""" submission_prefix = self.submission_prefix if submission_prefix is None: - tmp_dir = tempfile.TemporaryDirectory() - submission_prefix = osp.join(tmp_dir.name, "results") + submission_prefix = osp.join(tempfile.mkdtemp(), "results") mmcv.mkdir_or_exist(submission_prefix) ignore_index_val = self._get_ignore_index() @@ -308,6 +307,8 @@ def _log_confusion_matrix_images(self, eval_result, label2cat: Dict[int, str]) - except Exception: return + import matplotlib.pyplot as plt + num_classes = int(eval_result.cm.shape[0]) if eval_result.cm is not None else len(label2cat) class_names = [label2cat.get(i, str(i)) for i in range(num_classes)] step = self._eval_step @@ -321,8 +322,6 @@ def _log_confusion_matrix_images(self, eval_result, label2cat: Dict[int, str]) - vis.add_image(f"{tag_prefix}confusion_matrix", img, step=step) except Exception: pass - import matplotlib.pyplot as plt - plt.close(fig) for lbl, rcm in eval_result.range_cms.items(): @@ -336,6 +335,4 @@ def _log_confusion_matrix_images(self, eval_result, label2cat: Dict[int, str]) - 
vis.add_image(f"{tag_prefix}{tag}", img, step=step) except Exception: pass - import matplotlib.pyplot as plt - plt.close(fig) diff --git a/projects/PTv3/engines/hooks/evaluator.py b/projects/PTv3/engines/hooks/evaluator.py index c23fd65c6..1566a9094 100644 --- a/projects/PTv3/engines/hooks/evaluator.py +++ b/projects/PTv3/engines/hooks/evaluator.py @@ -12,6 +12,7 @@ from autoware_ml.segmentation3d.datasets.utils import class_mapping_to_names from autoware_ml.segmentation3d.evaluation import ( SegEvalResult, + figure_to_numpy, plot_confusion_matrix, t4_seg_eval, ) @@ -104,27 +105,20 @@ def eval(self): epoch = self.trainer.epoch + 1 writer = self.trainer.writer if writer is not None: + import matplotlib.pyplot as plt + writer.add_scalar("val/loss", loss_avg, epoch) m = eval_result.metrics - writer.add_scalar("val/miou", m.get("miou", 0.0), epoch) - writer.add_scalar("val/acc", m.get("acc", 0.0), epoch) - writer.add_scalar("val/acc_cls", m.get("acc_cls", 0.0), epoch) - writer.add_scalar("val/mprecision", m.get("mprecision", 0.0), epoch) - writer.add_scalar("val/mrecall", m.get("mrecall", 0.0), epoch) - writer.add_scalar("val/mf1", m.get("mf1", 0.0), epoch) + for key in ("miou", "acc", "acc_cls", "mprecision", "mrecall", "mf1"): + writer.add_scalar(f"val/{key}", m.get(key, 0.0), epoch) for name in mapped_class_names: writer.add_scalar(f"val/class_iou/{name}", m.get(name, 0.0), epoch) - writer.add_scalar(f"val/class_precision/{name}", m.get(f"precision/{name}", 0.0), epoch) - writer.add_scalar(f"val/class_recall/{name}", m.get(f"recall/{name}", 0.0), epoch) - writer.add_scalar(f"val/class_f1/{name}", m.get(f"f1/{name}", 0.0), epoch) + for sub in ("precision", "recall", "f1"): + writer.add_scalar(f"val/class_{sub}/{name}", m.get(f"{sub}/{name}", 0.0), epoch) for lo, hi in distance_ranges: lbl = f"{lo:g}-{hi:g}m" - writer.add_scalar(f"val/range/{lbl}/miou", m.get(f"{lbl}/miou", 0.0), epoch) - writer.add_scalar(f"val/range/{lbl}/acc", m.get(f"{lbl}/acc", 0.0), epoch) - 
writer.add_scalar(f"val/range/{lbl}/mprecision", m.get(f"{lbl}/mprecision", 0.0), epoch) - writer.add_scalar(f"val/range/{lbl}/mrecall", m.get(f"{lbl}/mrecall", 0.0), epoch) - writer.add_scalar(f"val/range/{lbl}/mf1", m.get(f"{lbl}/mf1", 0.0), epoch) - import matplotlib.pyplot as plt + for key in ("miou", "acc", "mprecision", "mrecall", "mf1"): + writer.add_scalar(f"val/range/{lbl}/{key}", m.get(f"{lbl}/{key}", 0.0), epoch) if eval_result.cm is not None and eval_result.cm.sum() > 0: fig = plot_confusion_matrix(eval_result.cm, mapped_class_names) diff --git a/projects/PTv3/engines/test.py b/projects/PTv3/engines/test.py index 05f7ca8a2..d0c84b021 100644 --- a/projects/PTv3/engines/test.py +++ b/projects/PTv3/engines/test.py @@ -298,23 +298,16 @@ def test(self): import matplotlib.pyplot as plt m = eval_result.metrics - self.writer.add_scalar("test/miou", m.get("miou", 0.0), 0) - self.writer.add_scalar("test/acc", m.get("acc", 0.0), 0) - self.writer.add_scalar("test/acc_cls", m.get("acc_cls", 0.0), 0) - self.writer.add_scalar("test/mprecision", m.get("mprecision", 0.0), 0) - self.writer.add_scalar("test/mrecall", m.get("mrecall", 0.0), 0) - self.writer.add_scalar("test/mf1", m.get("mf1", 0.0), 0) + for key in ("miou", "acc", "acc_cls", "mprecision", "mrecall", "mf1"): + self.writer.add_scalar(f"test/{key}", m.get(key, 0.0), 0) for name in mapped_class_names: self.writer.add_scalar(f"test/class_iou/{name}", m.get(name, 0.0), 0) - self.writer.add_scalar(f"test/class_precision/{name}", m.get(f"precision/{name}", 0.0), 0) - self.writer.add_scalar(f"test/class_recall/{name}", m.get(f"recall/{name}", 0.0), 0) - self.writer.add_scalar(f"test/class_f1/{name}", m.get(f"f1/{name}", 0.0), 0) + for sub in ("precision", "recall", "f1"): + self.writer.add_scalar(f"test/class_{sub}/{name}", m.get(f"{sub}/{name}", 0.0), 0) for lo, hi in distance_ranges: lbl = f"{lo:g}-{hi:g}m" - self.writer.add_scalar(f"test/range/{lbl}/miou", m.get(f"{lbl}/miou", 0.0), 0) - 
self.writer.add_scalar(f"test/range/{lbl}/mprecision", m.get(f"{lbl}/mprecision", 0.0), 0) - self.writer.add_scalar(f"test/range/{lbl}/mrecall", m.get(f"{lbl}/mrecall", 0.0), 0) - self.writer.add_scalar(f"test/range/{lbl}/mf1", m.get(f"{lbl}/mf1", 0.0), 0) + for key in ("miou", "mprecision", "mrecall", "mf1"): + self.writer.add_scalar(f"test/range/{lbl}/{key}", m.get(f"{lbl}/{key}", 0.0), 0) if eval_result.cm is not None and eval_result.cm.sum() > 0: fig = plot_confusion_matrix(eval_result.cm, mapped_class_names) self.writer.add_figure("test/confusion_matrix", fig, 0) From b473c960f5d532e978490aa0885e6370dfa5cda4 Mon Sep 17 00:00:00 2001 From: Amadeusz Szymko Date: Mon, 16 Mar 2026 09:48:49 +0900 Subject: [PATCH 05/23] style(autoware_ml): imports Signed-off-by: Amadeusz Szymko --- .../evaluation/functional/t4_seg_eval.py | 15 ++++++--------- .../evaluation/metrics/t4_seg_metric.py | 6 ++---- projects/PTv3/engines/hooks/evaluator.py | 3 +-- projects/PTv3/engines/test.py | 15 ++++++--------- 4 files changed, 15 insertions(+), 24 deletions(-) diff --git a/autoware_ml/segmentation3d/evaluation/functional/t4_seg_eval.py b/autoware_ml/segmentation3d/evaluation/functional/t4_seg_eval.py index ae766833c..4aabffb9e 100644 --- a/autoware_ml/segmentation3d/evaluation/functional/t4_seg_eval.py +++ b/autoware_ml/segmentation3d/evaluation/functional/t4_seg_eval.py @@ -7,10 +7,16 @@ from dataclasses import dataclass, field from typing import Dict, List, Optional, Tuple +import matplotlib +import matplotlib.pyplot as plt import numpy as np +from matplotlib.colors import Normalize as MplNormalize from mmengine.logging import print_log +from PIL import Image from terminaltables import AsciiTable +matplotlib.use("Agg") + _EPS = 1e-10 @@ -107,13 +113,6 @@ def plot_confusion_matrix( Returns: ``matplotlib.figure.Figure`` - caller is responsible for closing it. 
""" - import matplotlib - import matplotlib.pyplot as plt - from matplotlib.colors import Normalize as MplNormalize - - if matplotlib.get_backend().lower() != "agg": - matplotlib.use("Agg") - nc = cm.shape[0] cm_plot = normalize_confusion_matrix(cm) if normalize else cm.astype(float) @@ -157,8 +156,6 @@ def figure_to_numpy(fig) -> np.ndarray: buf = io.BytesIO() fig.savefig(buf, format="png", bbox_inches="tight") buf.seek(0) - from PIL import Image # lazy import; PIL is a lightweight dep - img = Image.open(buf).convert("RGB") return np.array(img) diff --git a/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py b/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py index a8e9a4115..4a955cba5 100644 --- a/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py +++ b/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py @@ -5,11 +5,13 @@ import tempfile from typing import Dict, List, Optional, Sequence, Tuple +import matplotlib.pyplot as plt import mmcv import numpy as np from mmdet3d.registry import METRICS from mmengine.evaluator import BaseMetric from mmengine.logging import MMLogger +from mmengine.visualization import Visualizer from autoware_ml.segmentation3d.evaluation.functional.t4_seg_eval import ( figure_to_numpy, @@ -301,14 +303,10 @@ def _get_ignore_index(self) -> int: def _log_confusion_matrix_images(self, eval_result, label2cat: Dict[int, str]) -> None: """Log normalised confusion-matrix images to TensorBoard (rank-0 only).""" try: - from mmengine.visualization import Visualizer - vis = Visualizer.get_current_instance() except Exception: return - import matplotlib.pyplot as plt - num_classes = int(eval_result.cm.shape[0]) if eval_result.cm is not None else len(label2cat) class_names = [label2cat.get(i, str(i)) for i in range(num_classes)] step = self._eval_step diff --git a/projects/PTv3/engines/hooks/evaluator.py b/projects/PTv3/engines/hooks/evaluator.py index 1566a9094..8dad6578e 100644 --- 
a/projects/PTv3/engines/hooks/evaluator.py +++ b/projects/PTv3/engines/hooks/evaluator.py @@ -5,6 +5,7 @@ Please cite our work if the code is helpful to you. """ +import matplotlib.pyplot as plt import numpy as np import torch import utils.comm as comm @@ -105,8 +106,6 @@ def eval(self): epoch = self.trainer.epoch + 1 writer = self.trainer.writer if writer is not None: - import matplotlib.pyplot as plt - writer.add_scalar("val/loss", loss_avg, epoch) m = eval_result.metrics for key in ("miou", "acc", "acc_cls", "mprecision", "mrecall", "mf1"): diff --git a/projects/PTv3/engines/test.py b/projects/PTv3/engines/test.py index d0c84b021..bb3be790d 100644 --- a/projects/PTv3/engines/test.py +++ b/projects/PTv3/engines/test.py @@ -5,10 +5,12 @@ Please cite our work if the code is helpful to you. """ +import json import os import time from collections import OrderedDict +import matplotlib.pyplot as plt import numpy as np import torch import torch.nn.functional as F @@ -31,6 +33,10 @@ plot_confusion_matrix, t4_seg_eval, ) +from autoware_ml.segmentation3d.evaluation.functional.t4_seg_eval import ( + fast_hist, + per_class_iou, +) from .defaults import create_ddp_model @@ -150,8 +156,6 @@ def test(self): save_path = os.path.join(self.cfg.save_path, "result") make_dirs(save_path) if self.cfg.data.test.type == "NuScenesDataset" and comm.is_main_process(): - import json - make_dirs(os.path.join(save_path, "submit", "lidarseg", "test")) make_dirs(os.path.join(save_path, "submit", "test")) submission = dict( @@ -269,11 +273,6 @@ def test(self): label2cat = {i: mapped_class_names[i] for i in range(num_classes)} if self.cfg.data.test.type == "S3DISDataset": - from autoware_ml.segmentation3d.evaluation.functional.t4_seg_eval import ( - fast_hist, - per_class_iou, - ) - total_hist = sum(fast_hist(r["pred"].ravel(), r["gt"].ravel(), num_classes) for r in merged_results) iou = per_class_iou(total_hist) intersection = np.diag(total_hist) @@ -295,8 +294,6 @@ def test(self): ) if 
self.writer is not None: - import matplotlib.pyplot as plt - m = eval_result.metrics for key in ("miou", "acc", "acc_cls", "mprecision", "mrecall", "mf1"): self.writer.add_scalar(f"test/{key}", m.get(key, 0.0), 0) From 10a0b3e1a6a025bc75cb557f6133e61792c609c9 Mon Sep 17 00:00:00 2001 From: Amadeusz Szymko Date: Mon, 16 Mar 2026 12:37:52 +0900 Subject: [PATCH 06/23] feat(autoware_ml): add per-class range-based metrics Signed-off-by: Amadeusz Szymko --- projects/FRNet/configs/nuscenes/frnet_1xb4_nus-seg.py | 5 ++++- .../t4dataset/frnet_1xb8_t4dataset-ot128-seg.py | 5 ++++- .../t4dataset/frnet_1xb8_t4dataset-qt128-seg.py | 5 ++++- projects/PTv3/engines/hooks/evaluator.py | 10 +++++++++- projects/PTv3/engines/test.py | 10 +++++++++- 5 files changed, 30 insertions(+), 5 deletions(-) diff --git a/projects/FRNet/configs/nuscenes/frnet_1xb4_nus-seg.py b/projects/FRNet/configs/nuscenes/frnet_1xb4_nus-seg.py index 8a79a6ce8..cdeac1488 100644 --- a/projects/FRNet/configs/nuscenes/frnet_1xb4_nus-seg.py +++ b/projects/FRNet/configs/nuscenes/frnet_1xb4_nus-seg.py @@ -309,4 +309,7 @@ log_processor = dict(type="LogProcessor", window_size=50, by_epoch=False) -default_hooks = dict(checkpoint=dict(type="CheckpointHook", by_epoch=False, interval=-1, save_best="miou")) +default_hooks = dict( + logger=dict(type="LoggerHook", log_metric_by_epoch=False), + checkpoint=dict(type="CheckpointHook", by_epoch=False, interval=-1, save_best="miou"), +) diff --git a/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-ot128-seg.py b/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-ot128-seg.py index a5df649cf..6c0200e6c 100644 --- a/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-ot128-seg.py +++ b/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-ot128-seg.py @@ -335,4 +335,7 @@ log_processor = dict(type="LogProcessor", window_size=50, by_epoch=False) -default_hooks = dict(checkpoint=dict(type="CheckpointHook", by_epoch=False, interval=-1, save_best="miou")) +default_hooks = dict( 
+ logger=dict(type="LoggerHook", log_metric_by_epoch=False), + checkpoint=dict(type="CheckpointHook", by_epoch=False, interval=-1, save_best="miou"), +) diff --git a/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-qt128-seg.py b/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-qt128-seg.py index 959e7e0a4..a711a08b1 100644 --- a/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-qt128-seg.py +++ b/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-qt128-seg.py @@ -335,4 +335,7 @@ log_processor = dict(type="LogProcessor", window_size=50, by_epoch=False) -default_hooks = dict(checkpoint=dict(type="CheckpointHook", by_epoch=False, interval=-1, save_best="miou")) +default_hooks = dict( + logger=dict(type="LoggerHook", log_metric_by_epoch=False), + checkpoint=dict(type="CheckpointHook", by_epoch=False, interval=-1, save_best="miou"), +) diff --git a/projects/PTv3/engines/hooks/evaluator.py b/projects/PTv3/engines/hooks/evaluator.py index 8dad6578e..25cc4167c 100644 --- a/projects/PTv3/engines/hooks/evaluator.py +++ b/projects/PTv3/engines/hooks/evaluator.py @@ -116,8 +116,16 @@ def eval(self): writer.add_scalar(f"val/class_{sub}/{name}", m.get(f"{sub}/{name}", 0.0), epoch) for lo, hi in distance_ranges: lbl = f"{lo:g}-{hi:g}m" - for key in ("miou", "acc", "mprecision", "mrecall", "mf1"): + for key in ("miou", "acc", "acc_cls", "mprecision", "mrecall", "mf1"): writer.add_scalar(f"val/range/{lbl}/{key}", m.get(f"{lbl}/{key}", 0.0), epoch) + for name in mapped_class_names: + writer.add_scalar(f"val/range/{lbl}/class_iou/{name}", m.get(f"{lbl}/{name}", 0.0), epoch) + for sub in ("precision", "recall", "f1"): + writer.add_scalar( + f"val/range/{lbl}/class_{sub}/{name}", + m.get(f"{lbl}/{sub}/{name}", 0.0), + epoch, + ) if eval_result.cm is not None and eval_result.cm.sum() > 0: fig = plot_confusion_matrix(eval_result.cm, mapped_class_names) diff --git a/projects/PTv3/engines/test.py b/projects/PTv3/engines/test.py index bb3be790d..b93ee942b 100644 --- 
a/projects/PTv3/engines/test.py +++ b/projects/PTv3/engines/test.py @@ -303,8 +303,16 @@ def test(self): self.writer.add_scalar(f"test/class_{sub}/{name}", m.get(f"{sub}/{name}", 0.0), 0) for lo, hi in distance_ranges: lbl = f"{lo:g}-{hi:g}m" - for key in ("miou", "mprecision", "mrecall", "mf1"): + for key in ("miou", "acc", "acc_cls", "mprecision", "mrecall", "mf1"): self.writer.add_scalar(f"test/range/{lbl}/{key}", m.get(f"{lbl}/{key}", 0.0), 0) + for name in mapped_class_names: + self.writer.add_scalar(f"test/range/{lbl}/class_iou/{name}", m.get(f"{lbl}/{name}", 0.0), 0) + for sub in ("precision", "recall", "f1"): + self.writer.add_scalar( + f"test/range/{lbl}/class_{sub}/{name}", + m.get(f"{lbl}/{sub}/{name}", 0.0), + 0, + ) if eval_result.cm is not None and eval_result.cm.sum() > 0: fig = plot_confusion_matrix(eval_result.cm, mapped_class_names) self.writer.add_figure("test/confusion_matrix", fig, 0) From 82c8e26827af212924fcbebf238bd854beafd0d1 Mon Sep 17 00:00:00 2001 From: Amadeusz Szymko Date: Tue, 17 Mar 2026 22:06:59 +0900 Subject: [PATCH 07/23] fix(PTv3): matplotlib agg Signed-off-by: Amadeusz Szymko Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- projects/PTv3/engines/test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/projects/PTv3/engines/test.py b/projects/PTv3/engines/test.py index b93ee942b..36f04c1ab 100644 --- a/projects/PTv3/engines/test.py +++ b/projects/PTv3/engines/test.py @@ -10,6 +10,8 @@ import time from collections import OrderedDict +import matplotlib +matplotlib.use("Agg") import matplotlib.pyplot as plt import numpy as np import torch From a1e1152cbd806c951464fc7f7716b92c85f23210 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 17 Mar 2026 13:07:20 +0000 Subject: [PATCH 08/23] ci(pre-commit): autofix --- projects/PTv3/engines/test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/projects/PTv3/engines/test.py 
b/projects/PTv3/engines/test.py index 36f04c1ab..427bb0bee 100644 --- a/projects/PTv3/engines/test.py +++ b/projects/PTv3/engines/test.py @@ -11,6 +11,7 @@ from collections import OrderedDict import matplotlib + matplotlib.use("Agg") import matplotlib.pyplot as plt import numpy as np From a9af54e05ae23a35bb9b00f31f0fa6f4304a2209 Mon Sep 17 00:00:00 2001 From: Amadeusz Szymko Date: Tue, 17 Mar 2026 22:12:19 +0900 Subject: [PATCH 09/23] fix(PTv3): matplotlib agg Signed-off-by: Amadeusz Szymko --- projects/PTv3/engines/hooks/evaluator.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/projects/PTv3/engines/hooks/evaluator.py b/projects/PTv3/engines/hooks/evaluator.py index 25cc4167c..f5f6a25ef 100644 --- a/projects/PTv3/engines/hooks/evaluator.py +++ b/projects/PTv3/engines/hooks/evaluator.py @@ -6,6 +6,8 @@ """ import matplotlib.pyplot as plt + +matplotlib.use("Agg") import numpy as np import torch import utils.comm as comm From b2d00119a5daab7d458a6b65ba8da64ca9b9993b Mon Sep 17 00:00:00 2001 From: Amadeusz Szymko Date: Tue, 17 Mar 2026 22:13:57 +0900 Subject: [PATCH 10/23] fix(PTv3): unused imports Signed-off-by: Amadeusz Szymko --- projects/PTv3/engines/hooks/evaluator.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/projects/PTv3/engines/hooks/evaluator.py b/projects/PTv3/engines/hooks/evaluator.py index f5f6a25ef..d9c095bf7 100644 --- a/projects/PTv3/engines/hooks/evaluator.py +++ b/projects/PTv3/engines/hooks/evaluator.py @@ -8,14 +8,12 @@ import matplotlib.pyplot as plt matplotlib.use("Agg") -import numpy as np import torch import utils.comm as comm from autoware_ml.segmentation3d.datasets.utils import class_mapping_to_names from autoware_ml.segmentation3d.evaluation import ( SegEvalResult, - figure_to_numpy, plot_confusion_matrix, t4_seg_eval, ) From ac2b6b8398efc5a41dd43d93d8dc310ccf62e900 Mon Sep 17 00:00:00 2001 From: Amadeusz Szymko Date: Tue, 17 Mar 2026 22:15:48 +0900 Subject: [PATCH 11/23] fix(autoware_ml): matplotlib agg Signed-off-by: 
Amadeusz Szymko --- .../segmentation3d/evaluation/functional/t4_seg_eval.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/autoware_ml/segmentation3d/evaluation/functional/t4_seg_eval.py b/autoware_ml/segmentation3d/evaluation/functional/t4_seg_eval.py index 4aabffb9e..b908ecb7f 100644 --- a/autoware_ml/segmentation3d/evaluation/functional/t4_seg_eval.py +++ b/autoware_ml/segmentation3d/evaluation/functional/t4_seg_eval.py @@ -8,6 +8,8 @@ from typing import Dict, List, Optional, Tuple import matplotlib + +matplotlib.use("Agg") import matplotlib.pyplot as plt import numpy as np from matplotlib.colors import Normalize as MplNormalize @@ -15,8 +17,6 @@ from PIL import Image from terminaltables import AsciiTable -matplotlib.use("Agg") - _EPS = 1e-10 From 89013302d08e3a450f31ae20a4412804762acd79 Mon Sep 17 00:00:00 2001 From: Amadeusz Szymko Date: Tue, 17 Mar 2026 22:19:43 +0900 Subject: [PATCH 12/23] feat(PTv3): cache feats Signed-off-by: Amadeusz Szymko --- projects/PTv3/engines/test.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/projects/PTv3/engines/test.py b/projects/PTv3/engines/test.py index 427bb0bee..1532c1486 100644 --- a/projects/PTv3/engines/test.py +++ b/projects/PTv3/engines/test.py @@ -184,7 +184,12 @@ def test(self): if os.path.isfile(pred_save_path): logger.info("{}/{}: {}, loaded pred and label.".format(idx + 1, len(self.test_loader), data_name)) pred = np.load(pred_save_path) + # Try to recover cached features from the corresponding NPZ file, if available. 
feat_np = None + if os.path.isfile(result_save_path): + cached_result = np.load(result_save_path) + if "feat" in getattr(cached_result, "files", []): + feat_np = cached_result["feat"] if "origin_segment" in data_dict.keys(): segment = data_dict["origin_segment"] else: From 4e8ff9b3caaa6f693b17cf311f5fb048462a0da9 Mon Sep 17 00:00:00 2001 From: Amadeusz Szymko Date: Tue, 17 Mar 2026 22:30:07 +0900 Subject: [PATCH 13/23] fix(autoware_ml): cat2label bounds Signed-off-by: Amadeusz Szymko --- .../evaluation/metrics/t4_seg_metric.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py b/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py index 4a955cba5..b677cb3ed 100644 --- a/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py +++ b/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py @@ -164,11 +164,19 @@ def format_results(self, results: list) -> None: ignore_index_val = self._get_ignore_index() label2cat_map = self._get_label2cat() - num_labels = len(label2cat_map) + base_num_labels = len(label2cat_map) + + # Ensure cat2label covers all possible prediction indices, including ignore_index. 
+ num_labels = max( + base_num_labels, + getattr(self, "_num_classes", base_num_labels), + ) + if isinstance(ignore_index_val, int) and ignore_index_val >= 0: + num_labels = max(num_labels, ignore_index_val + 1) cat2label = np.zeros(num_labels, dtype=np.int64) for out_idx, _ in label2cat_map.items(): - if out_idx != ignore_index_val: + if out_idx != ignore_index_val and 0 <= out_idx < num_labels: cat2label[out_idx] = out_idx meta = getattr(self, "dataset_meta", {}) or {} From d85f909a6410d0322b7cf399b9f516f6207a56e9 Mon Sep 17 00:00:00 2001 From: Amadeusz Szymko Date: Tue, 17 Mar 2026 22:32:04 +0900 Subject: [PATCH 14/23] fix(autoware_ml): unused import & arg Signed-off-by: Amadeusz Szymko --- .../segmentation3d/evaluation/metrics/t4_seg_metric.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py b/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py index b677cb3ed..33142abec 100644 --- a/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py +++ b/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py @@ -16,7 +16,6 @@ from autoware_ml.segmentation3d.evaluation.functional.t4_seg_eval import ( figure_to_numpy, plot_confusion_matrix, - range_label, t4_seg_eval, ) @@ -39,8 +38,6 @@ class T4SegMetric(BaseMetric): Device used for collecting results across ranks. ``'cpu'`` or ``'gpu'``. prefix: Optional metric-name prefix. - pklfile_prefix: - If set, raw predictions are written to ``{pklfile_prefix}.pkl``. submission_prefix: If set, predictions are exported in ScanNet TXT format to this path instead of computing metrics. 
@@ -55,7 +52,6 @@ def __init__( distance_ranges: Optional[List[Tuple[float, float]]] = None, collect_device: str = "cpu", prefix: Optional[str] = None, - pklfile_prefix: Optional[str] = None, submission_prefix: Optional[str] = None, **kwargs, ): @@ -63,7 +59,6 @@ def __init__( self._num_classes = num_classes self._ignore_index = ignore_index self.distance_ranges = distance_ranges or [] - self.pklfile_prefix = pklfile_prefix self.submission_prefix = submission_prefix # Counter used as the TensorBoard global-step for CM images. self._eval_step: int = 0 From 9a2c07513d00c7ba7bc1da3e736a1c0a049eff77 Mon Sep 17 00:00:00 2001 From: Amadeusz Szymko Date: Tue, 17 Mar 2026 23:14:04 +0900 Subject: [PATCH 15/23] fix(PTv3): reduce distributed seg eval via confusion matrices Signed-off-by: Amadeusz Szymko --- .../segmentation3d/evaluation/__init__.py | 4 + .../evaluation/functional/t4_seg_eval.py | 111 ++++++++++++------ projects/PTv3/engines/hooks/evaluator.py | 65 ++++++---- projects/PTv3/engines/test.py | 63 ++++++---- 4 files changed, 164 insertions(+), 79 deletions(-) diff --git a/autoware_ml/segmentation3d/evaluation/__init__.py b/autoware_ml/segmentation3d/evaluation/__init__.py index 4d61bbed0..d0d38e2c5 100644 --- a/autoware_ml/segmentation3d/evaluation/__init__.py +++ b/autoware_ml/segmentation3d/evaluation/__init__.py @@ -16,6 +16,8 @@ plot_confusion_matrix, range_label, t4_seg_eval, + t4_seg_eval_from_hists, + update_seg_eval_histograms, ) from .metrics.t4_seg_metric import T4SegMetric @@ -35,4 +37,6 @@ "plot_confusion_matrix", "range_label", "t4_seg_eval", + "t4_seg_eval_from_hists", + "update_seg_eval_histograms", ] diff --git a/autoware_ml/segmentation3d/evaluation/functional/t4_seg_eval.py b/autoware_ml/segmentation3d/evaluation/functional/t4_seg_eval.py index b908ecb7f..3ee98d7ae 100644 --- a/autoware_ml/segmentation3d/evaluation/functional/t4_seg_eval.py +++ b/autoware_ml/segmentation3d/evaluation/functional/t4_seg_eval.py @@ -197,6 +197,41 @@ class 
SegEvalResult: range_cms: Dict[str, np.ndarray] = field(default_factory=dict) +def update_seg_eval_histograms( + total_hist: np.ndarray, + pred: np.ndarray, + gt: np.ndarray, + num_classes: int, + ignore_index: int, + range_hists: Optional[Dict[str, np.ndarray]] = None, + coord: Optional[np.ndarray] = None, + distance_ranges: Optional[List[Tuple[float, float]]] = None, +) -> None: + """Accumulate one sample into total and optional range confusion matrices.""" + pred = np.asarray(pred, dtype=np.int64).copy() + gt = np.asarray(gt, dtype=np.int64).copy() + + pred[gt == ignore_index] = ignore_index + gt[gt == ignore_index] = ignore_index + + total_hist += fast_hist(pred, gt, num_classes) + + if not range_hists or not distance_ranges or coord is None: + return + + coord = np.asarray(coord) + if coord.ndim != 2 or coord.shape[1] < 2 or coord.shape[0] != gt.size: + return + + dist = compute_bev_distance(coord) + for lo, hi in distance_ranges: + lbl = range_label(lo, hi) + mask = (dist >= lo) & (dist < hi) + if not np.any(mask): + continue + range_hists[lbl] += fast_hist(pred[mask], gt[mask], num_classes) + + def _compute_bucket_metrics( hist: np.ndarray, label2cat: Dict[int, str], @@ -295,6 +330,30 @@ def _print_bucket_table( print_log("\n" + table.table, logger=logger) +def t4_seg_eval_from_hists( + total_hist: np.ndarray, + label2cat: Dict[int, str], + ignore_index: int, + range_hists: Optional[Dict[str, np.ndarray]] = None, + logger=None, +) -> SegEvalResult: + """Build scalar metrics and tables from pre-aggregated confusion matrices.""" + total_hist = np.asarray(total_hist, dtype=np.float64) + range_hists = range_hists or {} + + _print_bucket_table(total_hist, label2cat, ignore_index, title="Total", logger=logger) + metrics = _compute_bucket_metrics(total_hist, label2cat, ignore_index, prefix="") + + for lbl, hist_r in range_hists.items(): + hist_r = np.asarray(hist_r, dtype=np.float64) + if hist_r.sum() == 0: + continue + _print_bucket_table(hist_r, label2cat, 
ignore_index, title=lbl, logger=logger) + metrics.update(_compute_bucket_metrics(hist_r, label2cat, ignore_index, prefix=f"{lbl}/")) + + return SegEvalResult(metrics=metrics, cm=total_hist, range_cms=range_hists) + + def t4_seg_eval( gt_labels: List[np.ndarray], seg_preds: List[np.ndarray], @@ -344,39 +403,21 @@ def t4_seg_eval( range_hists = {} for i in range(len(gt_labels)): - gt = gt_labels[i].astype(np.int64) - pred = seg_preds[i].astype(np.int64) - - pred[gt == ignore_index] = ignore_index - gt[gt == ignore_index] = ignore_index - - h = fast_hist(pred, gt, num_classes) - total_hist += h - - if use_ranges: - assert coords_list is not None - coord = coords_list[i] - if coord is None: - continue - coord = np.asarray(coord) - if coord.ndim != 2 or coord.shape[1] < 2 or coord.shape[0] != gt.size: - continue - dist = compute_bev_distance(coord) - for lo, hi in distance_ranges: # type: ignore[union-attr] - lbl = range_label(lo, hi) - mask = (dist >= lo) & (dist < hi) - if not np.any(mask): - continue - h_r = fast_hist(pred[mask], gt[mask], num_classes) - range_hists[lbl] += h_r - - _print_bucket_table(total_hist, label2cat, ignore_index, title="Total", logger=logger) - metrics = _compute_bucket_metrics(total_hist, label2cat, ignore_index, prefix="") - - for lbl, hist_r in range_hists.items(): - if hist_r.sum() == 0: - continue - _print_bucket_table(hist_r, label2cat, ignore_index, title=lbl, logger=logger) - metrics.update(_compute_bucket_metrics(hist_r, label2cat, ignore_index, prefix=f"{lbl}/")) + update_seg_eval_histograms( + total_hist=total_hist, + pred=seg_preds[i], + gt=gt_labels[i], + num_classes=num_classes, + ignore_index=ignore_index, + range_hists=range_hists, + coord=coords_list[i] if use_ranges else None, + distance_ranges=distance_ranges if use_ranges else None, + ) - return SegEvalResult(metrics=metrics, cm=total_hist, range_cms=range_hists) + return t4_seg_eval_from_hists( + total_hist=total_hist, + label2cat=label2cat, + ignore_index=ignore_index, + 
range_hists=range_hists, + logger=logger, + ) diff --git a/projects/PTv3/engines/hooks/evaluator.py b/projects/PTv3/engines/hooks/evaluator.py index d9c095bf7..7fa9a9b41 100644 --- a/projects/PTv3/engines/hooks/evaluator.py +++ b/projects/PTv3/engines/hooks/evaluator.py @@ -5,17 +5,21 @@ Please cite our work if the code is helpful to you. """ -import matplotlib.pyplot as plt +import matplotlib +import numpy as np matplotlib.use("Agg") +import matplotlib.pyplot as plt import torch +import torch.distributed as dist import utils.comm as comm from autoware_ml.segmentation3d.datasets.utils import class_mapping_to_names from autoware_ml.segmentation3d.evaluation import ( SegEvalResult, plot_confusion_matrix, - t4_seg_eval, + t4_seg_eval_from_hists, + update_seg_eval_histograms, ) from .builder import HOOKS @@ -36,8 +40,15 @@ def eval(self): ignore_index = cfg.data.ignore_index metric_options = getattr(cfg, "metric_options", None) or {} distance_ranges = metric_options.get("distance_ranges") or [] + reduce_device = ( + torch.device("cuda", torch.cuda.current_device()) if torch.cuda.is_available() else torch.device("cpu") + ) - local_results = [] + total_hist = torch.zeros((num_classes, num_classes), dtype=torch.float64, device=reduce_device) + range_hist_tensors = { + f"{lo:g}-{hi:g}m": torch.zeros((num_classes, num_classes), dtype=torch.float64, device=reduce_device) + for lo, hi in distance_ranges + } loss_sum = 0.0 loss_count = 0 @@ -61,7 +72,23 @@ def eval(self): if coord_np.ndim != 2 or coord_np.shape[1] < 2: coord_np = None - local_results.append(dict(pred=pred, gt=segment, coord=coord_np)) + sample_total_hist = np.zeros((num_classes, num_classes), dtype=np.float64) + sample_range_hists = { + label: np.zeros((num_classes, num_classes), dtype=np.float64) for label in range_hist_tensors + } + update_seg_eval_histograms( + total_hist=sample_total_hist, + pred=pred, + gt=segment, + num_classes=num_classes, + ignore_index=ignore_index, + range_hists=sample_range_hists, + 
coord=coord_np, + distance_ranges=distance_ranges if distance_ranges else None, + ) + total_hist += torch.from_numpy(sample_total_hist).to(device=total_hist.device) + for label, hist in sample_range_hists.items(): + range_hist_tensors[label] += torch.from_numpy(hist).to(device=total_hist.device) loss_sum += float(loss.item()) loss_count += 1 @@ -71,21 +98,21 @@ def eval(self): self.trainer.logger.info(info + f"Loss {loss.item():.4f}") comm.synchronize() - gathered = comm.gather( - dict(results=local_results, loss_sum=loss_sum, loss_count=loss_count), - dst=0, + if comm.get_world_size() > 1: + dist.reduce(total_hist, dst=0) + for hist in range_hist_tensors.values(): + dist.reduce(hist, dst=0) + loss_reduced = comm.reduce_dict( + { + "loss_sum": torch.tensor(loss_sum, dtype=torch.float64, device=reduce_device), + "loss_count": torch.tensor(loss_count, dtype=torch.float64, device=reduce_device), + }, + average=False, ) if not comm.is_main_process(): return - merged_results = [] - total_loss_sum = 0.0 - total_loss_count = 0 - for item in gathered: - merged_results.extend(item["results"]) - total_loss_sum += float(item["loss_sum"]) - total_loss_count += int(item["loss_count"]) - loss_avg = total_loss_sum / max(total_loss_count, 1) + loss_avg = float(loss_reduced["loss_sum"] / loss_reduced["loss_count"].clamp_min(1.0)) mapped_class_names = class_mapping_to_names(cfg.class_mapping, ignore_index) assert len(mapped_class_names) == num_classes, ( @@ -93,13 +120,11 @@ def eval(self): ) label2cat = {i: mapped_class_names[i] for i in range(num_classes)} - eval_result: SegEvalResult = t4_seg_eval( - gt_labels=[r["gt"] for r in merged_results], - seg_preds=[r["pred"] for r in merged_results], + eval_result: SegEvalResult = t4_seg_eval_from_hists( + total_hist=total_hist.cpu().numpy(), label2cat=label2cat, ignore_index=ignore_index, - coords_list=[r.get("coord") for r in merged_results] if distance_ranges else None, - distance_ranges=distance_ranges if distance_ranges else None, + 
range_hists={label: hist.cpu().numpy() for label, hist in range_hist_tensors.items()}, logger=self.trainer.logger, ) diff --git a/projects/PTv3/engines/test.py b/projects/PTv3/engines/test.py index 1532c1486..e82db4fc3 100644 --- a/projects/PTv3/engines/test.py +++ b/projects/PTv3/engines/test.py @@ -16,6 +16,7 @@ import matplotlib.pyplot as plt import numpy as np import torch +import torch.distributed as dist import torch.nn.functional as F import torch.utils.data import utils.comm as comm @@ -34,11 +35,8 @@ from autoware_ml.segmentation3d.evaluation import ( SegEvalResult, plot_confusion_matrix, - t4_seg_eval, -) -from autoware_ml.segmentation3d.evaluation.functional.t4_seg_eval import ( - fast_hist, - per_class_iou, + t4_seg_eval_from_hists, + update_seg_eval_histograms, ) from .defaults import create_ddp_model @@ -153,7 +151,14 @@ def test(self): ignore_index = self.cfg.data.ignore_index metric_options = getattr(self.cfg, "metric_options", None) or {} distance_ranges = metric_options.get("distance_ranges") or [] - local_results = [] + reduce_device = ( + torch.device("cuda", torch.cuda.current_device()) if torch.cuda.is_available() else torch.device("cpu") + ) + total_hist = torch.zeros((num_classes, num_classes), dtype=torch.float64, device=reduce_device) + range_hist_tensors = { + f"{lo:g}-{hi:g}m": torch.zeros((num_classes, num_classes), dtype=torch.float64, device=reduce_device) + for lo, hi in distance_ranges + } self.model.eval() save_path = os.path.join(self.cfg.save_path, "result") @@ -249,7 +254,23 @@ def test(self): ) coord_np = feat_np[:, :3] if feat_np is not None and feat_np.ndim == 2 and feat_np.shape[1] >= 3 else None - local_results.append(dict(pred=pred, gt=segment, coord=coord_np)) + sample_total_hist = np.zeros((num_classes, num_classes), dtype=np.float64) + sample_range_hists = { + label: np.zeros((num_classes, num_classes), dtype=np.float64) for label in range_hist_tensors + } + update_seg_eval_histograms( + total_hist=sample_total_hist, + 
pred=pred, + gt=segment, + num_classes=num_classes, + ignore_index=ignore_index, + range_hists=sample_range_hists, + coord=coord_np, + distance_ranges=distance_ranges if distance_ranges else None, + ) + total_hist += torch.from_numpy(sample_total_hist).to(device=total_hist.device) + for label, hist in sample_range_hists.items(): + range_hist_tensors[label] += torch.from_numpy(hist).to(device=total_hist.device) batch_time.update(time.time() - end) logger.info( @@ -265,15 +286,12 @@ def test(self): logger.info("Syncing ...") comm.synchronize() - record_sync = comm.gather(local_results, dst=0) + if comm.get_world_size() > 1: + dist.reduce(total_hist, dst=0) + for hist in range_hist_tensors.values(): + dist.reduce(hist, dst=0) if comm.is_main_process(): - merged_results = [] - for _ in range(len(record_sync)): - r = record_sync.pop() - merged_results.extend(r) - del r - mapped_class_names = class_mapping_to_names(self.cfg.class_mapping, ignore_index) assert len(mapped_class_names) == num_classes, ( "class_mapping_to_names length must match num_classes: " f"{len(mapped_class_names)} vs {num_classes}" @@ -281,23 +299,20 @@ def test(self): label2cat = {i: mapped_class_names[i] for i in range(num_classes)} if self.cfg.data.test.type == "S3DISDataset": - total_hist = sum(fast_hist(r["pred"].ravel(), r["gt"].ravel(), num_classes) for r in merged_results) - iou = per_class_iou(total_hist) - intersection = np.diag(total_hist) - union = total_hist.sum(1) + total_hist.sum(0) - np.diag(total_hist) - target = total_hist.sum(1) + s3dis_hist = total_hist.cpu().numpy() + intersection = np.diag(s3dis_hist) + union = s3dis_hist.sum(1) + s3dis_hist.sum(0) - np.diag(s3dis_hist) + target = s3dis_hist.sum(1) torch.save( dict(intersection=intersection, union=union, target=target), os.path.join(save_path, f"{self.test_loader.dataset.split}.pth"), ) - eval_result: SegEvalResult = t4_seg_eval( - gt_labels=[r["gt"] for r in merged_results], - seg_preds=[r["pred"] for r in merged_results], + 
eval_result: SegEvalResult = t4_seg_eval_from_hists( + total_hist=total_hist.cpu().numpy(), label2cat=label2cat, ignore_index=ignore_index, - coords_list=[r.get("coord") for r in merged_results] if distance_ranges else None, - distance_ranges=distance_ranges if distance_ranges else None, + range_hists={label: hist.cpu().numpy() for label, hist in range_hist_tensors.items()}, logger=logger, ) From 9c3a8778a9df2a951054c7f28399d7f450431fba Mon Sep 17 00:00:00 2001 From: Amadeusz Szymko Date: Tue, 17 Mar 2026 23:41:37 +0900 Subject: [PATCH 16/23] fix(PTv3): reorder imports due to deployment issue caused by mmcv's implicit spconv load Signed-off-by: Amadeusz Szymko --- projects/PTv3/tools/export.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/projects/PTv3/tools/export.py b/projects/PTv3/tools/export.py index a96ded2ae..de50375d1 100644 --- a/projects/PTv3/tools/export.py +++ b/projects/PTv3/tools/export.py @@ -1,5 +1,4 @@ import numpy as np -import SparseConvolution # NOTE(knzo25): do not remove this import, it is needed for onnx export import spconv.pytorch as spconv import torch from engines.defaults import ( @@ -12,6 +11,9 @@ from models.utils.structure import Point, bit_length_tensor from torch.nn import functional as F +# NOTE: do not remove this import, it is needed for onnx export +import SparseConvolution + class WrappedModel(torch.nn.Module): From 9cea0012e25eb78648c54a9fa101ac1589008d0b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 17 Mar 2026 14:42:07 +0000 Subject: [PATCH 17/23] ci(pre-commit): autofix --- projects/PTv3/tools/export.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/projects/PTv3/tools/export.py b/projects/PTv3/tools/export.py index de50375d1..81c26e658 100644 --- a/projects/PTv3/tools/export.py +++ b/projects/PTv3/tools/export.py @@ -1,4 +1,7 @@ import numpy as np + +# NOTE: do not remove this import, it is needed for 
onnx export +import SparseConvolution import spconv.pytorch as spconv import torch from engines.defaults import ( @@ -11,9 +14,6 @@ from models.utils.structure import Point, bit_length_tensor from torch.nn import functional as F -# NOTE: do not remove this import, it is needed for onnx export -import SparseConvolution - class WrappedModel(torch.nn.Module): From 9f9170150e8c447ba487672cb68960bf4464ab31 Mon Sep 17 00:00:00 2001 From: Amadeusz Szymko Date: Wed, 18 Mar 2026 00:06:27 +0900 Subject: [PATCH 18/23] fix(PTv3): reorder imports due to deployment issue caused by mmcv's implicit spconv load Signed-off-by: Amadeusz Szymko --- projects/PTv3/tools/export.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/projects/PTv3/tools/export.py b/projects/PTv3/tools/export.py index 81c26e658..ae24c23b3 100644 --- a/projects/PTv3/tools/export.py +++ b/projects/PTv3/tools/export.py @@ -1,7 +1,4 @@ import numpy as np - -# NOTE: do not remove this import, it is needed for onnx export -import SparseConvolution import spconv.pytorch as spconv import torch from engines.defaults import ( @@ -14,6 +11,9 @@ from models.utils.structure import Point, bit_length_tensor from torch.nn import functional as F +# NOTE: keep this import last; it overrides sparse conv registration for export. 
+import SparseConvolution # isort: skip + class WrappedModel(torch.nn.Module): From e45c323055454b71ec3eac7991d0ca4fa11cfdb7 Mon Sep 17 00:00:00 2001 From: Amadeusz Szymko Date: Wed, 18 Mar 2026 00:17:52 +0900 Subject: [PATCH 19/23] feat(autoware_ml): unify metrics naming convention Signed-off-by: Amadeusz Szymko --- autoware_ml/hooks/__init__.py | 2 + autoware_ml/hooks/logger_hook.py | 21 +++++++ autoware_ml/hooks/t4_seg_tensorboard_hook.py | 63 +++++++++++++++++++ .../segmentation3d/evaluation/__init__.py | 3 + .../evaluation/metrics/t4_seg_metric.py | 47 ++------------ .../segmentation3d/evaluation/tensorboard.py | 61 ++++++++++++++++++ .../frnet_1xb8_t4dataset-ot128-seg.py | 4 +- .../frnet_1xb8_t4dataset-qt128-seg.py | 4 +- projects/PTv3/engines/hooks/evaluator.py | 42 ++++--------- projects/PTv3/engines/test.py | 40 ++++-------- 10 files changed, 183 insertions(+), 104 deletions(-) create mode 100644 autoware_ml/hooks/t4_seg_tensorboard_hook.py create mode 100644 autoware_ml/segmentation3d/evaluation/tensorboard.py diff --git a/autoware_ml/hooks/__init__.py b/autoware_ml/hooks/__init__.py index 62666b52e..ef3ff9871 100644 --- a/autoware_ml/hooks/__init__.py +++ b/autoware_ml/hooks/__init__.py @@ -6,6 +6,7 @@ PytorchTrainingProfilerHook, PytorchValidationProfilerHook, ) +from .t4_seg_tensorboard_hook import T4SegTensorboardHook __all__ = [ "MomentumInfoHook", @@ -14,4 +15,5 @@ "PytorchValidationProfilerHook", "LossScaleInfoHook", "LoggerHook", + "T4SegTensorboardHook", ] diff --git a/autoware_ml/hooks/logger_hook.py b/autoware_ml/hooks/logger_hook.py index 409a74f9e..8f4171e50 100644 --- a/autoware_ml/hooks/logger_hook.py +++ b/autoware_ml/hooks/logger_hook.py @@ -22,6 +22,7 @@ def __init__( log_metric_by_epoch: bool = True, backend_args: Optional[dict] = None, logging_inference_to_tensorboard: bool = False, + log_metrics_to_tensorboard: bool = True, ) -> None: """ Inherited from LoggerHook, please check the base class. 
@@ -39,11 +40,31 @@ def __init__( backend_args=backend_args, ) self._logging_inference_to_tensorboard = logging_inference_to_tensorboard + self._log_metrics_to_tensorboard = log_metrics_to_tensorboard # There's no test iter in https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/loops.py#L477 where # runner.iter doesn't increase by 1 after an iteration during inference # Note that we assume that it's running in a single-gpu environment self._test_iter = 0 + def after_val_epoch(self, runner, metrics=None) -> None: + """Optionally skip default TensorBoard metric logging for validation.""" + tag, log_str = runner.log_processor.get_log_after_epoch(runner, len(runner.val_dataloader), "val") + runner.logger.info(log_str) + if not self._log_metrics_to_tensorboard: + return + if self.log_metric_by_epoch: + if isinstance(runner._train_loop, dict) or runner._train_loop is None: + epoch = 0 + else: + epoch = runner.epoch + runner.visualizer.add_scalars(tag, step=epoch, file_path=self.json_log_path) + else: + if isinstance(runner._train_loop, dict) or runner._train_loop is None: + iter = 0 + else: + iter = runner.iter + runner.visualizer.add_scalars(tag, step=iter, file_path=self.json_log_path) + def after_test_iter(self, runner, batch_idx, data_batch=None, outputs=None): """Everything is the same to LoggerHook except it saves info to tensorboard as well.""" diff --git a/autoware_ml/hooks/t4_seg_tensorboard_hook.py b/autoware_ml/hooks/t4_seg_tensorboard_hook.py new file mode 100644 index 000000000..1f632f863 --- /dev/null +++ b/autoware_ml/hooks/t4_seg_tensorboard_hook.py @@ -0,0 +1,63 @@ +import matplotlib.pyplot as plt +from mmengine.hooks import Hook +from mmengine.registry import HOOKS +from mmengine.visualization import Visualizer + +from autoware_ml.segmentation3d.evaluation import ( + T4SegMetric, + build_t4_seg_tb_scalars, + figure_to_numpy, + iter_t4_seg_confusion_matrix_figures, +) + + +@HOOKS.register_module() +class T4SegTensorboardHook(Hook): + """Log 
shared T4 segmentation TensorBoard tags for MMEngine runners.""" + + priority = "LOW" + + def after_val_epoch(self, runner, metrics=None): + self._log_stage(runner, stage="val", step=runner.iter) + + def after_test_epoch(self, runner, metrics=None): + self._log_stage(runner, stage="test", step=0) + + def _log_stage(self, runner, stage: str, step: int) -> None: + metric = self._get_metric(runner, stage) + if metric is None or metric.last_eval_result is None: + return + + try: + vis = Visualizer.get_current_instance() + except Exception: + return + + class_names = [metric.last_label2cat[i] for i in sorted(metric.last_label2cat)] + scalars = build_t4_seg_tb_scalars( + metrics=metric.last_eval_result.metrics, + class_names=class_names, + stage=stage, + distance_ranges=metric.distance_ranges, + ) + if scalars: + vis.add_scalars(scalars, step=step) + + for tag, fig in iter_t4_seg_confusion_matrix_figures(metric.last_eval_result, class_names, stage): + try: + vis.add_image(tag, figure_to_numpy(fig), step=step) + except Exception: + pass + finally: + plt.close(fig) + + @staticmethod + def _get_metric(runner, stage: str): + loop = runner.val_loop if stage == "val" else runner.test_loop + evaluator = getattr(loop, "evaluator", None) + if evaluator is None: + return None + for metric in getattr(evaluator, "metrics", []): + if isinstance(metric, T4SegMetric): + return metric + return None diff --git a/autoware_ml/segmentation3d/evaluation/__init__.py b/autoware_ml/segmentation3d/evaluation/__init__.py index d0d38e2c5..d2400edc7 100644 --- a/autoware_ml/segmentation3d/evaluation/__init__.py +++ b/autoware_ml/segmentation3d/evaluation/__init__.py @@ -20,10 +20,12 @@ update_seg_eval_histograms, ) from .metrics.t4_seg_metric import T4SegMetric +from .tensorboard import build_t4_seg_tb_scalars, iter_t4_seg_confusion_matrix_figures __all__ = [ "SegEvalResult", "T4SegMetric", + "build_t4_seg_tb_scalars", "compute_bev_distance", "fast_hist", "figure_to_numpy", @@ -38,5 +40,6 @@ 
"range_label", "t4_seg_eval", "t4_seg_eval_from_hists", + "iter_t4_seg_confusion_matrix_figures", "update_seg_eval_histograms", ] diff --git a/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py b/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py index 33142abec..497719923 100644 --- a/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py +++ b/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py @@ -5,17 +5,13 @@ import tempfile from typing import Dict, List, Optional, Sequence, Tuple -import matplotlib.pyplot as plt import mmcv import numpy as np from mmdet3d.registry import METRICS from mmengine.evaluator import BaseMetric from mmengine.logging import MMLogger -from mmengine.visualization import Visualizer from autoware_ml.segmentation3d.evaluation.functional.t4_seg_eval import ( - figure_to_numpy, - plot_confusion_matrix, t4_seg_eval, ) @@ -60,8 +56,8 @@ def __init__( self._ignore_index = ignore_index self.distance_ranges = distance_ranges or [] self.submission_prefix = submission_prefix - # Counter used as the TensorBoard global-step for CM images. - self._eval_step: int = 0 + self.last_eval_result = None + self.last_label2cat: Dict[int, str] = {} def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None: """Collect one batch of model outputs for later aggregation.""" @@ -145,8 +141,8 @@ def compute_metrics(self, results: list) -> Dict[str, float]: "coordinate extraction is still misaligned." 
) - self._log_confusion_matrix_images(eval_result, label2cat) - self._eval_step += 1 + self.last_eval_result = eval_result + self.last_label2cat = dict(label2cat) return eval_result.metrics @@ -302,38 +298,3 @@ def _get_ignore_index(self) -> int: return self._ignore_index meta = getattr(self, "dataset_meta", {}) or {} return int(meta.get("ignore_index", -1)) - - def _log_confusion_matrix_images(self, eval_result, label2cat: Dict[int, str]) -> None: - """Log normalised confusion-matrix images to TensorBoard (rank-0 only).""" - try: - vis = Visualizer.get_current_instance() - except Exception: - return - - num_classes = int(eval_result.cm.shape[0]) if eval_result.cm is not None else len(label2cat) - class_names = [label2cat.get(i, str(i)) for i in range(num_classes)] - step = self._eval_step - tag_prefix = f"{self.prefix}/" if self.prefix else "" - - if eval_result.cm is not None: - cm_label = "" if eval_result.cm.sum() > 0 else "empty" - fig = plot_confusion_matrix(eval_result.cm, class_names, label=cm_label) - img = figure_to_numpy(fig) - try: - vis.add_image(f"{tag_prefix}confusion_matrix", img, step=step) - except Exception: - pass - plt.close(fig) - - for lbl, rcm in eval_result.range_cms.items(): - if rcm is None: - continue - cm_label = lbl if rcm.sum() > 0 else f"{lbl} (empty)" - fig = plot_confusion_matrix(rcm, class_names, label=cm_label) - img = figure_to_numpy(fig) - tag = f"confusion_matrix_{lbl.replace('-', '_').replace(' ', '_')}" - try: - vis.add_image(f"{tag_prefix}{tag}", img, step=step) - except Exception: - pass - plt.close(fig) diff --git a/autoware_ml/segmentation3d/evaluation/tensorboard.py b/autoware_ml/segmentation3d/evaluation/tensorboard.py new file mode 100644 index 000000000..d79006f38 --- /dev/null +++ b/autoware_ml/segmentation3d/evaluation/tensorboard.py @@ -0,0 +1,61 @@ +# Copyright (c) TIER IV, Inc. All rights reserved. 
+"""Shared TensorBoard naming for T4 segmentation metrics.""" + +from __future__ import annotations + +from typing import Dict, Iterable, List, Optional, Tuple + +from .functional.t4_seg_eval import plot_confusion_matrix, range_label + +_SUMMARY_KEYS = ("miou", "acc", "acc_cls", "mprecision", "mrecall", "mf1") + + +def build_t4_seg_tb_scalars( + metrics: Dict[str, float], + class_names: List[str], + stage: str, + distance_ranges: Optional[Iterable[Tuple[float, float]]] = None, +) -> Dict[str, float]: + """Map canonical metric keys to the shared TensorBoard naming scheme.""" + tb_scalars: Dict[str, float] = {} + + for key in _SUMMARY_KEYS: + if key in metrics: + tb_scalars[f"{stage}/{key}"] = metrics[key] + + for class_name in class_names: + if class_name in metrics: + tb_scalars[f"{stage}/class_iou/{class_name}"] = metrics[class_name] + for sub in ("precision", "recall", "f1"): + metric_key = f"{sub}/{class_name}" + if metric_key in metrics: + tb_scalars[f"{stage}/class_{sub}/{class_name}"] = metrics[metric_key] + + for lo, hi in distance_ranges or []: + bucket = range_label(lo, hi) + for key in _SUMMARY_KEYS: + metric_key = f"{bucket}/{key}" + if metric_key in metrics: + tb_scalars[f"{stage}/range/{bucket}/{key}"] = metrics[metric_key] + for class_name in class_names: + metric_key = f"{bucket}/{class_name}" + if metric_key in metrics: + tb_scalars[f"{stage}/range/{bucket}/class_iou/{class_name}"] = metrics[metric_key] + for sub in ("precision", "recall", "f1"): + metric_key = f"{bucket}/{sub}/{class_name}" + if metric_key in metrics: + tb_scalars[f"{stage}/range/{bucket}/class_{sub}/{class_name}"] = metrics[metric_key] + + return tb_scalars + + +def iter_t4_seg_confusion_matrix_figures(eval_result, class_names: List[str], stage: str): + """Yield standardised TensorBoard tags and matplotlib figures.""" + if eval_result.cm is not None and eval_result.cm.sum() > 0: + yield f"{stage}/confusion_matrix", plot_confusion_matrix(eval_result.cm, class_names) + + for bucket, 
range_cm in eval_result.range_cms.items(): + if range_cm is None or range_cm.sum() == 0: + continue + tag = f"{stage}/confusion_matrix_{bucket.replace('-', '_').replace(' ', '_')}" + yield tag, plot_confusion_matrix(range_cm, class_names, label=bucket) diff --git a/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-ot128-seg.py b/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-ot128-seg.py index 6c0200e6c..978106382 100644 --- a/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-ot128-seg.py +++ b/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-ot128-seg.py @@ -9,6 +9,7 @@ "projects.FRNet.frnet.datasets", "projects.FRNet.frnet.datasets.transforms", "projects.FRNet.frnet.models", + "autoware_ml.hooks", "autoware_ml.segmentation3d.evaluation.metrics", ], allow_failed_imports=False, @@ -336,6 +337,7 @@ log_processor = dict(type="LogProcessor", window_size=50, by_epoch=False) default_hooks = dict( - logger=dict(type="LoggerHook", log_metric_by_epoch=False), + logger=dict(type="LoggerHook", log_metric_by_epoch=False, log_metrics_to_tensorboard=False), checkpoint=dict(type="CheckpointHook", by_epoch=False, interval=-1, save_best="miou"), ) +custom_hooks = [dict(type="T4SegTensorboardHook")] diff --git a/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-qt128-seg.py b/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-qt128-seg.py index a711a08b1..6f4a7f2bb 100644 --- a/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-qt128-seg.py +++ b/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-qt128-seg.py @@ -9,6 +9,7 @@ "projects.FRNet.frnet.datasets", "projects.FRNet.frnet.datasets.transforms", "projects.FRNet.frnet.models", + "autoware_ml.hooks", "autoware_ml.segmentation3d.evaluation.metrics", ], allow_failed_imports=False, @@ -336,6 +337,7 @@ log_processor = dict(type="LogProcessor", window_size=50, by_epoch=False) default_hooks = dict( - logger=dict(type="LoggerHook", log_metric_by_epoch=False), + logger=dict(type="LoggerHook", 
log_metric_by_epoch=False, log_metrics_to_tensorboard=False), checkpoint=dict(type="CheckpointHook", by_epoch=False, interval=-1, save_best="miou"), ) +custom_hooks = [dict(type="T4SegTensorboardHook")] diff --git a/projects/PTv3/engines/hooks/evaluator.py b/projects/PTv3/engines/hooks/evaluator.py index 7fa9a9b41..409f76c65 100644 --- a/projects/PTv3/engines/hooks/evaluator.py +++ b/projects/PTv3/engines/hooks/evaluator.py @@ -17,7 +17,8 @@ from autoware_ml.segmentation3d.datasets.utils import class_mapping_to_names from autoware_ml.segmentation3d.evaluation import ( SegEvalResult, - plot_confusion_matrix, + build_t4_seg_tb_scalars, + iter_t4_seg_confusion_matrix_figures, t4_seg_eval_from_hists, update_seg_eval_histograms, ) @@ -132,36 +133,17 @@ def eval(self): writer = self.trainer.writer if writer is not None: writer.add_scalar("val/loss", loss_avg, epoch) - m = eval_result.metrics - for key in ("miou", "acc", "acc_cls", "mprecision", "mrecall", "mf1"): - writer.add_scalar(f"val/{key}", m.get(key, 0.0), epoch) - for name in mapped_class_names: - writer.add_scalar(f"val/class_iou/{name}", m.get(name, 0.0), epoch) - for sub in ("precision", "recall", "f1"): - writer.add_scalar(f"val/class_{sub}/{name}", m.get(f"{sub}/{name}", 0.0), epoch) - for lo, hi in distance_ranges: - lbl = f"{lo:g}-{hi:g}m" - for key in ("miou", "acc", "acc_cls", "mprecision", "mrecall", "mf1"): - writer.add_scalar(f"val/range/{lbl}/{key}", m.get(f"{lbl}/{key}", 0.0), epoch) - for name in mapped_class_names: - writer.add_scalar(f"val/range/{lbl}/class_iou/{name}", m.get(f"{lbl}/{name}", 0.0), epoch) - for sub in ("precision", "recall", "f1"): - writer.add_scalar( - f"val/range/{lbl}/class_{sub}/{name}", - m.get(f"{lbl}/{sub}/{name}", 0.0), - epoch, - ) - - if eval_result.cm is not None and eval_result.cm.sum() > 0: - fig = plot_confusion_matrix(eval_result.cm, mapped_class_names) - writer.add_figure("val/confusion_matrix", fig, epoch) + for tag, value in build_t4_seg_tb_scalars( + 
metrics=eval_result.metrics, + class_names=mapped_class_names, + stage="val", + distance_ranges=distance_ranges, + ).items(): + writer.add_scalar(tag, value, epoch) + + for tag, fig in iter_t4_seg_confusion_matrix_figures(eval_result, mapped_class_names, "val"): + writer.add_figure(tag, fig, epoch) plt.close(fig) - for lbl, rcm in eval_result.range_cms.items(): - if rcm is not None and rcm.sum() > 0: - fig = plot_confusion_matrix(rcm, mapped_class_names, label=lbl) - tag = f"val/confusion_matrix_{lbl.replace('-', '_').replace(' ', '_')}" - writer.add_figure(tag, fig, epoch) - plt.close(fig) self.trainer.logger.info("<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<") self.trainer.comm_info["current_metric_value"] = eval_result.metrics.get("miou", 0.0) diff --git a/projects/PTv3/engines/test.py b/projects/PTv3/engines/test.py index e82db4fc3..39a5ccbbc 100644 --- a/projects/PTv3/engines/test.py +++ b/projects/PTv3/engines/test.py @@ -34,7 +34,8 @@ from autoware_ml.segmentation3d.datasets.utils import class_mapping_to_names from autoware_ml.segmentation3d.evaluation import ( SegEvalResult, - plot_confusion_matrix, + build_t4_seg_tb_scalars, + iter_t4_seg_confusion_matrix_figures, t4_seg_eval_from_hists, update_seg_eval_histograms, ) @@ -317,35 +318,16 @@ def test(self): ) if self.writer is not None: - m = eval_result.metrics - for key in ("miou", "acc", "acc_cls", "mprecision", "mrecall", "mf1"): - self.writer.add_scalar(f"test/{key}", m.get(key, 0.0), 0) - for name in mapped_class_names: - self.writer.add_scalar(f"test/class_iou/{name}", m.get(name, 0.0), 0) - for sub in ("precision", "recall", "f1"): - self.writer.add_scalar(f"test/class_{sub}/{name}", m.get(f"{sub}/{name}", 0.0), 0) - for lo, hi in distance_ranges: - lbl = f"{lo:g}-{hi:g}m" - for key in ("miou", "acc", "acc_cls", "mprecision", "mrecall", "mf1"): - self.writer.add_scalar(f"test/range/{lbl}/{key}", m.get(f"{lbl}/{key}", 0.0), 0) - for name in mapped_class_names: - 
self.writer.add_scalar(f"test/range/{lbl}/class_iou/{name}", m.get(f"{lbl}/{name}", 0.0), 0) - for sub in ("precision", "recall", "f1"): - self.writer.add_scalar( - f"test/range/{lbl}/class_{sub}/{name}", - m.get(f"{lbl}/{sub}/{name}", 0.0), - 0, - ) - if eval_result.cm is not None and eval_result.cm.sum() > 0: - fig = plot_confusion_matrix(eval_result.cm, mapped_class_names) - self.writer.add_figure("test/confusion_matrix", fig, 0) + for tag, value in build_t4_seg_tb_scalars( + metrics=eval_result.metrics, + class_names=mapped_class_names, + stage="test", + distance_ranges=distance_ranges, + ).items(): + self.writer.add_scalar(tag, value, 0) + for tag, fig in iter_t4_seg_confusion_matrix_figures(eval_result, mapped_class_names, "test"): + self.writer.add_figure(tag, fig, 0) plt.close(fig) - for lbl, rcm in eval_result.range_cms.items(): - if rcm is not None and rcm.sum() > 0: - fig = plot_confusion_matrix(rcm, mapped_class_names, label=lbl) - tag = f"test/confusion_matrix_{lbl.replace('-', '_').replace(' ', '_')}" - self.writer.add_figure(tag, fig, 0) - plt.close(fig) self.writer.flush() if self.writer is not None: From 253248bcdb2621ea30694a1678fab9fffc2471c9 Mon Sep 17 00:00:00 2001 From: Amadeusz Szymko Date: Mon, 23 Mar 2026 11:38:24 +0900 Subject: [PATCH 20/23] fix(segmentation3d): exclude ignore_index from confusion matrix in T4SegMetric Signed-off-by: Amadeusz Szymko --- .../segmentation3d/evaluation/metrics/t4_seg_metric.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py b/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py index 497719923..ffea3d2e1 100644 --- a/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py +++ b/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py @@ -105,11 +105,14 @@ def compute_metrics(self, results: list) -> Dict[str, float]: ignore_index = self._get_ignore_index() label2cat = self._get_label2cat() + # Do 
not include ignore_index in label2cat. When ignore_index sits outside [0, num_classes) fast_hist naturally + # drops those points via its ``labels < num_classes`` mask. Adding it would expand the confusion matrix + # and pollute acc / acc_cls. + label2cat.pop(ignore_index, None) target_num_classes = self._num_classes or len(label2cat) - target_num_classes = max(target_num_classes, ignore_index + 1) for idx in range(target_num_classes): - if idx not in label2cat: - label2cat[idx] = "ignore" if idx == ignore_index else str(idx) + if idx not in label2cat and idx != ignore_index: + label2cat[idx] = str(idx) gt_labels = [r["gt"] for r in results] seg_preds = [r["pred"] for r in results] From 8ded56d27773255b97668be88acec556bb501692 Mon Sep 17 00:00:00 2001 From: Amadeusz Szymko Date: Mon, 23 Mar 2026 11:53:16 +0900 Subject: [PATCH 21/23] refactor(segmentation3d): remove unused submission export from T4SegMetric Signed-off-by: Amadeusz Szymko --- .../evaluation/metrics/t4_seg_metric.py | 52 ------------------- 1 file changed, 52 deletions(-) diff --git a/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py b/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py index ffea3d2e1..028112ee5 100644 --- a/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py +++ b/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py @@ -1,11 +1,8 @@ # Copyright (c) TIER IV, Inc. All rights reserved. """MMEngine metric adapter for shared T4 segmentation evaluation.""" -import os.path as osp -import tempfile from typing import Dict, List, Optional, Sequence, Tuple -import mmcv import numpy as np from mmdet3d.registry import METRICS from mmengine.evaluator import BaseMetric @@ -34,9 +31,6 @@ class T4SegMetric(BaseMetric): Device used for collecting results across ranks. ``'cpu'`` or ``'gpu'``. prefix: Optional metric-name prefix. - submission_prefix: - If set, predictions are exported in ScanNet TXT format to this path - instead of computing metrics. 
""" default_prefix: Optional[str] = None @@ -48,14 +42,12 @@ def __init__( distance_ranges: Optional[List[Tuple[float, float]]] = None, collect_device: str = "cpu", prefix: Optional[str] = None, - submission_prefix: Optional[str] = None, **kwargs, ): super().__init__(prefix=prefix, collect_device=collect_device) self._num_classes = num_classes self._ignore_index = ignore_index self.distance_ranges = distance_ranges or [] - self.submission_prefix = submission_prefix self.last_eval_result = None self.last_label2cat: Dict[int, str] = {} @@ -85,8 +77,6 @@ def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None: pred=pred, gt=gt, coord=coord_i, - # Keep original annotation info for submission export. - eval_ann_info=ann_field, ) ) @@ -94,10 +84,6 @@ def compute_metrics(self, results: list) -> Dict[str, float]: """Aggregate per-batch results and return the full metrics dict.""" logger: MMLogger = MMLogger.get_current_instance() - if self.submission_prefix: - self.format_results(results) - return {} - if not results: logger.warning("T4SegMetric: no results to evaluate.") return {} @@ -149,44 +135,6 @@ def compute_metrics(self, results: list) -> Dict[str, float]: return eval_result.metrics - def format_results(self, results: list) -> None: - """Export predictions to TXT files for submission (ScanNet format).""" - submission_prefix = self.submission_prefix - if submission_prefix is None: - submission_prefix = osp.join(tempfile.mkdtemp(), "results") - mmcv.mkdir_or_exist(submission_prefix) - - ignore_index_val = self._get_ignore_index() - label2cat_map = self._get_label2cat() - base_num_labels = len(label2cat_map) - - # Ensure cat2label covers all possible prediction indices, including ignore_index. 
- num_labels = max( - base_num_labels, - getattr(self, "_num_classes", base_num_labels), - ) - if isinstance(ignore_index_val, int) and ignore_index_val >= 0: - num_labels = max(num_labels, ignore_index_val + 1) - - cat2label = np.zeros(num_labels, dtype=np.int64) - for out_idx, _ in label2cat_map.items(): - if out_idx != ignore_index_val and 0 <= out_idx < num_labels: - cat2label[out_idx] = out_idx - - meta = getattr(self, "dataset_meta", {}) or {} - if "label2cat" in meta: - for original_label, output_idx in meta["label2cat"].items(): - if isinstance(output_idx, int) and output_idx != ignore_index_val: - cat2label[output_idx] = original_label - - for r in results: - ann = r.get("eval_ann_info", {}) - sample_idx = (ann.get("point_cloud") or {}).get("lidar_idx", "unknown") - pred_sem = r["pred"].astype(np.int64) - pred_label = cat2label[pred_sem] - curr_file = f"{submission_prefix}/{sample_idx}.txt" - np.savetxt(curr_file, pred_label, fmt="%d") - @staticmethod def _to_numpy(v) -> Optional[np.ndarray]: """Convert tensor / array-like to a flat int64 numpy array.""" From a2a8c16afc295964a99c6b1841036982f3d09dd3 Mon Sep 17 00:00:00 2001 From: Amadeusz Szymko Date: Wed, 25 Mar 2026 18:26:03 +0900 Subject: [PATCH 22/23] refactor(autoware_ml): dedicated logger hook Signed-off-by: Amadeusz Szymko --- autoware_ml/hooks/__init__.py | 2 ++ autoware_ml/hooks/logger_hook.py | 21 ------------------- autoware_ml/hooks/t4_seg_logger_hook.py | 13 ++++++++++++ .../frnet_1xb8_t4dataset-ot128-seg.py | 2 +- .../frnet_1xb8_t4dataset-qt128-seg.py | 2 +- 5 files changed, 17 insertions(+), 23 deletions(-) create mode 100644 autoware_ml/hooks/t4_seg_logger_hook.py diff --git a/autoware_ml/hooks/__init__.py b/autoware_ml/hooks/__init__.py index ef3ff9871..ada9b7621 100644 --- a/autoware_ml/hooks/__init__.py +++ b/autoware_ml/hooks/__init__.py @@ -6,6 +6,7 @@ PytorchTrainingProfilerHook, PytorchValidationProfilerHook, ) +from .t4_seg_logger_hook import T4SegLoggerHook from 
.t4_seg_tensorboard_hook import T4SegTensorboardHook __all__ = [ @@ -15,5 +16,6 @@ "PytorchValidationProfilerHook", "LossScaleInfoHook", "LoggerHook", + "T4SegLoggerHook", "T4SegTensorboardHook", ] diff --git a/autoware_ml/hooks/logger_hook.py b/autoware_ml/hooks/logger_hook.py index 8f4171e50..409a74f9e 100644 --- a/autoware_ml/hooks/logger_hook.py +++ b/autoware_ml/hooks/logger_hook.py @@ -22,7 +22,6 @@ def __init__( log_metric_by_epoch: bool = True, backend_args: Optional[dict] = None, logging_inference_to_tensorboard: bool = False, - log_metrics_to_tensorboard: bool = True, ) -> None: """ Inherited from LoggerHook, please check the base class. @@ -40,31 +39,11 @@ def __init__( backend_args=backend_args, ) self._logging_inference_to_tensorboard = logging_inference_to_tensorboard - self._log_metrics_to_tensorboard = log_metrics_to_tensorboard # There's no test iter in https://github.com/open-mmlab/mmengine/blob/main/mmengine/runner/loops.py#L477 where # runner.iter doesn't increase by 1 after an iteration during inference # Note that we assume that it's running in a single-gpu environment self._test_iter = 0 - def after_val_epoch(self, runner, metrics=None) -> None: - """Optionally skip default TensorBoard metric logging for validation.""" - tag, log_str = runner.log_processor.get_log_after_epoch(runner, len(runner.val_dataloader), "val") - runner.logger.info(log_str) - if not self._log_metrics_to_tensorboard: - return - if self.log_metric_by_epoch: - if isinstance(runner._train_loop, dict) or runner._train_loop is None: - epoch = 0 - else: - epoch = runner.epoch - runner.visualizer.add_scalars(tag, step=epoch, file_path=self.json_log_path) - else: - if isinstance(runner._train_loop, dict) or runner._train_loop is None: - iter = 0 - else: - iter = runner.iter - runner.visualizer.add_scalars(tag, step=iter, file_path=self.json_log_path) - def after_test_iter(self, runner, batch_idx, data_batch=None, outputs=None): """Everything is the same to LoggerHook except it 
saves info to tensorboard as well.""" diff --git a/autoware_ml/hooks/t4_seg_logger_hook.py b/autoware_ml/hooks/t4_seg_logger_hook.py new file mode 100644 index 000000000..2c3fa5d12 --- /dev/null +++ b/autoware_ml/hooks/t4_seg_logger_hook.py @@ -0,0 +1,13 @@ +from mmengine.registry import HOOKS + +from .logger_hook import LoggerHook + + +@HOOKS.register_module() +class T4SegLoggerHook(LoggerHook): + """Logger hook for T4 segmentation configs using custom TensorBoard metric tags.""" + + def after_val_epoch(self, runner, metrics=None) -> None: + """Log validation results without the default TensorBoard scalar dump.""" + _, log_str = runner.log_processor.get_log_after_epoch(runner, len(runner.val_dataloader), "val") + runner.logger.info(log_str) diff --git a/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-ot128-seg.py b/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-ot128-seg.py index 978106382..65f981b99 100644 --- a/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-ot128-seg.py +++ b/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-ot128-seg.py @@ -337,7 +337,7 @@ log_processor = dict(type="LogProcessor", window_size=50, by_epoch=False) default_hooks = dict( - logger=dict(type="LoggerHook", log_metric_by_epoch=False, log_metrics_to_tensorboard=False), + logger=dict(type="T4SegLoggerHook", log_metric_by_epoch=False), checkpoint=dict(type="CheckpointHook", by_epoch=False, interval=-1, save_best="miou"), ) custom_hooks = [dict(type="T4SegTensorboardHook")] diff --git a/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-qt128-seg.py b/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-qt128-seg.py index 6f4a7f2bb..c614f9758 100644 --- a/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-qt128-seg.py +++ b/projects/FRNet/configs/t4dataset/frnet_1xb8_t4dataset-qt128-seg.py @@ -337,7 +337,7 @@ log_processor = dict(type="LogProcessor", window_size=50, by_epoch=False) default_hooks = dict( - logger=dict(type="LoggerHook", 
log_metric_by_epoch=False, log_metrics_to_tensorboard=False), + logger=dict(type="T4SegLoggerHook", log_metric_by_epoch=False), checkpoint=dict(type="CheckpointHook", by_epoch=False, interval=-1, save_best="miou"), ) custom_hooks = [dict(type="T4SegTensorboardHook")] From eedb405c72af8d5e4f95e1366cdf1b789f6fe0da Mon Sep 17 00:00:00 2001 From: Amadeusz Szymko Date: Wed, 25 Mar 2026 18:32:41 +0900 Subject: [PATCH 23/23] feat(segmentation3d): log warning & use dataclass Signed-off-by: Amadeusz Szymko --- .../evaluation/metrics/t4_seg_metric.py | 32 ++++++++++++------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py b/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py index 028112ee5..a1baf8e24 100644 --- a/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py +++ b/autoware_ml/segmentation3d/evaluation/metrics/t4_seg_metric.py @@ -1,6 +1,7 @@ # Copyright (c) TIER IV, Inc. All rights reserved. """MMEngine metric adapter for shared T4 segmentation evaluation.""" +from dataclasses import dataclass from typing import Dict, List, Optional, Sequence, Tuple import numpy as np @@ -13,6 +14,13 @@ ) +@dataclass +class T4SegMetricSample: + pred: np.ndarray + gt: np.ndarray + coord: Optional[np.ndarray] = None + + @METRICS.register_module() class T4SegMetric(BaseMetric): """3D semantic segmentation evaluation metric for T4 datasets. 
@@ -54,6 +62,7 @@ def __init__( def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None: """Collect one batch of model outputs for later aggregation.""" batch_coords = self._extract_batch_coords(data_batch, data_samples) + logger: MMLogger = MMLogger.get_current_instance() for i, data_sample in enumerate(data_samples): pred_field = data_sample.get("pred_pts_seg", {}) @@ -62,7 +71,14 @@ def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None: pred = self._to_numpy(pred_field.get("pts_semantic_mask")) gt = self._to_numpy(ann_field.get("pts_semantic_mask")) - if pred is None or gt is None or pred.size != gt.size: + if pred is None or gt is None: + logger.warning("T4SegMetric: skipping sample with missing prediction or ground-truth labels.") + continue + if pred.size != gt.size: + logger.warning( + "T4SegMetric: skipping sample because prediction and ground-truth lengths differ: " + f"{pred.size} vs {gt.size}." + ) continue coord_i = batch_coords[i] if batch_coords else None @@ -72,13 +88,7 @@ def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None: elif coord_i.shape[0] < gt.size: coord_i = None - self.results.append( - dict( - pred=pred, - gt=gt, - coord=coord_i, - ) - ) + self.results.append(T4SegMetricSample(pred=pred, gt=gt, coord=coord_i)) def compute_metrics(self, results: list) -> Dict[str, float]: """Aggregate per-batch results and return the full metrics dict.""" @@ -100,9 +110,9 @@ def compute_metrics(self, results: list) -> Dict[str, float]: if idx not in label2cat and idx != ignore_index: label2cat[idx] = str(idx) - gt_labels = [r["gt"] for r in results] - seg_preds = [r["pred"] for r in results] - coords_list = [r.get("coord") for r in results] if self.distance_ranges else None + gt_labels = [r.gt for r in results] + seg_preds = [r.pred for r in results] + coords_list = [r.coord for r in results] if self.distance_ranges else None if self.distance_ranges and (not coords_list or all(c is None for c in 
coords_list)): logger.warning( "T4SegMetric: distance_ranges is configured but no coordinates "