From 71d6670e7d36b9185eac056f1d2e5ef175d9913f Mon Sep 17 00:00:00 2001 From: Balthasar Date: Wed, 16 Jul 2025 19:50:37 +0200 Subject: [PATCH 01/24] fix(metrics): update area calculation in MeanAveragePrecision to use area property as fallback if it's not COCO --- supervision/metrics/mean_average_precision.py | 10 +- .../test_mean_average_precision_area.py | 179 ++++++++++++++++++ 2 files changed, 187 insertions(+), 2 deletions(-) create mode 100644 test/metrics/test_mean_average_precision_area.py diff --git a/supervision/metrics/mean_average_precision.py b/supervision/metrics/mean_average_precision.py index 5100f39ac1..14b0cbf057 100644 --- a/supervision/metrics/mean_average_precision.py +++ b/supervision/metrics/mean_average_precision.py @@ -1221,7 +1221,7 @@ def _prepare_targets(self, targets): for image_id, image_targets in enumerate(targets): if self._image_indices is not None: image_id = self._image_indices[image_id] - for target in image_targets: + for target_idx, target in enumerate(image_targets): xyxy = target[0] # or xyxy = prediction[0]; xyxy[2:4] -= xyxy[0:2] xywh = [xyxy[0], xyxy[1], xyxy[2] - xyxy[0], xyxy[3] - xyxy[1]] # Get "area" and "iscrowd" (default 0) from data @@ -1231,8 +1231,14 @@ def _prepare_targets(self, targets): category_id = self._class_mapping[target[3].item()] else: category_id = target[3].item() + + # Use area from data if available, otherwise use Detections.area property + area = data.get("area") if data else None + if area is None: + area = image_targets.area[target_idx] # Use supervision's .area property + dict_annotation = { - "area": data.get("area", 0), + "area": area, "iscrowd": data.get("iscrowd", 0), "image_id": image_id, "bbox": xywh, diff --git a/test/metrics/test_mean_average_precision_area.py b/test/metrics/test_mean_average_precision_area.py new file mode 100644 index 0000000000..5a3af7d546 --- /dev/null +++ b/test/metrics/test_mean_average_precision_area.py @@ -0,0 +1,179 @@ +from __future__ import annotations + 
+import numpy as np + +from supervision.detection.core import Detections +from supervision.metrics.mean_average_precision import MeanAveragePrecision + + +class TestMeanAveragePrecisionArea: + """Test area calculation in MeanAveragePrecision.""" + + def test_area_calculated_from_bbox_when_data_empty(self): + """Test that area is calculated from bbox when data is empty (normal case).""" + # Create detections with empty data (normal case) + gt = Detections( + xyxy=np.array([ + [10, 10, 40, 40], # Small: 30x30 = 900 + [100, 100, 200, 150], # Medium: 100x50 = 5000 + [300, 300, 500, 400] # Large: 200x100 = 20000 + ], dtype=np.float32), + class_id=np.array([0, 0, 0]), + confidence=np.array([1.0, 1.0, 1.0]) + ) + + pred = Detections( + xyxy=gt.xyxy.copy(), + class_id=gt.class_id.copy(), + confidence=np.array([0.9, 0.9, 0.9]) + ) + + # Verify data is empty (normal case) + assert gt.data == {} + assert pred.data == {} + + # Create mAP metric and test area calculation + map_metric = MeanAveragePrecision() + map_metric.update([pred], [gt]) + + # Check that areas were calculated correctly from bbox + prepared_targets = map_metric._prepare_targets(map_metric._targets_list) + + areas = [ann["area"] for ann in prepared_targets["annotations"]] + expected_areas = [900.0, 5000.0, 20000.0] # width * height for each bbox + + assert np.allclose(areas, expected_areas, rtol=1e-05, atol=1e-08), f"Expected {expected_areas}, got {areas}" + + # Verify mAP works correctly (no -1.0 for medium/large objects) + result = map_metric.compute() + assert result.medium_objects.map50 >= 0.0, "Medium objects should have valid mAP" + assert result.large_objects.map50 >= 0.0, "Large objects should have valid mAP" + + def test_area_preserved_when_provided_in_data(self): + """Test that area from data is preserved when provided (COCO case).""" + # Create detections with area in data (COCO style) + gt = Detections( + xyxy=np.array([[100, 100, 200, 150]], dtype=np.float32), # Would be 5000 + 
class_id=np.array([0]), + confidence=np.array([1.0]) + ) + + # Add custom area to data (different from calculated) + gt.data = {"area": np.array([3000.0])} + + pred = Detections( + xyxy=gt.xyxy.copy(), + class_id=gt.class_id.copy(), + confidence=np.array([0.9]) + ) + pred.data = {"area": np.array([3000.0])} + + # Test area calculation + map_metric = MeanAveragePrecision() + map_metric.update([pred], [gt]) + + # Check that provided area is used (not calculated) + prepared_targets = map_metric._prepare_targets(map_metric._targets_list) + used_area = prepared_targets["annotations"][0]["area"] + + assert np.allclose(used_area, 3000.0, rtol=1e-05, atol=1e-08), f"Should use provided area 3000.0, got {used_area}" + + # Verify it's different from what would be calculated + calculated_area = (200 - 100) * (150 - 100) # 100 * 50 = 5000 + assert not np.allclose(used_area, calculated_area, rtol=1e-05, atol=1e-08), "Should use provided area, not calculated" + + def test_mixed_area_sources(self): + """Test mix of detections with and without area in data.""" + # Create detections where some have area in data, others don't + gt1 = Detections( + xyxy=np.array([[10, 10, 40, 40]], dtype=np.float32), # 900 + class_id=np.array([0]) + ) + # No area in data - should be calculated + + gt2 = Detections( + xyxy=np.array([[100, 100, 200, 150]], dtype=np.float32), # 5000 + class_id=np.array([1]) + ) + # Add area in data - should be preserved + gt2.data = {"area": np.array([3000.0])} + + pred1 = Detections( + xyxy=gt1.xyxy.copy(), + class_id=gt1.class_id.copy(), + confidence=np.array([0.9]) + ) + + pred2 = Detections( + xyxy=gt2.xyxy.copy(), + class_id=gt2.class_id.copy(), + confidence=np.array([0.8]) + ) + pred2.data = {"area": np.array([3000.0])} + + # Test area calculation for mixed sources + map_metric = MeanAveragePrecision() + map_metric.update([pred1, pred2], [gt1, gt2]) + + prepared_targets = map_metric._prepare_targets(map_metric._targets_list) + areas = [ann["area"] for ann in 
prepared_targets["annotations"]] + + expected_areas = [900.0, 3000.0] # calculated, then provided + assert np.allclose(areas, expected_areas, rtol=1e-05, atol=1e-08), f"Expected {expected_areas}, got {areas}" + + def test_size_specific_map_works_correctly(self): + """Test that size-specific mAP works correctly with area fix.""" + # Create detections with one object of each size + gt = Detections( + xyxy=np.array([ + [10, 10, 40, 40], # Small: 30x30 = 900 < 1024 + [100, 100, 200, 150], # Medium: 100x50 = 5000 (1024 <= x < 9216) + [300, 300, 500, 400] # Large: 200x100 = 20000 >= 9216 + ], dtype=np.float32), + class_id=np.array([0, 0, 0]) + ) + + # Perfect predictions + pred = Detections( + xyxy=gt.xyxy.copy(), + class_id=gt.class_id.copy(), + confidence=np.array([0.9, 0.9, 0.9]) + ) + + # Test mAP calculation + map_metric = MeanAveragePrecision() + map_metric.update([pred], [gt]) + result = map_metric.compute() + + # All size categories should have valid results (not -1.0) + assert result.small_objects.map50 >= 0.0, "Small objects should have valid mAP" + assert result.medium_objects.map50 >= 0.0, "Medium objects should have valid mAP" + assert result.large_objects.map50 >= 0.0, "Large objects should have valid mAP" + + # Perfect matches should yield high mAP for medium and large + assert result.medium_objects.map50 > 0.9, "Perfect medium matches should have high mAP" + assert result.large_objects.map50 > 0.9, "Perfect large matches should have high mAP" + + def test_area_uses_detections_property(self): + """Test that area calculation uses Detections.area property correctly.""" + # Create detection + gt = Detections( + xyxy=np.array([[100, 100, 200, 150]], dtype=np.float32), + class_id=np.array([0]) + ) + + pred = Detections( + xyxy=gt.xyxy.copy(), + class_id=gt.class_id.copy(), + confidence=np.array([0.9]) + ) + + # Test that internal calculation matches Detections.area property + map_metric = MeanAveragePrecision() + map_metric.update([pred], [gt]) + + 
prepared_targets = map_metric._prepare_targets(map_metric._targets_list) + used_area = prepared_targets["annotations"][0]["area"] + expected_area = gt.area[0] + + assert np.allclose(used_area, expected_area, rtol=1e-05, atol=1e-08), f"Should use Detections.area property {expected_area}, got {used_area}" \ No newline at end of file From 3f926c618aa585356b0ac8f3279cc52daf31fa47 Mon Sep 17 00:00:00 2001 From: Balthasar Date: Wed, 16 Jul 2025 19:51:59 +0200 Subject: [PATCH 02/24] fix: cleanup comments --- supervision/metrics/mean_average_precision.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/supervision/metrics/mean_average_precision.py b/supervision/metrics/mean_average_precision.py index 14b0cbf057..840494dd75 100644 --- a/supervision/metrics/mean_average_precision.py +++ b/supervision/metrics/mean_average_precision.py @@ -1232,10 +1232,10 @@ def _prepare_targets(self, targets): else: category_id = target[3].item() - # Use area from data if available, otherwise use Detections.area property + # Use area from data if available (coco format), otherwise use Detections.area property area = data.get("area") if data else None if area is None: - area = image_targets.area[target_idx] # Use supervision's .area property + area = image_targets.area[target_idx] dict_annotation = { "area": area, From 4009f9d88653a60ed3b15d5e2cb132975659c364 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 16 Jul 2025 18:00:42 +0000 Subject: [PATCH 03/24] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?= =?UTF-8?q?=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/metrics/mean_average_precision.py | 4 +- .../test_mean_average_precision_area.py | 146 ++++++++++-------- 2 files changed, 85 insertions(+), 65 deletions(-) diff --git a/supervision/metrics/mean_average_precision.py 
b/supervision/metrics/mean_average_precision.py index 840494dd75..bb1147d89c 100644 --- a/supervision/metrics/mean_average_precision.py +++ b/supervision/metrics/mean_average_precision.py @@ -1231,12 +1231,12 @@ def _prepare_targets(self, targets): category_id = self._class_mapping[target[3].item()] else: category_id = target[3].item() - + # Use area from data if available (coco format), otherwise use Detections.area property area = data.get("area") if data else None if area is None: area = image_targets.area[target_idx] - + dict_annotation = { "area": area, "iscrowd": data.get("iscrowd", 0), diff --git a/test/metrics/test_mean_average_precision_area.py b/test/metrics/test_mean_average_precision_area.py index 5a3af7d546..17257701a8 100644 --- a/test/metrics/test_mean_average_precision_area.py +++ b/test/metrics/test_mean_average_precision_area.py @@ -13,40 +13,47 @@ def test_area_calculated_from_bbox_when_data_empty(self): """Test that area is calculated from bbox when data is empty (normal case).""" # Create detections with empty data (normal case) gt = Detections( - xyxy=np.array([ - [10, 10, 40, 40], # Small: 30x30 = 900 - [100, 100, 200, 150], # Medium: 100x50 = 5000 - [300, 300, 500, 400] # Large: 200x100 = 20000 - ], dtype=np.float32), + xyxy=np.array( + [ + [10, 10, 40, 40], # Small: 30x30 = 900 + [100, 100, 200, 150], # Medium: 100x50 = 5000 + [300, 300, 500, 400], # Large: 200x100 = 20000 + ], + dtype=np.float32, + ), class_id=np.array([0, 0, 0]), - confidence=np.array([1.0, 1.0, 1.0]) + confidence=np.array([1.0, 1.0, 1.0]), ) - + pred = Detections( xyxy=gt.xyxy.copy(), class_id=gt.class_id.copy(), - confidence=np.array([0.9, 0.9, 0.9]) + confidence=np.array([0.9, 0.9, 0.9]), ) - + # Verify data is empty (normal case) assert gt.data == {} assert pred.data == {} - + # Create mAP metric and test area calculation map_metric = MeanAveragePrecision() map_metric.update([pred], [gt]) - + # Check that areas were calculated correctly from bbox prepared_targets = 
map_metric._prepare_targets(map_metric._targets_list) - + areas = [ann["area"] for ann in prepared_targets["annotations"]] expected_areas = [900.0, 5000.0, 20000.0] # width * height for each bbox - - assert np.allclose(areas, expected_areas, rtol=1e-05, atol=1e-08), f"Expected {expected_areas}, got {areas}" - + + assert np.allclose(areas, expected_areas, rtol=1e-05, atol=1e-08), ( + f"Expected {expected_areas}, got {areas}" + ) + # Verify mAP works correctly (no -1.0 for medium/large objects) result = map_metric.compute() - assert result.medium_objects.map50 >= 0.0, "Medium objects should have valid mAP" + assert result.medium_objects.map50 >= 0.0, ( + "Medium objects should have valid mAP" + ) assert result.large_objects.map50 >= 0.0, "Large objects should have valid mAP" def test_area_preserved_when_provided_in_data(self): @@ -55,125 +62,138 @@ def test_area_preserved_when_provided_in_data(self): gt = Detections( xyxy=np.array([[100, 100, 200, 150]], dtype=np.float32), # Would be 5000 class_id=np.array([0]), - confidence=np.array([1.0]) + confidence=np.array([1.0]), ) - + # Add custom area to data (different from calculated) gt.data = {"area": np.array([3000.0])} - + pred = Detections( - xyxy=gt.xyxy.copy(), - class_id=gt.class_id.copy(), - confidence=np.array([0.9]) + xyxy=gt.xyxy.copy(), class_id=gt.class_id.copy(), confidence=np.array([0.9]) ) pred.data = {"area": np.array([3000.0])} - + # Test area calculation map_metric = MeanAveragePrecision() map_metric.update([pred], [gt]) - + # Check that provided area is used (not calculated) prepared_targets = map_metric._prepare_targets(map_metric._targets_list) used_area = prepared_targets["annotations"][0]["area"] - - assert np.allclose(used_area, 3000.0, rtol=1e-05, atol=1e-08), f"Should use provided area 3000.0, got {used_area}" - + + assert np.allclose(used_area, 3000.0, rtol=1e-05, atol=1e-08), ( + f"Should use provided area 3000.0, got {used_area}" + ) + # Verify it's different from what would be calculated 
calculated_area = (200 - 100) * (150 - 100) # 100 * 50 = 5000 - assert not np.allclose(used_area, calculated_area, rtol=1e-05, atol=1e-08), "Should use provided area, not calculated" + assert not np.allclose(used_area, calculated_area, rtol=1e-05, atol=1e-08), ( + "Should use provided area, not calculated" + ) def test_mixed_area_sources(self): """Test mix of detections with and without area in data.""" # Create detections where some have area in data, others don't gt1 = Detections( xyxy=np.array([[10, 10, 40, 40]], dtype=np.float32), # 900 - class_id=np.array([0]) + class_id=np.array([0]), ) # No area in data - should be calculated - + gt2 = Detections( xyxy=np.array([[100, 100, 200, 150]], dtype=np.float32), # 5000 - class_id=np.array([1]) + class_id=np.array([1]), ) # Add area in data - should be preserved gt2.data = {"area": np.array([3000.0])} - + pred1 = Detections( xyxy=gt1.xyxy.copy(), class_id=gt1.class_id.copy(), - confidence=np.array([0.9]) + confidence=np.array([0.9]), ) - + pred2 = Detections( xyxy=gt2.xyxy.copy(), class_id=gt2.class_id.copy(), - confidence=np.array([0.8]) + confidence=np.array([0.8]), ) pred2.data = {"area": np.array([3000.0])} - + # Test area calculation for mixed sources map_metric = MeanAveragePrecision() map_metric.update([pred1, pred2], [gt1, gt2]) - + prepared_targets = map_metric._prepare_targets(map_metric._targets_list) areas = [ann["area"] for ann in prepared_targets["annotations"]] - + expected_areas = [900.0, 3000.0] # calculated, then provided - assert np.allclose(areas, expected_areas, rtol=1e-05, atol=1e-08), f"Expected {expected_areas}, got {areas}" + assert np.allclose(areas, expected_areas, rtol=1e-05, atol=1e-08), ( + f"Expected {expected_areas}, got {areas}" + ) def test_size_specific_map_works_correctly(self): """Test that size-specific mAP works correctly with area fix.""" # Create detections with one object of each size gt = Detections( - xyxy=np.array([ - [10, 10, 40, 40], # Small: 30x30 = 900 < 1024 - [100, 
100, 200, 150], # Medium: 100x50 = 5000 (1024 <= x < 9216) - [300, 300, 500, 400] # Large: 200x100 = 20000 >= 9216 - ], dtype=np.float32), - class_id=np.array([0, 0, 0]) - ) - + xyxy=np.array( + [ + [10, 10, 40, 40], # Small: 30x30 = 900 < 1024 + [100, 100, 200, 150], # Medium: 100x50 = 5000 (1024 <= x < 9216) + [300, 300, 500, 400], # Large: 200x100 = 20000 >= 9216 + ], + dtype=np.float32, + ), + class_id=np.array([0, 0, 0]), + ) + # Perfect predictions pred = Detections( xyxy=gt.xyxy.copy(), class_id=gt.class_id.copy(), - confidence=np.array([0.9, 0.9, 0.9]) + confidence=np.array([0.9, 0.9, 0.9]), ) - + # Test mAP calculation map_metric = MeanAveragePrecision() map_metric.update([pred], [gt]) result = map_metric.compute() - + # All size categories should have valid results (not -1.0) assert result.small_objects.map50 >= 0.0, "Small objects should have valid mAP" - assert result.medium_objects.map50 >= 0.0, "Medium objects should have valid mAP" + assert result.medium_objects.map50 >= 0.0, ( + "Medium objects should have valid mAP" + ) assert result.large_objects.map50 >= 0.0, "Large objects should have valid mAP" - + # Perfect matches should yield high mAP for medium and large - assert result.medium_objects.map50 > 0.9, "Perfect medium matches should have high mAP" - assert result.large_objects.map50 > 0.9, "Perfect large matches should have high mAP" + assert result.medium_objects.map50 > 0.9, ( + "Perfect medium matches should have high mAP" + ) + assert result.large_objects.map50 > 0.9, ( + "Perfect large matches should have high mAP" + ) def test_area_uses_detections_property(self): """Test that area calculation uses Detections.area property correctly.""" # Create detection gt = Detections( xyxy=np.array([[100, 100, 200, 150]], dtype=np.float32), - class_id=np.array([0]) + class_id=np.array([0]), ) - + pred = Detections( - xyxy=gt.xyxy.copy(), - class_id=gt.class_id.copy(), - confidence=np.array([0.9]) + xyxy=gt.xyxy.copy(), class_id=gt.class_id.copy(), 
confidence=np.array([0.9]) ) - + # Test that internal calculation matches Detections.area property map_metric = MeanAveragePrecision() map_metric.update([pred], [gt]) - + prepared_targets = map_metric._prepare_targets(map_metric._targets_list) used_area = prepared_targets["annotations"][0]["area"] expected_area = gt.area[0] - - assert np.allclose(used_area, expected_area, rtol=1e-05, atol=1e-08), f"Should use Detections.area property {expected_area}, got {used_area}" \ No newline at end of file + + assert np.allclose(used_area, expected_area, rtol=1e-05, atol=1e-08), ( + f"Should use Detections.area property {expected_area}, got {used_area}" + ) From a444b3bde878ff5ef860a2bbb2c72d7a30a92729 Mon Sep 17 00:00:00 2001 From: Balthasar Date: Wed, 16 Jul 2025 20:06:27 +0200 Subject: [PATCH 04/24] fix: cleanup comment --- supervision/metrics/mean_average_precision.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/supervision/metrics/mean_average_precision.py b/supervision/metrics/mean_average_precision.py index 840494dd75..fd273916d9 100644 --- a/supervision/metrics/mean_average_precision.py +++ b/supervision/metrics/mean_average_precision.py @@ -1232,7 +1232,8 @@ def _prepare_targets(self, targets): else: category_id = target[3].item() - # Use area from data if available (coco format), otherwise use Detections.area property + # Use area from data if available (e.g., COCO datasets) + # Otherwise use Detections.area property area = data.get("area") if data else None if area is None: area = image_targets.area[target_idx] From 40d4c6f554112591db55fe21c7759a515f8f4189 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 16 Jul 2025 18:07:48 +0000 Subject: [PATCH 05/24] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?= =?UTF-8?q?=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
supervision/metrics/mean_average_precision.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/supervision/metrics/mean_average_precision.py b/supervision/metrics/mean_average_precision.py index bd83a4bbe7..53965296ed 100644 --- a/supervision/metrics/mean_average_precision.py +++ b/supervision/metrics/mean_average_precision.py @@ -1231,7 +1231,7 @@ def _prepare_targets(self, targets): category_id = self._class_mapping[target[3].item()] else: category_id = target[3].item() - + # Use area from data if available (e.g., COCO datasets) # Otherwise use Detections.area property area = data.get("area") if data else None From 0ddbcb2010131973c377e47a60cb3d3ccea5c3f3 Mon Sep 17 00:00:00 2001 From: Balthasar Date: Thu, 17 Jul 2025 09:49:21 +0200 Subject: [PATCH 06/24] fix: start id with 1 because 0 means no match --- supervision/metrics/mean_average_precision.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/supervision/metrics/mean_average_precision.py b/supervision/metrics/mean_average_precision.py index 5100f39ac1..e22eb32256 100644 --- a/supervision/metrics/mean_average_precision.py +++ b/supervision/metrics/mean_average_precision.py @@ -1237,7 +1237,7 @@ def _prepare_targets(self, targets): "image_id": image_id, "bbox": xywh, "category_id": category_id, - "id": len(annotations), # incrementally increase the id + "id": len(annotations) + 1, # incrementally increase the id } annotations.append(dict_annotation) # Category list From 2c67cee720b39cbc26d5b1143aedb572e889d493 Mon Sep 17 00:00:00 2001 From: Balthasar Date: Thu, 17 Jul 2025 09:57:55 +0200 Subject: [PATCH 07/24] fix: tests and adjust comment --- supervision/metrics/mean_average_precision.py | 2 +- test/metrics/test_mean_average_precision.py | 66 +++++++++++++++++++ 2 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 test/metrics/test_mean_average_precision.py diff --git a/supervision/metrics/mean_average_precision.py 
b/supervision/metrics/mean_average_precision.py index e22eb32256..f5fa315926 100644 --- a/supervision/metrics/mean_average_precision.py +++ b/supervision/metrics/mean_average_precision.py @@ -1237,7 +1237,7 @@ def _prepare_targets(self, targets): "image_id": image_id, "bbox": xywh, "category_id": category_id, - "id": len(annotations) + 1, # incrementally increase the id + "id": len(annotations) + 1, # Start IDs from 1 (0 means no match) } annotations.append(dict_annotation) # Category list diff --git a/test/metrics/test_mean_average_precision.py b/test/metrics/test_mean_average_precision.py new file mode 100644 index 0000000000..b3b6824952 --- /dev/null +++ b/test/metrics/test_mean_average_precision.py @@ -0,0 +1,66 @@ +""" +Tests for Mean Average Precision ID=0 bug fix +""" +import numpy as np +import pytest + +from supervision.detection.core import Detections +from supervision.metrics.mean_average_precision import MeanAveragePrecision + + +def test_single_perfect_detection(): + """Test that single perfect detection gets 1.0 mAP (not 0.0 due to ID=0 bug)""" + # Perfect detection (identical prediction and target) + detection = Detections( + xyxy=np.array([[10, 10, 50, 50]], dtype=np.float64), + class_id=np.array([0]), + confidence=np.array([0.9]) + ) + + metric = MeanAveragePrecision() + metric.update([detection], [detection]) + result = metric.compute() + + # Should be perfect 1.0 mAP, not 0.0 due to ID=0 bug + assert abs(result.map50_95 - 1.0) < 1e-6 + + +def test_multiple_perfect_detections(): + """Test that multiple perfect detections get 1.0 mAP""" + # Multiple perfect detections in one image + detections = Detections( + xyxy=np.array([ + [10, 10, 50, 50], + [100, 100, 140, 140], + [200, 200, 240, 240] + ], dtype=np.float64), + class_id=np.array([0, 0, 0]), + confidence=np.array([0.9, 0.9, 0.9]) + ) + + metric = MeanAveragePrecision() + metric.update([detections], [detections]) + result = metric.compute() + + # Should be perfect 1.0 mAP + assert 
abs(result.map50_95 - 1.0) < 1e-6 + + +def test_batch_updates_perfect_detections(): + """Test that batch updates with perfect detections get 1.0 mAP""" + # Single perfect detection for multiple batch updates + detection = Detections( + xyxy=np.array([[10, 10, 50, 50]], dtype=np.float64), + class_id=np.array([0]), + confidence=np.array([0.9]) + ) + + metric = MeanAveragePrecision() + # Add 3 batch updates + metric.update([detection], [detection]) + metric.update([detection], [detection]) + metric.update([detection], [detection]) + result = metric.compute() + + # Should be perfect 1.0 mAP across all batches + assert abs(result.map50_95 - 1.0) < 1e-6 \ No newline at end of file From 2907c40ecce4a5d9e8e957da08fc636c2d6af804 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 17 Jul 2025 07:59:58 +0000 Subject: [PATCH 08/24] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?= =?UTF-8?q?=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/metrics/test_mean_average_precision.py | 31 ++++++++++----------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/test/metrics/test_mean_average_precision.py b/test/metrics/test_mean_average_precision.py index b3b6824952..bb0f91eff2 100644 --- a/test/metrics/test_mean_average_precision.py +++ b/test/metrics/test_mean_average_precision.py @@ -1,8 +1,8 @@ """ Tests for Mean Average Precision ID=0 bug fix """ + import numpy as np -import pytest from supervision.detection.core import Detections from supervision.metrics.mean_average_precision import MeanAveragePrecision @@ -14,13 +14,13 @@ def test_single_perfect_detection(): detection = Detections( xyxy=np.array([[10, 10, 50, 50]], dtype=np.float64), class_id=np.array([0]), - confidence=np.array([0.9]) + confidence=np.array([0.9]), ) - + metric = MeanAveragePrecision() metric.update([detection], [detection]) result = 
metric.compute() - + # Should be perfect 1.0 mAP, not 0.0 due to ID=0 bug assert abs(result.map50_95 - 1.0) < 1e-6 @@ -29,19 +29,18 @@ def test_multiple_perfect_detections(): """Test that multiple perfect detections get 1.0 mAP""" # Multiple perfect detections in one image detections = Detections( - xyxy=np.array([ - [10, 10, 50, 50], - [100, 100, 140, 140], - [200, 200, 240, 240] - ], dtype=np.float64), + xyxy=np.array( + [[10, 10, 50, 50], [100, 100, 140, 140], [200, 200, 240, 240]], + dtype=np.float64, + ), class_id=np.array([0, 0, 0]), - confidence=np.array([0.9, 0.9, 0.9]) + confidence=np.array([0.9, 0.9, 0.9]), ) - + metric = MeanAveragePrecision() metric.update([detections], [detections]) result = metric.compute() - + # Should be perfect 1.0 mAP assert abs(result.map50_95 - 1.0) < 1e-6 @@ -52,15 +51,15 @@ def test_batch_updates_perfect_detections(): detection = Detections( xyxy=np.array([[10, 10, 50, 50]], dtype=np.float64), class_id=np.array([0]), - confidence=np.array([0.9]) + confidence=np.array([0.9]), ) - + metric = MeanAveragePrecision() # Add 3 batch updates metric.update([detection], [detection]) metric.update([detection], [detection]) metric.update([detection], [detection]) result = metric.compute() - + # Should be perfect 1.0 mAP across all batches - assert abs(result.map50_95 - 1.0) < 1e-6 \ No newline at end of file + assert abs(result.map50_95 - 1.0) < 1e-6 From 2e72e7089392f9aa2151288262f92abd77bcee1e Mon Sep 17 00:00:00 2001 From: Balthasar Date: Fri, 18 Jul 2025 09:04:41 +0200 Subject: [PATCH 09/24] basic fix --- supervision/metrics/mean_average_precision.py | 29 ++++++++++++------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/supervision/metrics/mean_average_precision.py b/supervision/metrics/mean_average_precision.py index f5fa315926..9e9952ed6d 100644 --- a/supervision/metrics/mean_average_precision.py +++ b/supervision/metrics/mean_average_precision.py @@ -57,7 +57,11 @@ class and IoU threshold. 
Shape: `(num_target_classes, num_iou_thresholds)` @property def map50_95(self) -> float: - return self.mAP_scores.mean() + valid_scores = self.mAP_scores[self.mAP_scores > -1] + if len(valid_scores) > 0: + return valid_scores.mean() + else: + return -1 @property def map50(self) -> float: @@ -917,35 +921,38 @@ def _accumulate(self): :, :, :, area_range_idx, max_100_dets_idx ] # mAP over thresholds (dimension=num_thresholds) - mAP_scores_all_sizes = average_precision_all_sizes.mean(axis=(1, 2)) + # Use masked array to exclude -1 values when computing mean + masked = np.ma.masked_equal(average_precision_all_sizes, -1) + mAP_scores_all_sizes = np.ma.filled(masked.mean(axis=(1, 2)), -1) # AP per class - ap_per_class_all_sizes = average_precision_all_sizes.mean(axis=1).transpose( - 1, 0 - ) + ap_per_class_all_sizes = np.ma.filled(masked.mean(axis=1), -1).transpose(1, 0) # Average precision for SMALL objects and 100 max detections small_area_range_idx = list(ObjectSize).index(ObjectSize.SMALL) average_precision_small = precision[ :, :, :, small_area_range_idx, max_100_dets_idx ] - mAP_scores_small = average_precision_small.mean(axis=(1, 2)) - ap_per_class_small = average_precision_small.mean(axis=1).transpose(1, 0) + masked_small = np.ma.masked_equal(average_precision_small, -1) + mAP_scores_small = np.ma.filled(masked_small.mean(axis=(1, 2)), -1) + ap_per_class_small = np.ma.filled(masked_small.mean(axis=1), -1).transpose(1, 0) # Average precision for MEDIUM objects and 100 max detections medium_area_range_idx = list(ObjectSize).index(ObjectSize.MEDIUM) average_precision_medium = precision[ :, :, :, medium_area_range_idx, max_100_dets_idx ] - mAP_scores_medium = average_precision_medium.mean(axis=(1, 2)) - ap_per_class_medium = average_precision_medium.mean(axis=1).transpose(1, 0) + masked_medium = np.ma.masked_equal(average_precision_medium, -1) + mAP_scores_medium = np.ma.filled(masked_medium.mean(axis=(1, 2)), -1) + ap_per_class_medium = 
np.ma.filled(masked_medium.mean(axis=1), -1).transpose(1, 0) # Average precision for LARGE objects and 100 max detections large_area_range_idx = list(ObjectSize).index(ObjectSize.LARGE) average_precision_large = precision[ :, :, :, large_area_range_idx, max_100_dets_idx ] - mAP_scores_large = average_precision_large.mean(axis=(1, 2)) - ap_per_class_large = average_precision_large.mean(axis=1).transpose(1, 0) + masked_large = np.ma.masked_equal(average_precision_large, -1) + mAP_scores_large = np.ma.filled(masked_large.mean(axis=(1, 2)), -1) + ap_per_class_large = np.ma.filled(masked_large.mean(axis=1), -1).transpose(1, 0) self.results = { "params": self.params, From 001a4254f80726e75af73ec48b5f8008e1ba0cb1 Mon Sep 17 00:00:00 2001 From: Balthasar Date: Fri, 18 Jul 2025 09:24:00 +0200 Subject: [PATCH 10/24] fix: improve code, more fixes and add tests --- supervision/metrics/mean_average_precision.py | 40 ++- test/metrics/test_mean_average_precision.py | 254 ++++++++++++++++++ 2 files changed, 285 insertions(+), 9 deletions(-) diff --git a/supervision/metrics/mean_average_precision.py b/supervision/metrics/mean_average_precision.py index 9e9952ed6d..fc0e0572e0 100644 --- a/supervision/metrics/mean_average_precision.py +++ b/supervision/metrics/mean_average_precision.py @@ -425,6 +425,11 @@ def load_predictions(self, predictions: list[dict]) -> EvaluationDataset: if not isinstance(predictions, list): raise ValueError("results must be a list") + # Handle empty predictions + if len(predictions) == 0: + predictions_dataset.dataset["annotations"] = [] + return predictions_dataset + ids = [pred["image_id"] for pred in predictions] # Make sure the image ids from predictions exist in the current dataset @@ -923,9 +928,14 @@ def _accumulate(self): # mAP over thresholds (dimension=num_thresholds) # Use masked array to exclude -1 values when computing mean masked = np.ma.masked_equal(average_precision_all_sizes, -1) - mAP_scores_all_sizes = np.ma.filled(masked.mean(axis=(1, 
2)), -1) - # AP per class - ap_per_class_all_sizes = np.ma.filled(masked.mean(axis=1), -1).transpose(1, 0) + # Check if all values are masked (empty array) + if masked.count() == 0: + mAP_scores_all_sizes = np.full(num_iou_thresholds, -1) + ap_per_class_all_sizes = np.full((num_categories, num_iou_thresholds), -1) + else: + mAP_scores_all_sizes = np.ma.filled(masked.mean(axis=(1, 2)), -1) + # AP per class + ap_per_class_all_sizes = np.ma.filled(masked.mean(axis=1), -1).transpose(1, 0) # Average precision for SMALL objects and 100 max detections small_area_range_idx = list(ObjectSize).index(ObjectSize.SMALL) @@ -933,8 +943,12 @@ def _accumulate(self): :, :, :, small_area_range_idx, max_100_dets_idx ] masked_small = np.ma.masked_equal(average_precision_small, -1) - mAP_scores_small = np.ma.filled(masked_small.mean(axis=(1, 2)), -1) - ap_per_class_small = np.ma.filled(masked_small.mean(axis=1), -1).transpose(1, 0) + if masked_small.count() == 0: + mAP_scores_small = np.full(num_iou_thresholds, -1) + ap_per_class_small = np.full((num_categories, num_iou_thresholds), -1) + else: + mAP_scores_small = np.ma.filled(masked_small.mean(axis=(1, 2)), -1) + ap_per_class_small = np.ma.filled(masked_small.mean(axis=1), -1).transpose(1, 0) # Average precision for MEDIUM objects and 100 max detections medium_area_range_idx = list(ObjectSize).index(ObjectSize.MEDIUM) @@ -942,8 +956,12 @@ def _accumulate(self): :, :, :, medium_area_range_idx, max_100_dets_idx ] masked_medium = np.ma.masked_equal(average_precision_medium, -1) - mAP_scores_medium = np.ma.filled(masked_medium.mean(axis=(1, 2)), -1) - ap_per_class_medium = np.ma.filled(masked_medium.mean(axis=1), -1).transpose(1, 0) + if masked_medium.count() == 0: + mAP_scores_medium = np.full(num_iou_thresholds, -1) + ap_per_class_medium = np.full((num_categories, num_iou_thresholds), -1) + else: + mAP_scores_medium = np.ma.filled(masked_medium.mean(axis=(1, 2)), -1) + ap_per_class_medium = np.ma.filled(masked_medium.mean(axis=1), 
-1).transpose(1, 0) # Average precision for LARGE objects and 100 max detections large_area_range_idx = list(ObjectSize).index(ObjectSize.LARGE) @@ -951,8 +969,12 @@ def _accumulate(self): :, :, :, large_area_range_idx, max_100_dets_idx ] masked_large = np.ma.masked_equal(average_precision_large, -1) - mAP_scores_large = np.ma.filled(masked_large.mean(axis=(1, 2)), -1) - ap_per_class_large = np.ma.filled(masked_large.mean(axis=1), -1).transpose(1, 0) + if masked_large.count() == 0: + mAP_scores_large = np.full(num_iou_thresholds, -1) + ap_per_class_large = np.full((num_categories, num_iou_thresholds), -1) + else: + mAP_scores_large = np.ma.filled(masked_large.mean(axis=(1, 2)), -1) + ap_per_class_large = np.ma.filled(masked_large.mean(axis=1), -1).transpose(1, 0) self.results = { "params": self.params, diff --git a/test/metrics/test_mean_average_precision.py b/test/metrics/test_mean_average_precision.py index bb0f91eff2..931b1bb41c 100644 --- a/test/metrics/test_mean_average_precision.py +++ b/test/metrics/test_mean_average_precision.py @@ -63,3 +63,257 @@ def test_batch_updates_perfect_detections(): # Should be perfect 1.0 mAP across all batches assert abs(result.map50_95 - 1.0) < 1e-6 + + +def test_scenario_1_success_case_imperfect_match(): + """Scenario 1: Success Case with imperfect match""" + # Small object (class 0) - area = 30*30 = 900 < 1024 + small_perfect = Detections( + xyxy=np.array([[10, 10, 40, 40]], dtype=np.float64), + class_id=np.array([0]), + confidence=np.array([0.95]), + data={"area": np.array([900])} + ) + + # Medium object (class 1) - area = 50*50 = 2500 (between 1024 and 9216) + medium_target = Detections( + xyxy=np.array([[10, 10, 60, 60]], dtype=np.float64), + class_id=np.array([1]), + data={"area": np.array([2500])} + ) + medium_pred = Detections( + xyxy=np.array([[12, 12, 60, 60]], dtype=np.float64), # Slightly off + class_id=np.array([1]), + confidence=np.array([0.9]), + data={"area": np.array([2304])} # 48*48 + ) + + # Large objects 
(classes 0, 1, 2) - area = 100*100 = 10000 > 9216 + large_targets = Detections( + xyxy=np.array([ + [10, 10, 110, 110], + [120, 120, 220, 220], + [230, 230, 330, 330] + ], dtype=np.float64), + class_id=np.array([2, 0, 1]), + data={"area": np.array([10000, 10000, 10000])} + ) + large_preds = Detections( + xyxy=np.array([ + [10, 10, 110, 110], + [120, 120, 220, 220], + [230, 230, 330, 330] + ], dtype=np.float64), + class_id=np.array([2, 0, 1]), + confidence=np.array([0.9, 0.9, 0.9]), + data={"area": np.array([10000, 10000, 10000])} + ) + + metric = MeanAveragePrecision() + metric.update([small_perfect], [small_perfect]) + metric.update([medium_pred], [medium_target]) + metric.update([large_preds], [large_targets]) + result = metric.compute() + + # Should be close to 0.9 (slightly less than perfect due to medium object) + assert 0.85 < result.map50_95 < 0.98 # Adjusted upper bound + assert result.medium_objects.map50_95 < 1.0 # Medium should be less than perfect + + +def test_scenario_2_missed_detection(): + """Scenario 2: GT Present, No Prediction (Missed Detection)""" + # Small object - area = 30*30 = 900 < 1024 + small_detection = Detections( + xyxy=np.array([[10, 10, 40, 40]], dtype=np.float64), + class_id=np.array([0]), + confidence=np.array([0.95]), + data={"area": np.array([900])} + ) + + # Medium object - area = 50*50 = 2500 (between 1024 and 9216) - no prediction (missed) + medium_target = Detections( + xyxy=np.array([[10, 10, 60, 60]], dtype=np.float64), + class_id=np.array([1]), + data={"area": np.array([2500])} + ) + no_medium_pred = Detections.empty() + + # Large objects - area = 100*100 = 10000 > 9216 + large_detections = Detections( + xyxy=np.array([ + [10, 10, 110, 110], + [120, 120, 220, 220], + [230, 230, 330, 330] + ], dtype=np.float64), + class_id=np.array([2, 0, 1]), + confidence=np.array([0.9, 0.9, 0.9]), + data={"area": np.array([10000, 10000, 10000])} + ) + + metric = MeanAveragePrecision() + metric.update([small_detection], [small_detection]) 
+ metric.update([no_medium_pred], [medium_target]) + metric.update([large_detections], [large_detections]) + result = metric.compute() + + # Medium objects should have 0.0 mAP (missed detection) + assert abs(result.medium_objects.map50_95 - 0.0) < 1e-6 + + +def test_scenario_3_false_positive(): + """Scenario 3: No GT, Prediction Present (False Positive)""" + # Small object - area = 30*30 = 900 < 1024 + small_detection = Detections( + xyxy=np.array([[10, 10, 40, 40]], dtype=np.float64), + class_id=np.array([0]), + confidence=np.array([0.95]), + data={"area": np.array([900])} + ) + + # Medium object - area = 50*50 = 2500 - false positive (no GT) + medium_pred = Detections( + xyxy=np.array([[12, 12, 62, 62]], dtype=np.float64), + class_id=np.array([1]), + confidence=np.array([0.9]), + data={"area": np.array([2500])} + ) + no_medium_target = Detections.empty() + + # Large objects - area = 100*100 = 10000 > 9216 + large_detections = Detections( + xyxy=np.array([ + [10, 10, 110, 110], + [120, 120, 220, 220], + [230, 230, 330, 330] + ], dtype=np.float64), + class_id=np.array([2, 0, 1]), + confidence=np.array([0.9, 0.9, 0.9]), + data={"area": np.array([10000, 10000, 10000])} + ) + + metric = MeanAveragePrecision() + metric.update([small_detection], [small_detection]) + metric.update([medium_pred], [no_medium_target]) + metric.update([large_detections], [large_detections]) + result = metric.compute() + + # Medium objects should have -1 mAP (false positive, matching pycocotools) + assert result.medium_objects.map50_95 == -1 + + +def test_scenario_4_no_data(): + """Scenario 4: No GT, No Prediction (Category has no data)""" + # Small object - area = 30*30 = 900 < 1024 + small_detection = Detections( + xyxy=np.array([[10, 10, 40, 40]], dtype=np.float64), + class_id=np.array([0]), + confidence=np.array([0.95]), + data={"area": np.array([900])} + ) + + # Medium object - no data at all + no_medium = Detections.empty() + + # Large objects - area = 100*100 = 10000 > 9216 - only 
classes 0 and 2 (no class 1) + large_targets = Detections( + xyxy=np.array([ + [10, 10, 110, 110], + [120, 120, 220, 220], + ], dtype=np.float64), + class_id=np.array([2, 0]), + data={"area": np.array([10000, 10000])} + ) + large_preds = Detections( + xyxy=np.array([ + [10, 10, 110, 110], + [120, 120, 220, 220], + ], dtype=np.float64), + class_id=np.array([2, 0]), + confidence=np.array([0.9, 0.9]), + data={"area": np.array([10000, 10000])} + ) + + metric = MeanAveragePrecision() + metric.update([small_detection], [small_detection]) + metric.update([no_medium], [no_medium]) + metric.update([large_preds], [large_targets]) + result = metric.compute() + + # Should NOT have negative mAP values for overall + assert result.map50_95 >= 0.0 + # Medium objects should have -1 mAP (no data, matching pycocotools) + assert result.medium_objects.map50_95 == -1 + + +def test_scenario_5_only_one_class_present(): + """Scenario 5: Only 1 of 3 Classes Present (Perfect Match)""" + # Only class 0 objects with perfect matches + detections_class_0 = [ + Detections( + xyxy=np.array([[10, 10, 40, 40]], dtype=np.float64), + class_id=np.array([0]), + confidence=np.array([0.95]), + ), + Detections( + xyxy=np.array([[20, 20, 230, 130]], dtype=np.float64), + class_id=np.array([0]), + confidence=np.array([0.9]), + ), + ] + + metric = MeanAveragePrecision() + for det in detections_class_0: + metric.update([det], [det]) + + result = metric.compute() + + # Should be 1.0 mAP (perfect match for the only class present) + assert abs(result.map50_95 - 1.0) < 1e-6 + assert abs(result.map50 - 1.0) < 1e-6 + assert abs(result.map75 - 1.0) < 1e-6 + + +def test_mixed_classes_with_missing_detections(): + """Test mixed scenario with some classes having no detections""" + # Class 0: Perfect detection + class_0_det = Detections( + xyxy=np.array([[10, 10, 50, 50]], dtype=np.float64), + class_id=np.array([0]), + confidence=np.array([0.9]), + ) + + # Class 1: GT exists but no prediction + class_1_target = Detections( 
+ xyxy=np.array([[60, 60, 100, 100]], dtype=np.float64), + class_id=np.array([1]), + ) + class_1_pred = Detections.empty() + + # Class 2: Prediction exists but no GT (false positive) + class_2_pred = Detections( + xyxy=np.array([[110, 110, 150, 150]], dtype=np.float64), + class_id=np.array([2]), + confidence=np.array([0.8]), + ) + class_2_target = Detections.empty() + + metric = MeanAveragePrecision() + metric.update([class_0_det], [class_0_det]) + metric.update([class_1_pred], [class_1_target]) + metric.update([class_2_pred], [class_2_target]) + result = metric.compute() + + # Should not have negative mAP + assert result.map50_95 >= 0.0 + # Should be less than 1.0 due to missed detection and false positive + assert result.map50_95 < 1.0 + + +def test_empty_predictions_and_targets(): + """Test completely empty predictions and targets""" + metric = MeanAveragePrecision() + metric.update([Detections.empty()], [Detections.empty()]) + result = metric.compute() + + # Should handle empty case gracefully + assert result.map50_95 >= -1.0 # Can be -1 to indicate no data From afdbb0a75c6a6f35f0ea8b15f3d5dff93a28a6e1 Mon Sep 17 00:00:00 2001 From: Balthasar Date: Fri, 18 Jul 2025 09:36:59 +0200 Subject: [PATCH 11/24] fix: keep it DRY --- supervision/metrics/mean_average_precision.py | 55 +++++++++---------- test/metrics/test_mean_average_precision.py | 11 +++- 2 files changed, 34 insertions(+), 32 deletions(-) diff --git a/supervision/metrics/mean_average_precision.py b/supervision/metrics/mean_average_precision.py index fc0e0572e0..640e028cf0 100644 --- a/supervision/metrics/mean_average_precision.py +++ b/supervision/metrics/mean_average_precision.py @@ -918,6 +918,27 @@ def _accumulate(self): np.array(score_at_recall) ) + self.results = { + "params": self.params, + "counts": [num_iou_thresholds, num_recall_thresholds, num_categories, num_area_ranges, num_max_detections], + "date": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + "precision": precision, + "recall": 
recall, + "scores": scores, + } + + # Helper function to compute average precision while handling -1 sentinel values + def compute_average_precision(precision_slice): + """Helper function to compute average precision while handling -1 sentinel values.""" + masked = np.ma.masked_equal(precision_slice, -1) + if masked.count() == 0: + # All values are -1 (no data) + return np.full(num_iou_thresholds, -1), np.full((num_categories, num_iou_thresholds), -1) + else: + mAP_scores = np.ma.filled(masked.mean(axis=(1, 2)), -1) + ap_per_class = np.ma.filled(masked.mean(axis=1), -1).transpose(1, 0) + return mAP_scores, ap_per_class + # Average precision over all sizes, 100 max detections area_range_idx = list(ObjectSize).index(ObjectSize.ALL) max_100_dets_idx = self.params.max_dets.index(100) @@ -927,54 +948,28 @@ def _accumulate(self): ] # mAP over thresholds (dimension=num_thresholds) # Use masked array to exclude -1 values when computing mean - masked = np.ma.masked_equal(average_precision_all_sizes, -1) - # Check if all values are masked (empty array) - if masked.count() == 0: - mAP_scores_all_sizes = np.full(num_iou_thresholds, -1) - ap_per_class_all_sizes = np.full((num_categories, num_iou_thresholds), -1) - else: - mAP_scores_all_sizes = np.ma.filled(masked.mean(axis=(1, 2)), -1) - # AP per class - ap_per_class_all_sizes = np.ma.filled(masked.mean(axis=1), -1).transpose(1, 0) + mAP_scores_all_sizes, ap_per_class_all_sizes = compute_average_precision(average_precision_all_sizes) # Average precision for SMALL objects and 100 max detections small_area_range_idx = list(ObjectSize).index(ObjectSize.SMALL) average_precision_small = precision[ :, :, :, small_area_range_idx, max_100_dets_idx ] - masked_small = np.ma.masked_equal(average_precision_small, -1) - if masked_small.count() == 0: - mAP_scores_small = np.full(num_iou_thresholds, -1) - ap_per_class_small = np.full((num_categories, num_iou_thresholds), -1) - else: - mAP_scores_small = 
np.ma.filled(masked_small.mean(axis=(1, 2)), -1) - ap_per_class_small = np.ma.filled(masked_small.mean(axis=1), -1).transpose(1, 0) + mAP_scores_small, ap_per_class_small = compute_average_precision(average_precision_small) # Average precision for MEDIUM objects and 100 max detections medium_area_range_idx = list(ObjectSize).index(ObjectSize.MEDIUM) average_precision_medium = precision[ :, :, :, medium_area_range_idx, max_100_dets_idx ] - masked_medium = np.ma.masked_equal(average_precision_medium, -1) - if masked_medium.count() == 0: - mAP_scores_medium = np.full(num_iou_thresholds, -1) - ap_per_class_medium = np.full((num_categories, num_iou_thresholds), -1) - else: - mAP_scores_medium = np.ma.filled(masked_medium.mean(axis=(1, 2)), -1) - ap_per_class_medium = np.ma.filled(masked_medium.mean(axis=1), -1).transpose(1, 0) + mAP_scores_medium, ap_per_class_medium = compute_average_precision(average_precision_medium) # Average precision for LARGE objects and 100 max detections large_area_range_idx = list(ObjectSize).index(ObjectSize.LARGE) average_precision_large = precision[ :, :, :, large_area_range_idx, max_100_dets_idx ] - masked_large = np.ma.masked_equal(average_precision_large, -1) - if masked_large.count() == 0: - mAP_scores_large = np.full(num_iou_thresholds, -1) - ap_per_class_large = np.full((num_categories, num_iou_thresholds), -1) - else: - mAP_scores_large = np.ma.filled(masked_large.mean(axis=(1, 2)), -1) - ap_per_class_large = np.ma.filled(masked_large.mean(axis=1), -1).transpose(1, 0) + mAP_scores_large, ap_per_class_large = compute_average_precision(average_precision_large) self.results = { "params": self.params, diff --git a/test/metrics/test_mean_average_precision.py b/test/metrics/test_mean_average_precision.py index 931b1bb41c..1345b0a774 100644 --- a/test/metrics/test_mean_average_precision.py +++ b/test/metrics/test_mean_average_precision.py @@ -315,5 +315,12 @@ def test_empty_predictions_and_targets(): metric.update([Detections.empty()], 
[Detections.empty()]) result = metric.compute() - # Should handle empty case gracefully - assert result.map50_95 >= -1.0 # Can be -1 to indicate no data + # Should return -1 for no data (matching pycocotools behavior) + assert result.map50_95 == -1 + assert result.map50 == -1 + assert result.map75 == -1 + + # All object size categories should also be -1 + assert result.small_objects.map50_95 == -1 + assert result.medium_objects.map50_95 == -1 + assert result.large_objects.map50_95 == -1 From a139fb0b49d5c7942331256ee0259b79931fc7fe Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 18 Jul 2025 08:00:32 +0000 Subject: [PATCH 12/24] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?= =?UTF-8?q?=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/metrics/mean_average_precision.py | 28 +++- test/metrics/test_mean_average_precision.py | 136 +++++++++--------- 2 files changed, 91 insertions(+), 73 deletions(-) diff --git a/supervision/metrics/mean_average_precision.py b/supervision/metrics/mean_average_precision.py index 640e028cf0..4c8c353298 100644 --- a/supervision/metrics/mean_average_precision.py +++ b/supervision/metrics/mean_average_precision.py @@ -920,7 +920,13 @@ def _accumulate(self): self.results = { "params": self.params, - "counts": [num_iou_thresholds, num_recall_thresholds, num_categories, num_area_ranges, num_max_detections], + "counts": [ + num_iou_thresholds, + num_recall_thresholds, + num_categories, + num_area_ranges, + num_max_detections, + ], "date": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "precision": precision, "recall": recall, @@ -933,7 +939,9 @@ def compute_average_precision(precision_slice): masked = np.ma.masked_equal(precision_slice, -1) if masked.count() == 0: # All values are -1 (no data) - return np.full(num_iou_thresholds, -1), np.full((num_categories, 
num_iou_thresholds), -1) + return np.full(num_iou_thresholds, -1), np.full( + (num_categories, num_iou_thresholds), -1 + ) else: mAP_scores = np.ma.filled(masked.mean(axis=(1, 2)), -1) ap_per_class = np.ma.filled(masked.mean(axis=1), -1).transpose(1, 0) @@ -948,28 +956,36 @@ def compute_average_precision(precision_slice): ] # mAP over thresholds (dimension=num_thresholds) # Use masked array to exclude -1 values when computing mean - mAP_scores_all_sizes, ap_per_class_all_sizes = compute_average_precision(average_precision_all_sizes) + mAP_scores_all_sizes, ap_per_class_all_sizes = compute_average_precision( + average_precision_all_sizes + ) # Average precision for SMALL objects and 100 max detections small_area_range_idx = list(ObjectSize).index(ObjectSize.SMALL) average_precision_small = precision[ :, :, :, small_area_range_idx, max_100_dets_idx ] - mAP_scores_small, ap_per_class_small = compute_average_precision(average_precision_small) + mAP_scores_small, ap_per_class_small = compute_average_precision( + average_precision_small + ) # Average precision for MEDIUM objects and 100 max detections medium_area_range_idx = list(ObjectSize).index(ObjectSize.MEDIUM) average_precision_medium = precision[ :, :, :, medium_area_range_idx, max_100_dets_idx ] - mAP_scores_medium, ap_per_class_medium = compute_average_precision(average_precision_medium) + mAP_scores_medium, ap_per_class_medium = compute_average_precision( + average_precision_medium + ) # Average precision for LARGE objects and 100 max detections large_area_range_idx = list(ObjectSize).index(ObjectSize.LARGE) average_precision_large = precision[ :, :, :, large_area_range_idx, max_100_dets_idx ] - mAP_scores_large, ap_per_class_large = compute_average_precision(average_precision_large) + mAP_scores_large, ap_per_class_large = compute_average_precision( + average_precision_large + ) self.results = { "params": self.params, diff --git a/test/metrics/test_mean_average_precision.py 
b/test/metrics/test_mean_average_precision.py index 1345b0a774..8bee7b5372 100644 --- a/test/metrics/test_mean_average_precision.py +++ b/test/metrics/test_mean_average_precision.py @@ -72,49 +72,47 @@ def test_scenario_1_success_case_imperfect_match(): xyxy=np.array([[10, 10, 40, 40]], dtype=np.float64), class_id=np.array([0]), confidence=np.array([0.95]), - data={"area": np.array([900])} + data={"area": np.array([900])}, ) - + # Medium object (class 1) - area = 50*50 = 2500 (between 1024 and 9216) medium_target = Detections( xyxy=np.array([[10, 10, 60, 60]], dtype=np.float64), class_id=np.array([1]), - data={"area": np.array([2500])} + data={"area": np.array([2500])}, ) medium_pred = Detections( xyxy=np.array([[12, 12, 60, 60]], dtype=np.float64), # Slightly off class_id=np.array([1]), confidence=np.array([0.9]), - data={"area": np.array([2304])} # 48*48 + data={"area": np.array([2304])}, # 48*48 ) - + # Large objects (classes 0, 1, 2) - area = 100*100 = 10000 > 9216 large_targets = Detections( - xyxy=np.array([ - [10, 10, 110, 110], - [120, 120, 220, 220], - [230, 230, 330, 330] - ], dtype=np.float64), + xyxy=np.array( + [[10, 10, 110, 110], [120, 120, 220, 220], [230, 230, 330, 330]], + dtype=np.float64, + ), class_id=np.array([2, 0, 1]), - data={"area": np.array([10000, 10000, 10000])} + data={"area": np.array([10000, 10000, 10000])}, ) large_preds = Detections( - xyxy=np.array([ - [10, 10, 110, 110], - [120, 120, 220, 220], - [230, 230, 330, 330] - ], dtype=np.float64), + xyxy=np.array( + [[10, 10, 110, 110], [120, 120, 220, 220], [230, 230, 330, 330]], + dtype=np.float64, + ), class_id=np.array([2, 0, 1]), confidence=np.array([0.9, 0.9, 0.9]), - data={"area": np.array([10000, 10000, 10000])} + data={"area": np.array([10000, 10000, 10000])}, ) - + metric = MeanAveragePrecision() metric.update([small_perfect], [small_perfect]) metric.update([medium_pred], [medium_target]) metric.update([large_preds], [large_targets]) result = metric.compute() - + # Should be 
close to 0.9 (slightly less than perfect due to medium object) assert 0.85 < result.map50_95 < 0.98 # Adjusted upper bound assert result.medium_objects.map50_95 < 1.0 # Medium should be less than perfect @@ -127,35 +125,34 @@ def test_scenario_2_missed_detection(): xyxy=np.array([[10, 10, 40, 40]], dtype=np.float64), class_id=np.array([0]), confidence=np.array([0.95]), - data={"area": np.array([900])} + data={"area": np.array([900])}, ) - + # Medium object - area = 50*50 = 2500 (between 1024 and 9216) - no prediction (missed) medium_target = Detections( xyxy=np.array([[10, 10, 60, 60]], dtype=np.float64), class_id=np.array([1]), - data={"area": np.array([2500])} + data={"area": np.array([2500])}, ) no_medium_pred = Detections.empty() - + # Large objects - area = 100*100 = 10000 > 9216 large_detections = Detections( - xyxy=np.array([ - [10, 10, 110, 110], - [120, 120, 220, 220], - [230, 230, 330, 330] - ], dtype=np.float64), + xyxy=np.array( + [[10, 10, 110, 110], [120, 120, 220, 220], [230, 230, 330, 330]], + dtype=np.float64, + ), class_id=np.array([2, 0, 1]), confidence=np.array([0.9, 0.9, 0.9]), - data={"area": np.array([10000, 10000, 10000])} + data={"area": np.array([10000, 10000, 10000])}, ) - + metric = MeanAveragePrecision() metric.update([small_detection], [small_detection]) metric.update([no_medium_pred], [medium_target]) metric.update([large_detections], [large_detections]) result = metric.compute() - + # Medium objects should have 0.0 mAP (missed detection) assert abs(result.medium_objects.map50_95 - 0.0) < 1e-6 @@ -167,36 +164,35 @@ def test_scenario_3_false_positive(): xyxy=np.array([[10, 10, 40, 40]], dtype=np.float64), class_id=np.array([0]), confidence=np.array([0.95]), - data={"area": np.array([900])} + data={"area": np.array([900])}, ) - + # Medium object - area = 50*50 = 2500 - false positive (no GT) medium_pred = Detections( xyxy=np.array([[12, 12, 62, 62]], dtype=np.float64), class_id=np.array([1]), confidence=np.array([0.9]), - data={"area": 
np.array([2500])} + data={"area": np.array([2500])}, ) no_medium_target = Detections.empty() - + # Large objects - area = 100*100 = 10000 > 9216 large_detections = Detections( - xyxy=np.array([ - [10, 10, 110, 110], - [120, 120, 220, 220], - [230, 230, 330, 330] - ], dtype=np.float64), + xyxy=np.array( + [[10, 10, 110, 110], [120, 120, 220, 220], [230, 230, 330, 330]], + dtype=np.float64, + ), class_id=np.array([2, 0, 1]), confidence=np.array([0.9, 0.9, 0.9]), - data={"area": np.array([10000, 10000, 10000])} + data={"area": np.array([10000, 10000, 10000])}, ) - + metric = MeanAveragePrecision() metric.update([small_detection], [small_detection]) metric.update([medium_pred], [no_medium_target]) metric.update([large_detections], [large_detections]) result = metric.compute() - + # Medium objects should have -1 mAP (false positive, matching pycocotools) assert result.medium_objects.map50_95 == -1 @@ -208,37 +204,43 @@ def test_scenario_4_no_data(): xyxy=np.array([[10, 10, 40, 40]], dtype=np.float64), class_id=np.array([0]), confidence=np.array([0.95]), - data={"area": np.array([900])} + data={"area": np.array([900])}, ) - + # Medium object - no data at all no_medium = Detections.empty() - + # Large objects - area = 100*100 = 10000 > 9216 - only classes 0 and 2 (no class 1) large_targets = Detections( - xyxy=np.array([ - [10, 10, 110, 110], - [120, 120, 220, 220], - ], dtype=np.float64), + xyxy=np.array( + [ + [10, 10, 110, 110], + [120, 120, 220, 220], + ], + dtype=np.float64, + ), class_id=np.array([2, 0]), - data={"area": np.array([10000, 10000])} + data={"area": np.array([10000, 10000])}, ) large_preds = Detections( - xyxy=np.array([ - [10, 10, 110, 110], - [120, 120, 220, 220], - ], dtype=np.float64), + xyxy=np.array( + [ + [10, 10, 110, 110], + [120, 120, 220, 220], + ], + dtype=np.float64, + ), class_id=np.array([2, 0]), confidence=np.array([0.9, 0.9]), - data={"area": np.array([10000, 10000])} + data={"area": np.array([10000, 10000])}, ) - + metric = 
MeanAveragePrecision() metric.update([small_detection], [small_detection]) metric.update([no_medium], [no_medium]) metric.update([large_preds], [large_targets]) result = metric.compute() - + # Should NOT have negative mAP values for overall assert result.map50_95 >= 0.0 # Medium objects should have -1 mAP (no data, matching pycocotools) @@ -260,13 +262,13 @@ def test_scenario_5_only_one_class_present(): confidence=np.array([0.9]), ), ] - + metric = MeanAveragePrecision() for det in detections_class_0: metric.update([det], [det]) - + result = metric.compute() - + # Should be 1.0 mAP (perfect match for the only class present) assert abs(result.map50_95 - 1.0) < 1e-6 assert abs(result.map50 - 1.0) < 1e-6 @@ -281,14 +283,14 @@ def test_mixed_classes_with_missing_detections(): class_id=np.array([0]), confidence=np.array([0.9]), ) - + # Class 1: GT exists but no prediction class_1_target = Detections( xyxy=np.array([[60, 60, 100, 100]], dtype=np.float64), class_id=np.array([1]), ) class_1_pred = Detections.empty() - + # Class 2: Prediction exists but no GT (false positive) class_2_pred = Detections( xyxy=np.array([[110, 110, 150, 150]], dtype=np.float64), @@ -296,13 +298,13 @@ def test_mixed_classes_with_missing_detections(): confidence=np.array([0.8]), ) class_2_target = Detections.empty() - + metric = MeanAveragePrecision() metric.update([class_0_det], [class_0_det]) metric.update([class_1_pred], [class_1_target]) metric.update([class_2_pred], [class_2_target]) result = metric.compute() - + # Should not have negative mAP assert result.map50_95 >= 0.0 # Should be less than 1.0 due to missed detection and false positive @@ -314,12 +316,12 @@ def test_empty_predictions_and_targets(): metric = MeanAveragePrecision() metric.update([Detections.empty()], [Detections.empty()]) result = metric.compute() - + # Should return -1 for no data (matching pycocotools behavior) assert result.map50_95 == -1 assert result.map50 == -1 assert result.map75 == -1 - + # All object size 
categories should also be -1 assert result.small_objects.map50_95 == -1 assert result.medium_objects.map50_95 == -1 From 118e3a496c3360af049d0d75969a185ebabe6c58 Mon Sep 17 00:00:00 2001 From: Balthasar Date: Fri, 18 Jul 2025 10:09:53 +0200 Subject: [PATCH 13/24] fix: line length --- supervision/metrics/mean_average_precision.py | 2 +- test/metrics/test_mean_average_precision.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/supervision/metrics/mean_average_precision.py b/supervision/metrics/mean_average_precision.py index 4c8c353298..bc6d0af841 100644 --- a/supervision/metrics/mean_average_precision.py +++ b/supervision/metrics/mean_average_precision.py @@ -935,7 +935,7 @@ def _accumulate(self): # Helper function to compute average precision while handling -1 sentinel values def compute_average_precision(precision_slice): - """Helper function to compute average precision while handling -1 sentinel values.""" + """Compute average precision while handling -1 sentinel values.""" masked = np.ma.masked_equal(precision_slice, -1) if masked.count() == 0: # All values are -1 (no data) diff --git a/test/metrics/test_mean_average_precision.py b/test/metrics/test_mean_average_precision.py index 8bee7b5372..f17cb1cecb 100644 --- a/test/metrics/test_mean_average_precision.py +++ b/test/metrics/test_mean_average_precision.py @@ -128,7 +128,7 @@ def test_scenario_2_missed_detection(): data={"area": np.array([900])}, ) - # Medium object - area = 50*50 = 2500 (between 1024 and 9216) - no prediction (missed) + # Medium object - area = 50*50 = 2500 (between 1024 and 9216) - missed medium_target = Detections( xyxy=np.array([[10, 10, 60, 60]], dtype=np.float64), class_id=np.array([1]), From 6e3001194dfff7ef7cabe9968bfbd9afd7743bf4 Mon Sep 17 00:00:00 2001 From: Balthasar Date: Fri, 18 Jul 2025 17:19:10 +0200 Subject: [PATCH 14/24] improve tests --- .../test_mean_average_precision_area.py | 227 ++++++------------ 1 file changed, 67 insertions(+), 160 deletions(-) 
diff --git a/test/metrics/test_mean_average_precision_area.py b/test/metrics/test_mean_average_precision_area.py index 17257701a8..23e9f940a1 100644 --- a/test/metrics/test_mean_average_precision_area.py +++ b/test/metrics/test_mean_average_precision_area.py @@ -1,5 +1,6 @@ from __future__ import annotations +import pytest import numpy as np from supervision.detection.core import Detections @@ -9,191 +10,97 @@ class TestMeanAveragePrecisionArea: """Test area calculation in MeanAveragePrecision.""" - def test_area_calculated_from_bbox_when_data_empty(self): - """Test that area is calculated from bbox when data is empty (normal case).""" - # Create detections with empty data (normal case) - gt = Detections( - xyxy=np.array( - [ - [10, 10, 40, 40], # Small: 30x30 = 900 - [100, 100, 200, 150], # Medium: 100x50 = 5000 - [300, 300, 500, 400], # Large: 200x100 = 20000 - ], - dtype=np.float32, + @pytest.mark.parametrize( + "xyxy, expected_areas, expected_size_maps", + [ + ( + np.array([ + [10, 10, 40, 40], # Small: 900 + [100, 100, 200, 150], # Medium: 5000 + [300, 300, 500, 400], # Large: 20000 + ], dtype=np.float32), + [900.0, 5000.0, 20000.0], + {"small": True, "medium": True, "large": True} + ), + ( + np.array([[0, 0, 10, 10]], dtype=np.float32), # Small: 100 + [100.0], + {"small": True, "medium": False, "large": False} + ), + ( + np.array([[0, 0, 50, 50]], dtype=np.float32), # Medium: 2500 + [2500.0], + {"small": False, "medium": True, "large": False} ), - class_id=np.array([0, 0, 0]), - confidence=np.array([1.0, 1.0, 1.0]), + ( + np.array([[0, 0, 100, 100]], dtype=np.float32), # Large: 10000 + [10000.0], + {"small": False, "medium": False, "large": True} + ), + ] + ) + def test_area_calculation_and_size_specific_map(self, xyxy, expected_areas, expected_size_maps): + """Test area calculation and size-specific mAP functionality.""" + gt = Detections( + xyxy=xyxy, + class_id=np.arange(len(xyxy)), ) - pred = Detections( xyxy=gt.xyxy.copy(), class_id=gt.class_id.copy(), - 
confidence=np.array([0.9, 0.9, 0.9]), + confidence=np.full(len(xyxy), 0.9), ) - # Verify data is empty (normal case) - assert gt.data == {} - assert pred.data == {} - - # Create mAP metric and test area calculation map_metric = MeanAveragePrecision() map_metric.update([pred], [gt]) - # Check that areas were calculated correctly from bbox + # Test area calculation prepared_targets = map_metric._prepare_targets(map_metric._targets_list) - areas = [ann["area"] for ann in prepared_targets["annotations"]] - expected_areas = [900.0, 5000.0, 20000.0] # width * height for each bbox - - assert np.allclose(areas, expected_areas, rtol=1e-05, atol=1e-08), ( - f"Expected {expected_areas}, got {areas}" - ) + assert np.allclose(areas, expected_areas), f"Expected {expected_areas}, got {areas}" - # Verify mAP works correctly (no -1.0 for medium/large objects) + # Test size-specific mAP result = map_metric.compute() - assert result.medium_objects.map50 >= 0.0, ( - "Medium objects should have valid mAP" - ) - assert result.large_objects.map50 >= 0.0, "Large objects should have valid mAP" - def test_area_preserved_when_provided_in_data(self): - """Test that area from data is preserved when provided (COCO case).""" - # Create detections with area in data (COCO style) - gt = Detections( - xyxy=np.array([[100, 100, 200, 150]], dtype=np.float32), # Would be 5000 - class_id=np.array([0]), - confidence=np.array([1.0]), - ) + if expected_size_maps["small"]: + assert result.small_objects.map50 > 0.9, "Small objects should have high mAP" + else: + assert result.small_objects.map50 == -1.0, "Small objects should have no data" - # Add custom area to data (different from calculated) - gt.data = {"area": np.array([3000.0])} + if expected_size_maps["medium"]: + assert result.medium_objects.map50 > 0.9, "Medium objects should have high mAP" + else: + assert result.medium_objects.map50 == -1.0, "Medium objects should have no data" - pred = Detections( - xyxy=gt.xyxy.copy(), 
class_id=gt.class_id.copy(), confidence=np.array([0.9]) - ) - pred.data = {"area": np.array([3000.0])} + if expected_size_maps["large"]: + assert result.large_objects.map50 > 0.9, "Large objects should have high mAP" + else: + assert result.large_objects.map50 == -1.0, "Large objects should have no data" - # Test area calculation - map_metric = MeanAveragePrecision() - map_metric.update([pred], [gt]) - - # Check that provided area is used (not calculated) - prepared_targets = map_metric._prepare_targets(map_metric._targets_list) - used_area = prepared_targets["annotations"][0]["area"] - - assert np.allclose(used_area, 3000.0, rtol=1e-05, atol=1e-08), ( - f"Should use provided area 3000.0, got {used_area}" - ) - - # Verify it's different from what would be calculated - calculated_area = (200 - 100) * (150 - 100) # 100 * 50 = 5000 - assert not np.allclose(used_area, calculated_area, rtol=1e-05, atol=1e-08), ( - "Should use provided area, not calculated" - ) - - def test_mixed_area_sources(self): - """Test mix of detections with and without area in data.""" - # Create detections where some have area in data, others don't - gt1 = Detections( - xyxy=np.array([[10, 10, 40, 40]], dtype=np.float32), # 900 - class_id=np.array([0]), - ) - # No area in data - should be calculated - - gt2 = Detections( - xyxy=np.array([[100, 100, 200, 150]], dtype=np.float32), # 5000 - class_id=np.array([1]), - ) - # Add area in data - should be preserved - gt2.data = {"area": np.array([3000.0])} - - pred1 = Detections( - xyxy=gt1.xyxy.copy(), - class_id=gt1.class_id.copy(), - confidence=np.array([0.9]), - ) - - pred2 = Detections( - xyxy=gt2.xyxy.copy(), - class_id=gt2.class_id.copy(), - confidence=np.array([0.8]), - ) - pred2.data = {"area": np.array([3000.0])} - - # Test area calculation for mixed sources - map_metric = MeanAveragePrecision() - map_metric.update([pred1, pred2], [gt1, gt2]) - - prepared_targets = map_metric._prepare_targets(map_metric._targets_list) - areas = [ann["area"] 
for ann in prepared_targets["annotations"]] - - expected_areas = [900.0, 3000.0] # calculated, then provided - assert np.allclose(areas, expected_areas, rtol=1e-05, atol=1e-08), ( - f"Expected {expected_areas}, got {areas}" - ) - - def test_size_specific_map_works_correctly(self): - """Test that size-specific mAP works correctly with area fix.""" - # Create detections with one object of each size + def test_area_preserved_from_data(self): + """Test that area from data field is preserved (COCO case).""" gt = Detections( - xyxy=np.array( - [ - [10, 10, 40, 40], # Small: 30x30 = 900 < 1024 - [100, 100, 200, 150], # Medium: 100x50 = 5000 (1024 <= x < 9216) - [300, 300, 500, 400], # Large: 200x100 = 20000 >= 9216 - ], - dtype=np.float32, - ), - class_id=np.array([0, 0, 0]), + xyxy=np.array([[100, 100, 200, 150]], dtype=np.float32), # Would calculate to 5000 + class_id=np.array([0]), ) - - # Perfect predictions + # Override with custom area + gt.data = {"area": np.array([3000.0])} + pred = Detections( xyxy=gt.xyxy.copy(), class_id=gt.class_id.copy(), - confidence=np.array([0.9, 0.9, 0.9]), - ) - - # Test mAP calculation - map_metric = MeanAveragePrecision() - map_metric.update([pred], [gt]) - result = map_metric.compute() - - # All size categories should have valid results (not -1.0) - assert result.small_objects.map50 >= 0.0, "Small objects should have valid mAP" - assert result.medium_objects.map50 >= 0.0, ( - "Medium objects should have valid mAP" - ) - assert result.large_objects.map50 >= 0.0, "Large objects should have valid mAP" - - # Perfect matches should yield high mAP for medium and large - assert result.medium_objects.map50 > 0.9, ( - "Perfect medium matches should have high mAP" - ) - assert result.large_objects.map50 > 0.9, ( - "Perfect large matches should have high mAP" - ) - - def test_area_uses_detections_property(self): - """Test that area calculation uses Detections.area property correctly.""" - # Create detection - gt = Detections( - 
xyxy=np.array([[100, 100, 200, 150]], dtype=np.float32), - class_id=np.array([0]), - ) - - pred = Detections( - xyxy=gt.xyxy.copy(), class_id=gt.class_id.copy(), confidence=np.array([0.9]) + confidence=np.array([0.9]), ) - - # Test that internal calculation matches Detections.area property + pred.data = {"area": np.array([3000.0])} + map_metric = MeanAveragePrecision() map_metric.update([pred], [gt]) - + prepared_targets = map_metric._prepare_targets(map_metric._targets_list) used_area = prepared_targets["annotations"][0]["area"] - expected_area = gt.area[0] - - assert np.allclose(used_area, expected_area, rtol=1e-05, atol=1e-08), ( - f"Should use Detections.area property {expected_area}, got {used_area}" - ) + + assert np.allclose(used_area, 3000.0), f"Should use provided area 3000.0, got {used_area}" + + # Verify it's different from what would be calculated + calculated_area = (200 - 100) * (150 - 100) # 100 * 50 = 5000 + assert not np.allclose(used_area, calculated_area), "Should use provided area, not calculated" From 704652a0d2ee473041459461d88bcc36305d466e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 18 Jul 2025 15:19:32 +0000 Subject: [PATCH 15/24] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?= =?UTF-8?q?=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test_mean_average_precision_area.py | 79 ++++++++++++------- 1 file changed, 52 insertions(+), 27 deletions(-) diff --git a/test/metrics/test_mean_average_precision_area.py b/test/metrics/test_mean_average_precision_area.py index 23e9f940a1..832620961a 100644 --- a/test/metrics/test_mean_average_precision_area.py +++ b/test/metrics/test_mean_average_precision_area.py @@ -1,7 +1,7 @@ from __future__ import annotations -import pytest import numpy as np +import pytest from supervision.detection.core import Detections from 
supervision.metrics.mean_average_precision import MeanAveragePrecision @@ -14,32 +14,37 @@ class TestMeanAveragePrecisionArea: "xyxy, expected_areas, expected_size_maps", [ ( - np.array([ - [10, 10, 40, 40], # Small: 900 - [100, 100, 200, 150], # Medium: 5000 - [300, 300, 500, 400], # Large: 20000 - ], dtype=np.float32), + np.array( + [ + [10, 10, 40, 40], # Small: 900 + [100, 100, 200, 150], # Medium: 5000 + [300, 300, 500, 400], # Large: 20000 + ], + dtype=np.float32, + ), [900.0, 5000.0, 20000.0], - {"small": True, "medium": True, "large": True} + {"small": True, "medium": True, "large": True}, ), ( np.array([[0, 0, 10, 10]], dtype=np.float32), # Small: 100 [100.0], - {"small": True, "medium": False, "large": False} + {"small": True, "medium": False, "large": False}, ), ( np.array([[0, 0, 50, 50]], dtype=np.float32), # Medium: 2500 [2500.0], - {"small": False, "medium": True, "large": False} + {"small": False, "medium": True, "large": False}, ), ( np.array([[0, 0, 100, 100]], dtype=np.float32), # Large: 10000 [10000.0], - {"small": False, "medium": False, "large": True} + {"small": False, "medium": False, "large": True}, ), - ] + ], ) - def test_area_calculation_and_size_specific_map(self, xyxy, expected_areas, expected_size_maps): + def test_area_calculation_and_size_specific_map( + self, xyxy, expected_areas, expected_size_maps + ): """Test area calculation and size-specific mAP functionality.""" gt = Detections( xyxy=xyxy, @@ -57,50 +62,70 @@ def test_area_calculation_and_size_specific_map(self, xyxy, expected_areas, expe # Test area calculation prepared_targets = map_metric._prepare_targets(map_metric._targets_list) areas = [ann["area"] for ann in prepared_targets["annotations"]] - assert np.allclose(areas, expected_areas), f"Expected {expected_areas}, got {areas}" + assert np.allclose(areas, expected_areas), ( + f"Expected {expected_areas}, got {areas}" + ) # Test size-specific mAP result = map_metric.compute() if expected_size_maps["small"]: - assert 
result.small_objects.map50 > 0.9, "Small objects should have high mAP" + assert result.small_objects.map50 > 0.9, ( + "Small objects should have high mAP" + ) else: - assert result.small_objects.map50 == -1.0, "Small objects should have no data" + assert result.small_objects.map50 == -1.0, ( + "Small objects should have no data" + ) if expected_size_maps["medium"]: - assert result.medium_objects.map50 > 0.9, "Medium objects should have high mAP" + assert result.medium_objects.map50 > 0.9, ( + "Medium objects should have high mAP" + ) else: - assert result.medium_objects.map50 == -1.0, "Medium objects should have no data" + assert result.medium_objects.map50 == -1.0, ( + "Medium objects should have no data" + ) if expected_size_maps["large"]: - assert result.large_objects.map50 > 0.9, "Large objects should have high mAP" + assert result.large_objects.map50 > 0.9, ( + "Large objects should have high mAP" + ) else: - assert result.large_objects.map50 == -1.0, "Large objects should have no data" + assert result.large_objects.map50 == -1.0, ( + "Large objects should have no data" + ) def test_area_preserved_from_data(self): """Test that area from data field is preserved (COCO case).""" gt = Detections( - xyxy=np.array([[100, 100, 200, 150]], dtype=np.float32), # Would calculate to 5000 + xyxy=np.array( + [[100, 100, 200, 150]], dtype=np.float32 + ), # Would calculate to 5000 class_id=np.array([0]), ) # Override with custom area gt.data = {"area": np.array([3000.0])} - + pred = Detections( xyxy=gt.xyxy.copy(), class_id=gt.class_id.copy(), confidence=np.array([0.9]), ) pred.data = {"area": np.array([3000.0])} - + map_metric = MeanAveragePrecision() map_metric.update([pred], [gt]) - + prepared_targets = map_metric._prepare_targets(map_metric._targets_list) used_area = prepared_targets["annotations"][0]["area"] - - assert np.allclose(used_area, 3000.0), f"Should use provided area 3000.0, got {used_area}" - + + assert np.allclose(used_area, 3000.0), ( + f"Should use provided 
area 3000.0, got {used_area}" + ) + # Verify it's different from what would be calculated calculated_area = (200 - 100) * (150 - 100) # 100 * 50 = 5000 - assert not np.allclose(used_area, calculated_area), "Should use provided area, not calculated" + assert not np.allclose(used_area, calculated_area), ( + "Should use provided area, not calculated" + ) From f044398c170c3ca98dca76dda02e4673ba5dc38f Mon Sep 17 00:00:00 2001 From: Onuralp SEZER Date: Sun, 20 Jul 2025 20:11:37 +0300 Subject: [PATCH 16/24] =?UTF-8?q?fix:=20=20=F0=9F=90=9E=20update=20inferen?= =?UTF-8?q?ce=5Fslicer.py=20for=20improved=20detection=20handling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/detection/tools/inference_slicer.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/supervision/detection/tools/inference_slicer.py b/supervision/detection/tools/inference_slicer.py index ff4a44b280..69bc7d7942 100644 --- a/supervision/detection/tools/inference_slicer.py +++ b/supervision/detection/tools/inference_slicer.py @@ -5,11 +5,11 @@ from concurrent.futures import ThreadPoolExecutor, as_completed import numpy as np - +from typing import Literal from supervision.config import ORIENTED_BOX_COORDINATES from supervision.detection.core import Detections from supervision.detection.utils.boxes import move_boxes, move_oriented_boxes -from supervision.detection.utils.iou_and_nms import OverlapFilter +from supervision.detection.utils.iou_and_nms import OverlapFilter,OverlapMetric from supervision.detection.utils.masks import move_masks from supervision.utils.image import crop_image from supervision.utils.internal import ( @@ -96,7 +96,7 @@ def __init__( overlap_wh: tuple[int, int] | None = None, overlap_filter: OverlapFilter | str = OverlapFilter.NON_MAX_SUPPRESSION, iou_threshold: float = 0.5, - match_metric: str = "IOU", + match_metric: OverlapMetric = OverlapMetric.IOU, thread_workers: int = 1, ): if overlap_ratio_wh 
is not None: @@ -173,11 +173,11 @@ def callback(image_slice: np.ndarray) -> sv.Detections: return merged elif self.overlap_filter == OverlapFilter.NON_MAX_SUPPRESSION: return merged.with_nms( - threshold=self.iou_threshold, match_metric=self.match_metric + threshold=self.iou_threshold, overlap_metric=self.match_metric ) elif self.overlap_filter == OverlapFilter.NON_MAX_MERGE: return merged.with_nmm( - threshold=self.iou_threshold, match_metric=self.match_metric + threshold=self.iou_threshold, overlap_metric=self.match_metric ) else: warnings.warn( From db51a9ff98d653e2b9167a59a96d59d3afaf25b1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 20 Jul 2025 17:12:18 +0000 Subject: [PATCH 17/24] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?= =?UTF-8?q?=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/detection/tools/inference_slicer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/supervision/detection/tools/inference_slicer.py b/supervision/detection/tools/inference_slicer.py index 69bc7d7942..456067d771 100644 --- a/supervision/detection/tools/inference_slicer.py +++ b/supervision/detection/tools/inference_slicer.py @@ -5,11 +5,11 @@ from concurrent.futures import ThreadPoolExecutor, as_completed import numpy as np -from typing import Literal + from supervision.config import ORIENTED_BOX_COORDINATES from supervision.detection.core import Detections from supervision.detection.utils.boxes import move_boxes, move_oriented_boxes -from supervision.detection.utils.iou_and_nms import OverlapFilter,OverlapMetric +from supervision.detection.utils.iou_and_nms import OverlapFilter, OverlapMetric from supervision.detection.utils.masks import move_masks from supervision.utils.image import crop_image from supervision.utils.internal import ( From 1adb75520f479e076e3f98ab9f76530a03ecc5bd Mon Sep 17 
00:00:00 2001 From: SkalskiP Date: Mon, 21 Jul 2025 00:02:14 +0200 Subject: [PATCH 18/24] bump version from `0.26.0` to `0.26.1`; improve docstrings; unify naming; improve parsing --- pyproject.toml | 2 +- supervision/detection/core.py | 8 +++--- .../detection/tools/inference_slicer.py | 12 ++++----- supervision/detection/utils/iou_and_nms.py | 27 ++++++++++++------- 4 files changed, 29 insertions(+), 20 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1b830697a2..cae78492ac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "supervision" description = "A set of easy-to-use utils that will come in handy in any Computer Vision project" license = { text = "MIT" } -version = "0.26.0" +version = "0.26.1" readme = "README.md" requires-python = ">=3.9" authors = [ diff --git a/supervision/detection/core.py b/supervision/detection/core.py index efaa366a35..e806513ac2 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -1939,8 +1939,8 @@ def with_nms( class_agnostic (bool): Whether to perform class-agnostic non-maximum suppression. If True, the class_id of each detection will be ignored. Defaults to False. - overlap_metric (OverlapMetric): Metric used for measuring overlap between - detections in slices. + overlap_metric (OverlapMetric): Metric used to compute the degree of + overlap between pairs of masks or boxes (e.g., IoU, IoS). Returns: Detections: A new Detections object containing the subset of detections @@ -2003,8 +2003,8 @@ def with_nmm( class_agnostic (bool): Whether to perform class-agnostic non-maximum merging. If True, the class_id of each detection will be ignored. Defaults to False. - overlap_metric (OverlapMetric): Metric used for measuring overlap between - detections in slices. + overlap_metric (OverlapMetric): Metric used to compute the degree of + overlap between pairs of masks or boxes (e.g., IoU, IoS). 
Returns: Detections: A new Detections object containing the subset of detections diff --git a/supervision/detection/tools/inference_slicer.py b/supervision/detection/tools/inference_slicer.py index 456067d771..aaecccb3dc 100644 --- a/supervision/detection/tools/inference_slicer.py +++ b/supervision/detection/tools/inference_slicer.py @@ -75,8 +75,8 @@ class InferenceSlicer: filtering or merging overlapping detections in slices. iou_threshold (float): Intersection over Union (IoU) threshold used when filtering by overlap. - match_metric (str): Metric used for matching detections in slices. - "IOU" or "IOS". Defaults "IOU". + overlap_metric (Union[OverlapMetric, str]): Metric used for matching detections + in slices. callback (Callable): A function that performs inference on a given image slice and returns detections. thread_workers (int): Number of threads for parallel execution. @@ -96,7 +96,7 @@ def __init__( overlap_wh: tuple[int, int] | None = None, overlap_filter: OverlapFilter | str = OverlapFilter.NON_MAX_SUPPRESSION, iou_threshold: float = 0.5, - match_metric: OverlapMetric = OverlapMetric.IOU, + overlap_metric: OverlapMetric | str = OverlapMetric.IOU, thread_workers: int = 1, ): if overlap_ratio_wh is not None: @@ -112,7 +112,7 @@ def __init__( self.slice_wh = slice_wh self.iou_threshold = iou_threshold - self.match_metric = match_metric + self.overlap_metric = OverlapMetric.from_value(overlap_metric) self.overlap_filter = OverlapFilter.from_value(overlap_filter) self.callback = callback self.thread_workers = thread_workers @@ -173,11 +173,11 @@ def callback(image_slice: np.ndarray) -> sv.Detections: return merged elif self.overlap_filter == OverlapFilter.NON_MAX_SUPPRESSION: return merged.with_nms( - threshold=self.iou_threshold, overlap_metric=self.match_metric + threshold=self.iou_threshold, overlap_metric=self.overlap_metric ) elif self.overlap_filter == OverlapFilter.NON_MAX_MERGE: return merged.with_nmm( - threshold=self.iou_threshold, 
overlap_metric=self.match_metric + threshold=self.iou_threshold, overlap_metric=self.overlap_metric ) else: warnings.warn( diff --git a/supervision/detection/utils/iou_and_nms.py b/supervision/detection/utils/iou_and_nms.py index 56bdff71b6..1a6f80bc58 100644 --- a/supervision/detection/utils/iou_and_nms.py +++ b/supervision/detection/utils/iou_and_nms.py @@ -164,7 +164,8 @@ def box_iou_batch( `shape = (N, 4)` where `N` is number of true objects. boxes_detection (np.ndarray): 2D `np.ndarray` representing detection boxes. `shape = (M, 4)` where `M` is number of detected objects. - overlap_metric (OverlapMetric): Metric used for matching detections in slices. + overlap_metric (OverlapMetric): Metric used to compute the degree of overlap + between pairs of boxes (e.g., IoU, IoS). Returns: np.ndarray: Pairwise IoU of boxes from `boxes_true` and `boxes_detection`. @@ -381,7 +382,8 @@ def _mask_iou_batch_split( Args: masks_true (np.ndarray): 3D `np.ndarray` representing ground-truth masks. masks_detection (np.ndarray): 3D `np.ndarray` representing detection masks. - overlap_metric (OverlapMetric): Metric used for matching detections in slices. + overlap_metric (OverlapMetric): Metric used to compute the degree of overlap + between pairs of masks (e.g., IoU, IoS). Returns: np.ndarray: Pairwise IoU of masks from `masks_true` and `masks_detection`. @@ -433,7 +435,8 @@ def mask_iou_batch( Args: masks_true (np.ndarray): 3D `np.ndarray` representing ground-truth masks. masks_detection (np.ndarray): 3D `np.ndarray` representing detection masks. - overlap_metric (OverlapMetric): Metric used for matching detections in slices. + overlap_metric (OverlapMetric): Metric used to compute the degree of overlap + between pairs of masks (e.g., IoU, IoS). memory_limit (int): memory limit in MB, default is 1024 * 5 MB (5GB). Returns: @@ -492,7 +495,8 @@ def mask_non_max_suppression( dimensions of each mask. 
iou_threshold (float): The intersection-over-union threshold to use for non-maximum suppression. - overlap_metric (OverlapMetric): Metric used for matching detections in slices. + overlap_metric (OverlapMetric): Metric used to compute the degree of overlap + between pairs of masks (e.g., IoU, IoS). mask_dimension (int): The dimension to which the masks should be resized before computing IOU values. Defaults to 640. @@ -543,7 +547,8 @@ def box_non_max_suppression( or `(x_min, y_min, x_max, y_max, score, class)`. iou_threshold (float): The intersection-over-union threshold to use for non-maximum suppression. - overlap_metric (OverlapMetric): Metric used for matching detections in slices. + overlap_metric (OverlapMetric): Metric used to compute the degree of overlap + between pairs of boxes (e.g., IoU, IoS). Returns: np.ndarray: A boolean array indicating which predictions to keep after n @@ -603,7 +608,8 @@ def _group_overlapping_masks( the predictions. iou_threshold (float): The intersection-over-union threshold to use for non-maximum suppression. Defaults to 0.5. - overlap_metric (OverlapMetric): Metric used for matching detections in slices. + overlap_metric (OverlapMetric): Metric used to compute the degree of overlap + between pairs of masks (e.g., IoU, IoS). Returns: list[list[int]]: Groups of prediction indices be merged. @@ -664,7 +670,8 @@ def mask_non_max_merge( to use for non-maximum suppression. mask_dimension (int): The dimension to which the masks should be resized before computing IOU values. Defaults to 640. - overlap_metric (OverlapMetric): Metric used for matching detections in slices. + overlap_metric (OverlapMetric): Metric used to compute the degree of overlap + between pairs of masks (e.g., IoU, IoS). Returns: np.ndarray: A boolean array indicating which predictions to keep after @@ -717,7 +724,8 @@ def _group_overlapping_boxes( and the confidence scores. 
iou_threshold (float): The intersection-over-union threshold to use for non-maximum suppression. Defaults to 0.5. - overlap_metric (OverlapMetric): Metric used for matching detections in slices. + overlap_metric (OverlapMetric): Metric used to compute the degree of overlap + between pairs of boxes (e.g., IoU, IoS). Returns: list[list[int]]: Groups of prediction indices be merged. @@ -765,7 +773,8 @@ def box_non_max_merge( detections of different classes to be merged. iou_threshold (float): The intersection-over-union threshold to use for non-maximum suppression. Defaults to 0.5. - overlap_metric (OverlapMetric): Metric used for matching detections in slices. + overlap_metric (OverlapMetric): Metric used to compute the degree of overlap + between pairs of boxes (e.g., IoU, IoS). Returns: list[list[int]]: Groups of prediction indices be merged. From c54d16c57e6f6ad5398cbd90bfe25f32514fb3a8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 20 Jul 2025 22:02:36 +0000 Subject: [PATCH 19/24] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?= =?UTF-8?q?=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/detection/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/supervision/detection/core.py b/supervision/detection/core.py index e806513ac2..ea466b8955 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -2003,7 +2003,7 @@ def with_nmm( class_agnostic (bool): Whether to perform class-agnostic non-maximum merging. If True, the class_id of each detection will be ignored. Defaults to False. - overlap_metric (OverlapMetric): Metric used to compute the degree of + overlap_metric (OverlapMetric): Metric used to compute the degree of overlap between pairs of masks or boxes (e.g., IoU, IoS). 
Returns: From e8cb962fcc982536eeb9d34165b28f1d3df46e47 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Mon, 21 Jul 2025 10:14:58 +0200 Subject: [PATCH 20/24] bump version from `0.26.1` to `0.26.1rc0` --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index cae78492ac..bb7d95ef09 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "supervision" description = "A set of easy-to-use utils that will come in handy in any Computer Vision project" license = { text = "MIT" } -version = "0.26.1" +version = "0.26.1rc0" readme = "README.md" requires-python = ">=3.9" authors = [ From d067f326783b55ed3e2e7aed596d38fe836f59db Mon Sep 17 00:00:00 2001 From: soumik12345 <19soumik.rakshit96@gmail.com> Date: Tue, 22 Jul 2025 15:48:54 +0530 Subject: [PATCH 21/24] fix: border thickness parameter usage for PercentageBarAnnotator --- supervision/annotators/core.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/supervision/annotators/core.py b/supervision/annotators/core.py index 6c142db198..0b7d4b7632 100644 --- a/supervision/annotators/core.py +++ b/supervision/annotators/core.py @@ -2239,8 +2239,11 @@ def __init__( self.position: Position = position self.color_lookup: ColorLookup = color_lookup - if border_thickness is None: - self.border_thickness = int(0.15 * self.height) + self.border_thickness = ( + border_thickness + if border_thickness is not None + else int(0.15 * self.height) + ) @ensure_cv2_image_for_annotation def annotate( From ad4f44644c06b105cad48f7eddf8ae9c196a6600 Mon Sep 17 00:00:00 2001 From: soumik12345 <19soumik.rakshit96@gmail.com> Date: Tue, 22 Jul 2025 16:20:38 +0530 Subject: [PATCH 22/24] update: changelog for 0.26.1 --- docs/changelog.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/changelog.md b/docs/changelog.md index 73efcf0b1f..b1ba1f41f0 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,17 @@ # Changelog +### 
0.26.1 Jul 22, 2025 + +- Fix ([1894](https://github.com/roboflow/supervision/pull/1894)): Fixed error in `sv.MeanAveragePrecision` where the area used for size-specific evaluation (small / medium / large) was always zero unless explicitly provided in `sv.Detections.data`. + +- Fix ([1895](https://github.com/roboflow/supervision/pull/1895)): Fixed `ID=0` bug in `sv.MeanAveragePrecision` where objects were getting `0.0` mAP despite perfect IoU matches due to a bug in annotation ID assignment. + +- Fix ([1898](https://github.com/roboflow/supervision/pull/1898)): Fixed issue where `sv.MeanAveragePrecision` could return negative values when certain object size categories have no data. + +- Fix ([1901](https://github.com/roboflow/supervision/pull/1901)): Fixed `match_metric` support for `sv.Detections.with_nms()`. + +- Fix ([1906](https://github.com/roboflow/supervision/pull/1906)): Fixed `border_thickness` parameter usage for `sv.PercentageBarAnnotator`. + ### 0.26.0 Jul 16, 2025 !!! failure "Removed" From 580fcdde4ce8bc7dd42ccbcabe4062de9a86ce7d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 22 Jul 2025 10:51:47 +0000 Subject: [PATCH 23/24] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?= =?UTF-8?q?=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/changelog.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog.md b/docs/changelog.md index b1ba1f41f0..67231d24ef 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -4,7 +4,7 @@ - Fix ([1894](https://github.com/roboflow/supervision/pull/1894)): Fixed error in `sv.MeanAveragePrecision` where the area used for size-specific evaluation (small / medium / large) was always zero unless explicitly provided in `sv.Detections.data`. 
-- Fix ([1895](https://github.com/roboflow/supervision/pull/1895)): Fixed `ID=0` bug in `sv.MeanAveragePrecision` where objects were getting `0.0` mAP despite perfect IoU matches due to a bug in annotation ID assignment. +- Fix ([1895](https://github.com/roboflow/supervision/pull/1895)): Fixed `ID=0` bug in `sv.MeanAveragePrecision` where objects were getting `0.0` mAP despite perfect IoU matches due to a bug in annotation ID assignment. - Fix ([1898](https://github.com/roboflow/supervision/pull/1898)): Fixed issue where `sv.MeanAveragePrecision` could return negative values when certain object size categories have no data. From cb6018ef04a7921dcb730182beb7ebdd72e2dafe Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Wed, 23 Jul 2025 08:32:07 +0200 Subject: [PATCH 24/24] final changes before supervision-0.26.1 --- .github/CODEOWNERS | 2 +- docs/changelog.md | 24 ++++++++++++------------ pyproject.toml | 2 +- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 6e7fcd59e8..02010285e4 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,4 +1,4 @@ # These owners will be the default owners for everything in # the repo. They will be requested for review when someone # opens a pull request. -* @SkalskiP @onuralpszr +* @SkalskiP @soumik12345 diff --git a/docs/changelog.md b/docs/changelog.md index 67231d24ef..4ebec50021 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,15 +2,15 @@ ### 0.26.1 Jul 22, 2025 -- Fix ([1894](https://github.com/roboflow/supervision/pull/1894)): Fixed error in `sv.MeanAveragePrecision` where the area used for size-specific evaluation (small / medium / large) was always zero unless explicitly provided in `sv.Detections.data`. 
+- Fixed [1894](https://github.com/roboflow/supervision/pull/1894): Error in [`sv.MeanAveragePrecision`](https://supervision.roboflow.com/0.26.1/metrics/mean_average_precision/#supervision.metrics.mean_average_precision.MeanAveragePrecision) where the area used for size-specific evaluation (small / medium / large) was always zero unless explicitly provided in `sv.Detections.data`. -- Fix ([1895](https://github.com/roboflow/supervision/pull/1895)): Fixed `ID=0` bug in `sv.MeanAveragePrecision` where objects were getting `0.0` mAP despite perfect IoU matches due to a bug in annotation ID assignment. +- Fixed [1895](https://github.com/roboflow/supervision/pull/1895): `ID=0` bug in [`sv.MeanAveragePrecision`](https://supervision.roboflow.com/0.26.1/metrics/mean_average_precision/#supervision.metrics.mean_average_precision.MeanAveragePrecision) where objects were getting `0.0` mAP despite perfect IoU matches due to a bug in annotation ID assignment. -- Fix ([1898](https://github.com/roboflow/supervision/pull/1898)): Fixed issue where `sv.MeanAveragePrecision` could return negative values when certain object size categories have no data. +- Fixed [1898](https://github.com/roboflow/supervision/pull/1898): Issue where [`sv.MeanAveragePrecision`](https://supervision.roboflow.com/0.26.1/metrics/mean_average_precision/#supervision.metrics.mean_average_precision.MeanAveragePrecision) could return negative values when certain object size categories have no data. -- Fix ([1901](https://github.com/roboflow/supervision/pull/1901)): Fixed `match_metric` support for `sv.Detections.with_nms()`. +- Fixed [1901](https://github.com/roboflow/supervision/pull/1901): `match_metric` support for [`sv.Detections.with_nms`](https://supervision.roboflow.com/0.26.1/detection/core/#supervision.detection.core.Detections.with_nms). -- Fix ([1906](https://github.com/roboflow/supervision/pull/1906)): Fixed `border_thickness` parameter usage for `sv.PercentageBarAnnotator`.
+- Fixed [1906](https://github.com/roboflow/supervision/pull/1906): `border_thickness` parameter usage for [`sv.PercentageBarAnnotator`](https://supervision.roboflow.com/0.26.1/detection/annotators/#supervision.annotators.core.PercentageBarAnnotator). ### 0.26.0 Jul 16, 2025 @@ -165,7 +165,7 @@ - Changed [#1786](https://github.com/roboflow/supervision/pull/1786): Significantly improved the speed of HSV color mapping in [`sv.HeatMapAnnotator`](https://supervision.roboflow.com/0.26.0/detection/annotators/#supervision.annotators.core.HeatMapAnnotator), achieving approximately 28x faster performance on 1920x1080 frames. -- Fix [#1834](https://github.com/roboflow/supervision/pull/1834): Supervision’s [`sv.MeanAveragePrecision`](https://supervision.roboflow.com/0.26.0/metrics/mean_average_precision/#supervision.metrics.mean_average_precision.MeanAveragePrecision) is now fully aligned with [pycocotools](https://github.com/ppwwyyxx/cocoapi), the official COCO evaluation tool, ensuring accurate and standardized metrics. This update enabled us to launch a new version of the [Computer Vision Model Leaderboard](https://leaderboard.roboflow.com/). +- Fixed [#1834](https://github.com/roboflow/supervision/pull/1834): Supervision’s [`sv.MeanAveragePrecision`](https://supervision.roboflow.com/0.26.0/metrics/mean_average_precision/#supervision.metrics.mean_average_precision.MeanAveragePrecision) is now fully aligned with [pycocotools](https://github.com/ppwwyyxx/cocoapi), the official COCO evaluation tool, ensuring accurate and standardized metrics. This update enabled us to launch a new version of the [Computer Vision Model Leaderboard](https://leaderboard.roboflow.com/). ```python import supervision as sv @@ -185,7 +185,7 @@ # Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.629 ``` -- Fix [#1767](https://github.com/roboflow/supervision/pull/1767): Fixed losing `sv.Detections.data` when detections filtering.
+- Fixed [#1767](https://github.com/roboflow/supervision/pull/1767): Fixed losing `sv.Detections.data` when detections filtering. ### 0.25.0 Nov 12, 2024 @@ -569,9 +569,9 @@ detections = sv.Detections.from_sam(sam_result=sam_result) - Changed [#1434](https://github.com/roboflow/supervision/pull/1434): [`InferenceSlicer`](https://supervision.roboflow.com/0.23.0/detection/tools/inference_slicer/) now features an `overlap_wh` parameter, making it easier to compute slice sizes when handling overlapping slices. -- Fix [#1448](https://github.com/roboflow/supervision/pull/1448): Various annotator type issues have been resolved, supporting expanded error handling. +- Fixed [#1448](https://github.com/roboflow/supervision/pull/1448): Various annotator type issues have been resolved, supporting expanded error handling. -- Fix [#1348](https://github.com/roboflow/supervision/pull/1348): Introduced a new method for [seeking to a specific video frame](https://supervision.roboflow.com/0.23.0/utils/video/#supervision.utils.video.get_video_frames_generator), addressing cases where traditional seek methods were failing. It can be enabled with `iterative_seek=True`. +- Fixed [#1348](https://github.com/roboflow/supervision/pull/1348): Introduced a new method for [seeking to a specific video frame](https://supervision.roboflow.com/0.23.0/utils/video/#supervision.utils.video.get_video_frames_generator), addressing cases where traditional seek methods were failing. It can be enabled with `iterative_seek=True`. ```python import supervision as sv @@ -584,7 +584,7 @@ for frame in sv.get_video_frames_generator( ... ``` -- Fix [#1424](https://github.com/roboflow/supervision/pull/1424): `plot_image` function now clearly indicates that the size is in inches. +- Fixed [#1424](https://github.com/roboflow/supervision/pull/1424): `plot_image` function now clearly indicates that the size is in inches. !!! 
failure "Removed" @@ -1297,7 +1297,7 @@ array([ ### 0.11.1 June 29, 2023 -- Fix [#165](https://github.com/roboflow/supervision/pull/165): [`as_folder_structure`](/0.11.1/dataset/core/#supervision.dataset.core.ClassificationDataset.as_folder_structure) fails to save [`sv.ClassificationDataset`](/0.11.1/dataset/core/#classificationdataset) when it is result of inference. +- Fixed [#165](https://github.com/roboflow/supervision/pull/165): [`as_folder_structure`](/0.11.1/dataset/core/#supervision.dataset.core.ClassificationDataset.as_folder_structure) fails to save [`sv.ClassificationDataset`](/0.11.1/dataset/core/#classificationdataset) when it is result of inference. ### 0.11.0 June 28, 2023 @@ -1343,7 +1343,7 @@ array([ - Added [#162](https://github.com/roboflow/supervision/pull/162): additional `start` and `end` arguments to [`sv.get_video_frames_generator`](/0.11.0/utils/video/#get_video_frames_generator) allowing to generate frames only for a selected part of the video. -- Fix [#157](https://github.com/roboflow/supervision/pull/157): incorrect loading of YOLO dataset class names from `data.yaml`. +- Fixed [#157](https://github.com/roboflow/supervision/pull/157): incorrect loading of YOLO dataset class names from `data.yaml`. ### 0.10.0 June 14, 2023 diff --git a/pyproject.toml b/pyproject.toml index bb7d95ef09..cae78492ac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "supervision" description = "A set of easy-to-use utils that will come in handy in any Computer Vision project" license = { text = "MIT" } -version = "0.26.1rc0" +version = "0.26.1" readme = "README.md" requires-python = ">=3.9" authors = [