From f737782ae848a5437d7210c8cf4cecfc93a2ad90 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Mon, 7 Apr 2025 14:56:58 +0200 Subject: [PATCH 001/124] initial commit --- supervision/detection/core.py | 42 ++++++++++++++++ supervision/detection/vlm.py | 92 +++++++++++++++++++++++++++++++++++ 2 files changed, 134 insertions(+) diff --git a/supervision/detection/core.py b/supervision/detection/core.py index f508dd62be..9cda122a15 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -40,6 +40,7 @@ from_paligemma, from_qwen_2_5_vl, validate_vlm_parameters, + from_deepseek_vl_2, ) from supervision.geometry.core import Position from supervision.utils.internal import deprecated, get_instance_variables @@ -849,6 +850,7 @@ def from_lmm( LMM.PALIGEMMA: VLM.PALIGEMMA, LMM.FLORENCE_2: VLM.FLORENCE_2, LMM.QWEN_2_5_VL: VLM.QWEN_2_5_VL, + LMM.DEEPSEEK_VL_2: VLM.DEEPSEEK_VL_2 } # (this works even if the LMM enum is wrapped by @deprecated) @@ -876,6 +878,41 @@ def from_lmm( def from_vlm( cls, vlm: Union[VLM, str], result: Union[str, dict], **kwargs: Any ) -> Detections: + """ + Creates a Detections object from the given result string based on the specified + Vision-Language Model (LMM). + + Args: + vlm (Union[VLM, str]): The type of VLM (Vision-Language Model) to use. + result (str): The result string containing the detection data. + **kwargs (Any): Additional keyword arguments required by the specified VLM. + + Returns: + Detections: A new Detections object. + + Raises: + ValueError: If the LMM is invalid, required arguments are missing, or + disallowed arguments are provided. + ValueError: If the specified LMM is not supported. 
+ + Examples: + ```python + import supervision as sv + + paligemma_result = " cat" + detections = sv.Detections.from_vlm( + sv.VLM.PALIGEMMA, + paligemma_result, + resolution_wh=(1000, 1000), + classes=['cat', 'dog'] + ) + detections.xyxy + # array([[250., 250., 750., 750.]]) + + detections.class_id + # array([0]) + ``` + """ vlm = validate_vlm_parameters(vlm, result, kwargs) if vlm == VLM.PALIGEMMA: @@ -888,6 +925,11 @@ def from_vlm( data = {CLASS_NAME_DATA_FIELD: class_name} return cls(xyxy=xyxy, class_id=class_id, data=data) + if vlm == VLM.DEEPSEEK_VL_2: + xyxy, class_id, class_name = from_deepseek_vl_2(result, **kwargs) + data = {CLASS_NAME_DATA_FIELD: class_name} + return cls(xyxy=xyxy, class_id=class_id, data=data) + if vlm == VLM.FLORENCE_2: xyxy, labels, mask, xyxyxyxy = from_florence_2(result, **kwargs) if len(xyxy) == 0: diff --git a/supervision/detection/vlm.py b/supervision/detection/vlm.py index 719dc4438c..e9b72bef59 100644 --- a/supervision/detection/vlm.py +++ b/supervision/detection/vlm.py @@ -1,5 +1,6 @@ import json import re +import ast from enum import Enum from typing import Any, Dict, List, Optional, Tuple, Union @@ -17,30 +18,35 @@ class LMM(Enum): PALIGEMMA = "paligemma" FLORENCE_2 = "florence_2" QWEN_2_5_VL = "qwen_2_5_vl" + DEEPSEEK_VL_2 = "deepseek_vl_2" class VLM(Enum): PALIGEMMA = "paligemma" FLORENCE_2 = "florence_2" QWEN_2_5_VL = "qwen_2_5_vl" + DEEPSEEK_VL_2 = "deepseek_vl_2" RESULT_TYPES: Dict[VLM, type] = { VLM.PALIGEMMA: str, VLM.FLORENCE_2: dict, VLM.QWEN_2_5_VL: str, + VLM.DEEPSEEK_VL_2: str, } REQUIRED_ARGUMENTS: Dict[VLM, List[str]] = { VLM.PALIGEMMA: ["resolution_wh"], VLM.FLORENCE_2: ["resolution_wh"], VLM.QWEN_2_5_VL: ["input_wh", "resolution_wh"], + VLM.DEEPSEEK_VL_2: ["resolution_wh"], } ALLOWED_ARGUMENTS: Dict[VLM, List[str]] = { VLM.PALIGEMMA: ["resolution_wh", "classes"], VLM.FLORENCE_2: ["resolution_wh"], VLM.QWEN_2_5_VL: ["input_wh", "resolution_wh", "classes"], + VLM.DEEPSEEK_VL_2: ["resolution_wh", "classes"], } 
SUPPORTED_TASKS_FLORENCE_2 = [ @@ -223,6 +229,92 @@ def from_qwen_2_5_vl( return xyxy, class_id, class_name +def from_deepseek_vl_2( + result: str, + resolution_wh: Tuple[int, int], + classes: Optional[List[str]] = None +) -> Tuple[np.ndarray, Optional[np.ndarray], np.ndarray]: + """ + Parse bounding boxes from deepseek-vl2-formatted text, scale them to the specified + resolution, and optionally filter by classes. + + The DeepSeek-VL2 output typically contains pairs of <|ref|> ... <|/ref|> labels + and <|det|> ... <|/det|> bounding box definitions. Each <|det|> section may + contain one or more bounding boxes in the form [[x1, y1, x2, y2], [x1, y1, x2, y2], ...] + (scaled to 0..999). However, other text (e.g. <|end▁of▁sentence|>) may appear + after the bracket, so we strip that out here. + + Args: + result: String containing deepseek-vl2-formatted locations and labels. + resolution_wh: Tuple (width, height) to which we scale the box coordinates. + classes: Optional list of valid class names. If provided, boxes and labels not + in this list are filtered out. + + Returns: + xyxy (np.ndarray): An array of shape `(n, 4)` containing + the bounding boxes coordinates in format `[x1, y1, x2, y2]`. + class_id (Optional[np.ndarray]): An array of shape `(n,)` containing + the class indices for each bounding box (or `None` if classes is not + provided). + class_name (np.ndarray): An array of shape `(n,)` containing + the class labels for each bounding box. + """ + + w, h = resolution_wh + if w <= 0 or h <= 0: + raise ValueError( + f"Both dimensions in resolution_wh must be positive. Got ({w}, {h})." 
+ ) + + label_segments = re.findall(r'<\|ref\|>(.*?)<\|/ref\|>', result, flags=re.DOTALL) + bbox_segments = re.findall(r'<\|det\|>(.*?)<\|/det\|>', result, flags=re.DOTALL) + + if len(label_segments) != len(bbox_segments): + return np.empty((0, 4)), None, np.empty((0,), dtype=str) + + boxes, labels = [], [] + + for label_str, bbox_str in zip(label_segments, bbox_segments): + label_str = label_str.strip() + raw_box_groups = re.findall(r'\[\[.*?\]\]', bbox_str, flags=re.DOTALL) + + if not raw_box_groups: + continue + + for group_str in raw_box_groups: + try: + list_of_boxes = ast.literal_eval(group_str) + for box in list_of_boxes: + if len(box) != 4: + continue + + x1 = box[0] / 999.0 * w + y1 = box[1] / 999.0 * h + x2 = box[2] / 999.0 * w + y2 = box[3] / 999.0 * h + + boxes.append([x1, y1, x2, y2]) + labels.append(label_str) + + except (SyntaxError, ValueError): + continue + + if len(boxes) == 0: + return np.empty((0, 4)), None, np.empty((0,), dtype=str) + + xyxy = np.array(boxes, dtype=np.float32) + class_name = np.array(labels, dtype=str) + class_id = None + + if classes is not None: + mask = np.array([name in classes for name in class_name], dtype=bool) + xyxy = xyxy[mask] + class_name = class_name[mask] + class_id = np.array([classes.index(name) for name in class_name]) + + return xyxy, class_id, class_name + + def from_florence_2( result: dict, resolution_wh: Tuple[int, int] ) -> Tuple[ From 8e845caeeb804b21776f31ebbc4cba9aa3108f1b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 11 Jul 2025 15:44:09 +0000 Subject: [PATCH 002/124] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/detection/core.py | 3 +-- supervision/detection/vlm.py | 12 +++++------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/supervision/detection/core.py 
b/supervision/detection/core.py index 06a2f466a4..ca821592b9 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -36,12 +36,12 @@ from supervision.detection.vlm import ( LMM, VLM, + from_deepseek_vl_2, from_florence_2, from_google_gemini, from_paligemma, from_qwen_2_5_vl, validate_vlm_parameters, - from_deepseek_vl_2, ) from supervision.geometry.core import Position from supervision.utils.internal import deprecated, get_instance_variables @@ -902,7 +902,6 @@ def from_lmm( LMM.DEEPSEEK_VL_2: VLM.DEEPSEEK_VL_2, LMM.GOOGLE_GEMINI_2_0: VLM.GOOGLE_GEMINI_2_0, LMM.GOOGLE_GEMINI_2_5: VLM.GOOGLE_GEMINI_2_5, - } # (this works even if the LMM enum is wrapped by @deprecated) diff --git a/supervision/detection/vlm.py b/supervision/detection/vlm.py index f8b45d4818..35c0c68e9d 100644 --- a/supervision/detection/vlm.py +++ b/supervision/detection/vlm.py @@ -1,6 +1,6 @@ +import ast import json import re -import ast from enum import Enum from typing import Any, Dict, List, Optional, Tuple, Union @@ -244,9 +244,7 @@ def from_qwen_2_5_vl( def from_deepseek_vl_2( - result: str, - resolution_wh: Tuple[int, int], - classes: Optional[List[str]] = None + result: str, resolution_wh: Tuple[int, int], classes: Optional[List[str]] = None ) -> Tuple[np.ndarray, Optional[np.ndarray], np.ndarray]: """ Parse bounding boxes from deepseek-vl2-formatted text, scale them to the specified @@ -280,8 +278,8 @@ def from_deepseek_vl_2( f"Both dimensions in resolution_wh must be positive. Got ({w}, {h})." 
) - label_segments = re.findall(r'<\|ref\|>(.*?)<\|/ref\|>', result, flags=re.DOTALL) - bbox_segments = re.findall(r'<\|det\|>(.*?)<\|/det\|>', result, flags=re.DOTALL) + label_segments = re.findall(r"<\|ref\|>(.*?)<\|/ref\|>", result, flags=re.DOTALL) + bbox_segments = re.findall(r"<\|det\|>(.*?)<\|/det\|>", result, flags=re.DOTALL) if len(label_segments) != len(bbox_segments): return np.empty((0, 4)), None, np.empty((0,), dtype=str) @@ -290,7 +288,7 @@ def from_deepseek_vl_2( for label_str, bbox_str in zip(label_segments, bbox_segments): label_str = label_str.strip() - raw_box_groups = re.findall(r'\[\[.*?\]\]', bbox_str, flags=re.DOTALL) + raw_box_groups = re.findall(r"\[\[.*?\]\]", bbox_str, flags=re.DOTALL) if not raw_box_groups: continue From 535cb2a91b2c54bf56704435a96721bc1ce8070f Mon Sep 17 00:00:00 2001 From: Onuralp SEZER Date: Fri, 11 Jul 2025 18:49:50 +0300 Subject: [PATCH 003/124] =?UTF-8?q?fix(vlm):=20=F0=9F=93=9D=20correct=20fo?= =?UTF-8?q?rmatting=20in=20docstring=20for=20from=5Fdeepseek=5Fvl=5F2=20fu?= =?UTF-8?q?nction?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/detection/vlm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/supervision/detection/vlm.py b/supervision/detection/vlm.py index 35c0c68e9d..bf5c116b98 100644 --- a/supervision/detection/vlm.py +++ b/supervision/detection/vlm.py @@ -253,7 +253,7 @@ def from_deepseek_vl_2( The DeepSeek-VL2 output typically contains pairs of <|ref|> ... <|/ref|> labels and <|det|> ... <|/det|> bounding box definitions. Each <|det|> section may contain one or more bounding boxes in the form [[x1, y1, x2, y2], [x1, y1, x2, y2], ...] - (scaled to 0..999). However, other text (e.g. <|end▁of▁sentence|>) may appear + (scaled to 0..999). However, other text (e.g. < | end▁of▁sentence | >) may appear after the bracket, so we strip that out here. Args: @@ -270,7 +270,7 @@ def from_deepseek_vl_2( provided). 
class_name (np.ndarray): An array of shape `(n,)` containing the class labels for each bounding box. - """ + """ # noqa: E501 w, h = resolution_wh if w <= 0 or h <= 0: From 1e84351d60434f3cce56bdd57802a88c225b4b84 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Jul 2025 00:32:44 +0000 Subject: [PATCH 004/124] :arrow_up: Bump astral-sh/setup-uv from 6.3.1 to 6.4.1 Bumps [astral-sh/setup-uv](https://github.com/astral-sh/setup-uv) from 6.3.1 to 6.4.1. - [Release notes](https://github.com/astral-sh/setup-uv/releases) - [Commits](https://github.com/astral-sh/setup-uv/compare/bd01e18f51369d5a26f1651c3cb451d3417e3bba...7edac99f961f18b581bbd960d59d049f04c0002f) --- updated-dependencies: - dependency-name: astral-sh/setup-uv dependency-version: 6.4.1 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- .github/workflows/publish-docs.yml | 2 +- .github/workflows/publish-pre-release.yml | 2 +- .github/workflows/publish-release.yml | 2 +- .github/workflows/publish-testpypi.yml | 2 +- .github/workflows/test-doc.yml | 2 +- .github/workflows/uv-test.yml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/publish-docs.yml b/.github/workflows/publish-docs.yml index 3b05352ef8..57a2d07f9d 100644 --- a/.github/workflows/publish-docs.yml +++ b/.github/workflows/publish-docs.yml @@ -34,7 +34,7 @@ jobs: fetch-depth: 0 - name: 🐍 Install uv and set Python ${{ matrix.python-version }} - uses: astral-sh/setup-uv@bd01e18f51369d5a26f1651c3cb451d3417e3bba # v6.3.1 + uses: astral-sh/setup-uv@7edac99f961f18b581bbd960d59d049f04c0002f # v6.4.1 with: python-version: ${{ matrix.python-version }} activate-environment: true diff --git a/.github/workflows/publish-pre-release.yml b/.github/workflows/publish-pre-release.yml index f6e410eeb0..be0a70f07e 100644 --- a/.github/workflows/publish-pre-release.yml +++ 
b/.github/workflows/publish-pre-release.yml @@ -29,7 +29,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: 🐍 Install uv and set Python version ${{ matrix.python-version }} - uses: astral-sh/setup-uv@bd01e18f51369d5a26f1651c3cb451d3417e3bba # v6.3.1 + uses: astral-sh/setup-uv@7edac99f961f18b581bbd960d59d049f04c0002f # v6.4.1 with: python-version: ${{ matrix.python-version }} activate-environment: true diff --git a/.github/workflows/publish-release.yml b/.github/workflows/publish-release.yml index 2ba7536f9f..2ed0b22d2d 100644 --- a/.github/workflows/publish-release.yml +++ b/.github/workflows/publish-release.yml @@ -27,7 +27,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: 🐍 Install uv and set Python version ${{ matrix.python-version }} - uses: astral-sh/setup-uv@bd01e18f51369d5a26f1651c3cb451d3417e3bba # v6.3.1 + uses: astral-sh/setup-uv@7edac99f961f18b581bbd960d59d049f04c0002f # v6.4.1 with: python-version: ${{ matrix.python-version }} activate-environment: true diff --git a/.github/workflows/publish-testpypi.yml b/.github/workflows/publish-testpypi.yml index 69dd746d13..1d963431c8 100644 --- a/.github/workflows/publish-testpypi.yml +++ b/.github/workflows/publish-testpypi.yml @@ -24,7 +24,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: 🐍 Install uv and set Python version ${{ matrix.python-version }} - uses: astral-sh/setup-uv@bd01e18f51369d5a26f1651c3cb451d3417e3bba # v6.3.1 + uses: astral-sh/setup-uv@7edac99f961f18b581bbd960d59d049f04c0002f # v6.4.1 with: python-version: ${{ matrix.python-version }} activate-environment: true diff --git a/.github/workflows/test-doc.yml b/.github/workflows/test-doc.yml index 01930690be..6e069b10e9 100644 --- a/.github/workflows/test-doc.yml +++ b/.github/workflows/test-doc.yml @@ -24,7 +24,7 @@ jobs: fetch-depth: 0 - name: 🐍 Install uv and set Python ${{ matrix.python-version }} - uses: 
astral-sh/setup-uv@bd01e18f51369d5a26f1651c3cb451d3417e3bba # v6.3.1 + uses: astral-sh/setup-uv@7edac99f961f18b581bbd960d59d049f04c0002f # v6.4.1 with: python-version: ${{ matrix.python-version }} activate-environment: true diff --git a/.github/workflows/uv-test.yml b/.github/workflows/uv-test.yml index 1aa2882ec8..42b673cfeb 100644 --- a/.github/workflows/uv-test.yml +++ b/.github/workflows/uv-test.yml @@ -19,7 +19,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: 🐍 Install uv and set Python version ${{ matrix.python-version }} - uses: astral-sh/setup-uv@bd01e18f51369d5a26f1651c3cb451d3417e3bba # v6.3.1 + uses: astral-sh/setup-uv@7edac99f961f18b581bbd960d59d049f04c0002f # v6.4.1 with: python-version: ${{ matrix.python-version }} activate-environment: true From b1780fbae504c3b102b9eaf1b79fe8eaf5d4335d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 21 Jul 2025 17:29:41 +0000 Subject: [PATCH 005/124] =?UTF-8?q?chore(pre=5Fcommit):=20=E2=AC=86=20pre?= =?UTF-8?q?=5Fcommit=20autoupdate?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.12.3 → v0.12.4](https://github.com/astral-sh/ruff-pre-commit/compare/v0.12.3...v0.12.4) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 24e432cd54..69fb2a520b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -32,7 +32,7 @@ repos: additional_dependencies: ["bandit[toml]"] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.12.3 + rev: v0.12.4 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] From a470bc29769c710b57a453215fe5fe17e48b4382 Mon Sep 17 00:00:00 2001 From: soumik12345 <19soumik.rakshit96@gmail.com> Date: Wed, 23 Jul 2025 18:36:31 +0530 Subject: [PATCH 006/124] chore: make pre-commit happy 
--- supervision/detection/vlm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/supervision/detection/vlm.py b/supervision/detection/vlm.py index f0e79ae48c..24d542b26c 100644 --- a/supervision/detection/vlm.py +++ b/supervision/detection/vlm.py @@ -317,8 +317,8 @@ def from_qwen_2_5_vl( def from_deepseek_vl_2( - result: str, resolution_wh: Tuple[int, int], classes: Optional[List[str]] = None -) -> Tuple[np.ndarray, Optional[np.ndarray], np.ndarray]: + result: str, resolution_wh: tuple[int, int], classes: list[str] | None = None +) -> tuple[np.ndarray, np.ndarray | None, np.ndarray]: """ Parse bounding boxes from deepseek-vl2-formatted text, scale them to the specified resolution, and optionally filter by classes. From 76ee05420d9cc0134af453173d18ea5810cd8915 Mon Sep 17 00:00:00 2001 From: rcvsq Date: Wed, 23 Jul 2025 14:58:09 +0000 Subject: [PATCH 007/124] exposing `box_iou_batch` in `supervision/detection/utils__init__.py` --- supervision/detection/utils/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/supervision/detection/utils/__init__.py b/supervision/detection/utils/__init__.py index e69de29bb2..c4b0f38706 100644 --- a/supervision/detection/utils/__init__.py +++ b/supervision/detection/utils/__init__.py @@ -0,0 +1,3 @@ +from supervision.detection.utils.iou_and_nms import box_iou_batch + +__all__ = ["box_iou_batch"] From b3aee43c3be09906355ade76821ea6adf7d42373 Mon Sep 17 00:00:00 2001 From: soumik12345 <19soumik.rakshit96@gmail.com> Date: Thu, 24 Jul 2025 12:55:40 +0530 Subject: [PATCH 008/124] update: from_deepseek_vl_2 logic --- supervision/detection/vlm.py | 82 +++++++++++++++--------------------- 1 file changed, 34 insertions(+), 48 deletions(-) diff --git a/supervision/detection/vlm.py b/supervision/detection/vlm.py index 24d542b26c..ffc0d779aa 100644 --- a/supervision/detection/vlm.py +++ b/supervision/detection/vlm.py @@ -1,6 +1,5 @@ from __future__ import annotations -import ast import base64 import io import 
json @@ -326,8 +325,11 @@ def from_deepseek_vl_2( The DeepSeek-VL2 output typically contains pairs of <|ref|> ... <|/ref|> labels and <|det|> ... <|/det|> bounding box definitions. Each <|det|> section may contain one or more bounding boxes in the form [[x1, y1, x2, y2], [x1, y1, x2, y2], ...] - (scaled to 0..999). However, other text (e.g. < | end▁of▁sentence | >) may appear - after the bracket, so we strip that out here. + (scaled to 0..999). For example: + + ``` + <|ref|>The giraffe at the back<|/ref|><|det|>[[580, 270, 999, 904]]<|/det|><|ref|>The giraffe at the front<|/ref|><|det|>[[26, 31, 632, 998]]<|/det|><|end▁of▁sentence|> + ``` Args: result: String containing deepseek-vl2-formatted locations and labels. @@ -345,59 +347,43 @@ def from_deepseek_vl_2( the class labels for each bounding box. """ # noqa: E501 - w, h = resolution_wh - if w <= 0 or h <= 0: + width, height = resolution_wh + label_segments = re.findall(r"<\|ref\|>(.*?)<\|/ref\|>", result, flags=re.S) + detection_segments = re.findall(r"<\|det\|>(.*?)<\|/det\|>", result, flags=re.S) + + if len(label_segments) != len(detection_segments): raise ValueError( - f"Both dimensions in resolution_wh must be positive. Got ({w}, {h})." + f"Number of ref tags ({len(label_segments)}) " + f"and det tags ({len(detection_segments)}) in the result must be equal." 
) - label_segments = re.findall(r"<\|ref\|>(.*?)<\|/ref\|>", result, flags=re.DOTALL) - bbox_segments = re.findall(r"<\|det\|>(.*?)<\|/det\|>", result, flags=re.DOTALL) - - if len(label_segments) != len(bbox_segments): - return np.empty((0, 4)), None, np.empty((0,), dtype=str) - - boxes, labels = [], [] - - for label_str, bbox_str in zip(label_segments, bbox_segments): - label_str = label_str.strip() - raw_box_groups = re.findall(r"\[\[.*?\]\]", bbox_str, flags=re.DOTALL) - - if not raw_box_groups: - continue - - for group_str in raw_box_groups: - try: - list_of_boxes = ast.literal_eval(group_str) - for box in list_of_boxes: - if len(box) != 4: - continue - - x1 = box[0] / 999.0 * w - y1 = box[1] / 999.0 * h - x2 = box[2] / 999.0 * w - y2 = box[3] / 999.0 * h - - boxes.append([x1, y1, x2, y2]) - labels.append(label_str) - - except (SyntaxError, ValueError): - continue - - if len(boxes) == 0: - return np.empty((0, 4)), None, np.empty((0,), dtype=str) + xyxy, class_names = [], [] + for label, detection_blob in zip(label_segments, detection_segments): + class_name = label.strip() + for box in re.findall(r"\[(.*?)\]", detection_blob): + x1, y1, x2, y2 = map(float, box.strip("[]").split(",")) + xyxy.append( + [ + int(x1 / 999 * width), + int(y1 / 999 * height), + int(x2 / 999 * width), + int(y2 / 999 * height), + ] + ) + class_names.append(class_name) - xyxy = np.array(boxes, dtype=np.float32) - class_name = np.array(labels, dtype=str) - class_id = None + xyxy = np.array(xyxy) + class_names = np.array(class_names) if classes is not None: - mask = np.array([name in classes for name in class_name], dtype=bool) + mask = np.array([name in classes for name in class_names], dtype=bool) xyxy = xyxy[mask] - class_name = class_name[mask] - class_id = np.array([classes.index(name) for name in class_name]) + class_names = class_names[mask] + class_id = np.array([classes.index(name) for name in class_names]) + else: + class_id = np.array(list(range(len(class_names)))) - return 
xyxy, class_id, class_name + return xyxy, class_id, class_names def from_florence_2( From cc1525707aaa5bf81c9ae076a1f44c9118a40bec Mon Sep 17 00:00:00 2001 From: soumik12345 <19soumik.rakshit96@gmail.com> Date: Thu, 24 Jul 2025 13:11:06 +0530 Subject: [PATCH 009/124] update: docstring for Detections.from_vlm --- supervision/detection/core.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/supervision/detection/core.py b/supervision/detection/core.py index de665d285a..0233ee3f9d 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -1166,6 +1166,7 @@ def from_vlm(cls, vlm: VLM | str, result: str | dict, **kwargs: Any) -> Detectio | Google Gemini 2.0 | `GOOGLE_GEMINI_2_0` | detection | `resolution_wh` | `classes` | | Google Gemini 2.5 | `GOOGLE_GEMINI_2_5` | detection, segmentation | `resolution_wh` | `classes` | | Moondream | `MOONDREAM` | detection | `resolution_wh` | | + | DeepSeek-VL2 | `DEEPSEEK_VL_2` | detection | `resolution_wh` | `classes` | Args: vlm (Union[VLM, str]): The type of VLM (Vision Language Model) to use. @@ -1454,6 +1455,29 @@ def from_vlm(cls, vlm: VLM | str, result: str | dict, **kwargs: Any) -> Detectio # [1908.01, 1346.67, 2585.99, 2024.11]]) ``` + !!! 
example "DeepSeek-VL2" + + ```python + from PIL import Image + import supervision as sv + + deepseek_vl2_result = "<|ref|>The giraffe at the back<|/ref|><|det|>[[580, 270, 999, 904]]<|/det|><|ref|>The giraffe at the front<|/ref|><|det|>[[26, 31, 632, 998]]<|/det|><|end▁of▁sentence|>" + + detections = sv.Detections.from_vlm( + vlm=sv.VLM.DEEPSEEK_VL_2, result=deepseek_vl2_result, resolution_wh=image.size + ) + + detections.xyxy + # array([[ 420, 293, 724, 982], + # [ 18, 33, 458, 1084]]) + + detections.class_id + # array([0, 1]) + + detections.data + # {'class_name': array(['The giraffe at the back', 'The giraffe at the front'], dtype=' Date: Thu, 24 Jul 2025 14:23:24 +0530 Subject: [PATCH 010/124] add: prompt engineering tip for deepseek-vl2 --- supervision/detection/core.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/supervision/detection/core.py b/supervision/detection/core.py index 0233ee3f9d..65429fd681 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -831,6 +831,7 @@ def from_lmm(cls, lmm: LMM | str, result: str | dict, **kwargs: Any) -> Detectio | Google Gemini 2.0 | `GOOGLE_GEMINI_2_0` | detection | `resolution_wh` | `classes` | | Google Gemini 2.5 | `GOOGLE_GEMINI_2_5` | detection, segmentation | `resolution_wh` | `classes` | | Moondream | `MOONDREAM` | detection | `resolution_wh` | | + | DeepSeek-VL2 | `DEEPSEEK_VL_2` | detection | `resolution_wh` | `classes` | Args: lmm (Union[LMM, str]): The type of LMM (Large Multimodal Model) to use. @@ -1457,6 +1458,24 @@ def from_vlm(cls, vlm: VLM | str, result: str | dict, **kwargs: Any) -> Detectio !!! example "DeepSeek-VL2" + + ??? tip "Prompt engineering" + + To get the best results from DeepSeek-VL2, use optimized prompts that leverage + its object detection and visual grounding capabilities effectively. 
+ + **For general object detection, use the following user prompt:** + + ``` + \\n<|ref|>The giraffe at the front<|/ref|> + ``` + + **For visual grounding, use the following user prompt:** + + ``` + \\n<|grounding|>Detect the giraffes + ``` + ```python from PIL import Image import supervision as sv From 8fa21dd165a99f34cbc24c60d127ca3a5881303f Mon Sep 17 00:00:00 2001 From: soumik12345 <19soumik.rakshit96@gmail.com> Date: Thu, 24 Jul 2025 15:04:59 +0530 Subject: [PATCH 011/124] update: from_deepseek_vl_2 --- supervision/detection/vlm.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/supervision/detection/vlm.py b/supervision/detection/vlm.py index ffc0d779aa..71207554e4 100644 --- a/supervision/detection/vlm.py +++ b/supervision/detection/vlm.py @@ -357,33 +357,35 @@ def from_deepseek_vl_2( f"and det tags ({len(detection_segments)}) in the result must be equal." ) - xyxy, class_names = [], [] + xyxy, class_name_list = [], [] for label, detection_blob in zip(label_segments, detection_segments): - class_name = label.strip() + current_class_name = label.strip() for box in re.findall(r"\[(.*?)\]", detection_blob): x1, y1, x2, y2 = map(float, box.strip("[]").split(",")) xyxy.append( [ - int(x1 / 999 * width), - int(y1 / 999 * height), - int(x2 / 999 * width), - int(y2 / 999 * height), + (x1 / 999 * width), + (y1 / 999 * height), + (x2 / 999 * width), + (y2 / 999 * height), ] ) - class_names.append(class_name) + class_name_list.append(current_class_name) - xyxy = np.array(xyxy) - class_names = np.array(class_names) + xyxy = np.array(xyxy, dtype=np.float32) + class_name = np.array(class_name_list) if classes is not None: - mask = np.array([name in classes for name in class_names], dtype=bool) + mask = np.array([name in classes for name in class_name], dtype=bool) xyxy = xyxy[mask] - class_names = class_names[mask] - class_id = np.array([classes.index(name) for name in class_names]) + class_name = class_name[mask] + 
class_id = np.array([classes.index(name) for name in class_name]) else: - class_id = np.array(list(range(len(class_names)))) + unique_classes = sorted(list(set(class_name))) + class_to_id = {name: i for i, name in enumerate(unique_classes)} + class_id = np.array([class_to_id[name] for name in class_name]) - return xyxy, class_id, class_names + return xyxy, class_id, class_name def from_florence_2( From 137078602ba2a1eb845c4131e62f68bc2a6a0cbb Mon Sep 17 00:00:00 2001 From: soumik12345 <19soumik.rakshit96@gmail.com> Date: Thu, 24 Jul 2025 15:17:05 +0530 Subject: [PATCH 012/124] add: tests --- test/detection/test_vlm.py | 110 +++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) diff --git a/test/detection/test_vlm.py b/test/detection/test_vlm.py index 1b4c2f1894..8a8240e98f 100644 --- a/test/detection/test_vlm.py +++ b/test/detection/test_vlm.py @@ -6,7 +6,10 @@ import numpy as np import pytest +from supervision.config import CLASS_NAME_DATA_FIELD +from supervision.detection.core import Detections from supervision.detection.vlm import ( + VLM, from_florence_2, from_google_gemini_2_0, from_google_gemini_2_5, @@ -1122,3 +1125,110 @@ def test_from_google_gemini_2_5( assert masks is not None assert masks.shape == expected_results[4].shape assert np.array_equal(masks, expected_results[4]) + + +@pytest.mark.parametrize( + "exception, result, resolution_wh, classes, expected_detections", + [ + ( + pytest.raises(ValueError), + "", + (100, 100), + None, + None, + ), # empty text + ( + pytest.raises(ValueError), + "random text", + (100, 100), + None, + None, + ), # random text + ( + does_not_raise(), + "<|ref|>cat<|/ref|><|det|>[[100, 200, 300, 400]]<|/det|>", + (1000, 1000), + None, + Detections( + xyxy=np.array([[100.1, 200.2, 300.3, 400.4]]), + class_id=np.array([0]), + data={CLASS_NAME_DATA_FIELD: np.array(["cat"])}, + ), + ), # single box, no classes + ( + does_not_raise(), + "<|ref|>cat<|/ref|><|det|>[[100, 200, 300, 400]]<|/det|>", + (1000, 1000), + 
["cat", "dog"], + Detections( + xyxy=np.array([[100.1, 200.2, 300.3, 400.4]]), + class_id=np.array([0]), + data={CLASS_NAME_DATA_FIELD: np.array(["cat"])}, + ), + ), # single box, with classes + ( + does_not_raise(), + "<|ref|>person<|/ref|><|det|>[[100, 200, 300, 400]]<|/det|>", + (1000, 1000), + ["cat", "dog"], + Detections.empty(), + ), # single box, wrong class + ( + does_not_raise(), + ( + "<|ref|>cat<|/ref|><|det|>[[100, 200, 300, 400]]<|/det|>" + "<|ref|>dog<|/ref|><|det|>[[500, 600, 700, 800]]<|/det|>" + ), + (1000, 1000), + ["cat"], + Detections( + xyxy=np.array([[100.1, 200.2, 300.3, 400.4]]), + class_id=np.array([0]), + data={CLASS_NAME_DATA_FIELD: np.array(["cat"])}, + ), + ), # multiple boxes, one class correct + ( + pytest.raises(ValueError), + "<|ref|>cat<|/ref|>", + (100, 100), + None, + None, + ), # only ref + ( + pytest.raises(ValueError), + "<|det|>[[100, 200, 300, 400]]<|/det|>", + (100, 100), + None, + None, + ), # only det + ], +) +def test_from_deepseek_vl_2( + exception, + result: str, + resolution_wh: tuple[int, int], + classes: list[str] | None, + expected_detections: Detections, +): + with exception: + detections = Detections.from_vlm( + vlm=VLM.DEEPSEEK_VL_2, + result=result, + resolution_wh=resolution_wh, + classes=classes, + ) + + if expected_detections is None: + return + + assert len(detections) == len(expected_detections) + + if len(detections) == 0: + return + + assert np.allclose(detections.xyxy, expected_detections.xyxy, atol=1e-1) + assert np.array_equal(detections.class_id, expected_detections.class_id) + assert np.array_equal( + detections.data[CLASS_NAME_DATA_FIELD], + expected_detections.data[CLASS_NAME_DATA_FIELD], + ) From 72fe0bf47f7330e07932f0541b99b49a6fcdd8fd Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Thu, 24 Jul 2025 15:05:19 +0200 Subject: [PATCH 013/124] initial version of `edit_distance` and `fuzzy_match_index` that will allow to match faulty class name generated by VLMs with exact class names --- 
docs/detection/utils/vlms.md | 18 +++ mkdocs.yml | 1 + supervision/__init__.py | 3 + supervision/detection/utils/vlms.py | 106 ++++++++++++++++ supervision/metrics/mean_average_precision.py | 12 +- test/detection/utils/test_vlms.py | 120 ++++++++++++++++++ 6 files changed, 254 insertions(+), 6 deletions(-) create mode 100644 docs/detection/utils/vlms.md create mode 100644 supervision/detection/utils/vlms.py create mode 100644 test/detection/utils/test_vlms.py diff --git a/docs/detection/utils/vlms.md b/docs/detection/utils/vlms.md new file mode 100644 index 0000000000..2706d2a837 --- /dev/null +++ b/docs/detection/utils/vlms.md @@ -0,0 +1,18 @@ +--- +comments: true +status: new +--- + +# VLMs Utils + + + +:::supervision.detection.utils.vlms.edit_distance + + + +:::supervision.detection.utils.vlms.fuzzy_match_index diff --git a/mkdocs.yml b/mkdocs.yml index f25015348a..394d5ddd3b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -47,6 +47,7 @@ nav: - Boxes: detection/utils/boxes.md - Masks: detection/utils/masks.md - Polygons: detection/utils/polygons.md + - VLMs: detection/utils/vlms.md - Keypoint Detection: - Core: keypoint/core.md - Annotators: keypoint/annotators.md diff --git a/supervision/__init__.py b/supervision/__init__.py index 925628be3c..0a80113dec 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -51,6 +51,7 @@ from supervision.detection.tools.json_sink import JSONSink from supervision.detection.tools.polygon_zone import PolygonZone, PolygonZoneAnnotator from supervision.detection.tools.smoother import DetectionsSmoother +from supervision.detection.utils.vlms import edit_distance, fuzzy_match_index from supervision.detection.utils.boxes import ( clip_boxes, denormalize_boxes, @@ -215,7 +216,9 @@ "draw_polygon", "draw_rectangle", "draw_text", + "edit_distance", "filter_polygons_by_area", + "fuzzy_match_index", "get_coco_class_index_mapping", "get_polygon_center", "get_video_frames_generator", diff --git a/supervision/detection/utils/vlms.py 
b/supervision/detection/utils/vlms.py new file mode 100644 index 0000000000..94af747649 --- /dev/null +++ b/supervision/detection/utils/vlms.py @@ -0,0 +1,106 @@ +from __future__ import annotations + + +def edit_distance(string_1: str, string_2: str, case_sensitive: bool = True) -> int: + """ + Calculates the minimum number of single-character edits required + to transform one string into another. Allowed operations are insertion, + deletion, and substitution. + + Args: + string_1 (str): The source string to be transformed. + string_2 (str): The target string to transform into. + case_sensitive (bool, optional): Whether comparison should be case-sensitive. + Defaults to True. + + Returns: + int: The minimum number of edits required to convert `string_1` + into `string_2`. + + Examples: + ```python + import supervision as sv + + sv.edit_distance("hello", "hello") + # 0 + + sv.edit_distance("Test", "test", case_sensitive=True) + # 1 + + sv.edit_distance("abc", "xyz") + # 3 + + sv.edit_distance("hello", "") + # 5 + + sv.edit_distance("", "") + # 0 + + sv.edit_distance("hello world", "helloworld") + # 1 + ``` + """ + if not case_sensitive: + string_1 = string_1.lower() + string_2 = string_2.lower() + + length_1 = len(string_1) + length_2 = len(string_2) + distance_matrix = [[0] * (length_2 + 1) for _ in range(length_1 + 1)] + + for i in range(length_1 + 1): + distance_matrix[i][0] = i + for j in range(length_2 + 1): + distance_matrix[0][j] = j + + for i in range(1, length_1 + 1): + for j in range(1, length_2 + 1): + if string_1[i - 1] == string_2[j - 1]: + substitution_cost = 0 + else: + substitution_cost = 1 + distance_matrix[i][j] = min( + distance_matrix[i - 1][j] + 1, + distance_matrix[i][j - 1] + 1, + distance_matrix[i - 1][j - 1] + substitution_cost + ) + + return distance_matrix[length_1][length_2] + + +def fuzzy_match_index( + candidates: list[str], + query: str, + threshold: int, + case_sensitive: bool = True, +) -> int | None: + """ + Searches for the first 
string in `candidates` whose edit distance + to `query` is less than or equal to `threshold`. + + Args: + candidates (list[str]): List of strings to search. + query (str): String to compare against the candidates. + threshold (int): Maximum allowed edit distance for a match. + case_sensitive (bool, optional): Whether matching should be case-sensitive. + + Returns: + Optional[int]: Index of the first matching string in candidates, + or None if no match is found. + + Examples: + ```python + fuzzy_match_index(["cat", "dog", "rat"], "dat", threshold=1) + # 0 + + fuzzy_match_index(["alpha", "beta", "gamma"], "bata", threshold=1) + # 1 + + fuzzy_match_index(["one", "two", "three"], "ten", threshold=2) + # None + ``` + """ + for idx, candidate in enumerate(candidates): + if edit_distance(candidate, query, case_sensitive=case_sensitive) <= threshold: + return idx + return None \ No newline at end of file diff --git a/supervision/metrics/mean_average_precision.py b/supervision/metrics/mean_average_precision.py index 43a3116ac2..6967361702 100644 --- a/supervision/metrics/mean_average_precision.py +++ b/supervision/metrics/mean_average_precision.py @@ -102,12 +102,12 @@ def __str__(self) -> str: f"maxDets=100 ] = {self.map50:.3f}\n" f"Average Precision (AP) @[ IoU=0.75 | area= all | " f"maxDets=100 ] = {self.map75:.3f}\n" - f"Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] " - f"= {self.small_objects.map50_95:.3f}\n" - f"Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] \ - = {self.medium_objects.map50_95:.3f}\n" - f"Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] \ - = {self.large_objects.map50_95:.3f}" + f"Average Precision (AP) @[ IoU=0.50:0.95 | area= small | " + f"maxDets=100 ] = {self.small_objects.map50_95:.3f}\n" + f"Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | " + f"maxDets=100 ] = {self.medium_objects.map50_95:.3f}\n" + f"Average Precision (AP) @[ IoU=0.50:0.95 | area= large | " + 
f"maxDets=100 ] = {self.large_objects.map50_95:.3f}" ) def to_pandas(self) -> pd.DataFrame: diff --git a/test/detection/utils/test_vlms.py b/test/detection/utils/test_vlms.py new file mode 100644 index 0000000000..6365cf8a65 --- /dev/null +++ b/test/detection/utils/test_vlms.py @@ -0,0 +1,120 @@ +import pytest + +from supervision.detection.utils.vlms import edit_distance, fuzzy_match_index + +@pytest.mark.parametrize( + "string_1, string_2, case_sensitive, expected_result", + [ + # identical strings, various cases + ("hello", "hello", True, 0), + ("hello", "hello", False, 0), + + # case sensitive vs insensitive + ("Test", "test", True, 1), + ("Test", "test", False, 0), + ("CASE", "case", True, 4), + ("CASE", "case", False, 0), + + # completely different + ("abc", "xyz", True, 3), + ("abc", "xyz", False, 3), + + # one string empty + ("hello", "", True, 5), + ("", "world", True, 5), + + # single character cases + ("a", "b", True, 1), + ("A", "a", True, 1), + ("A", "a", False, 0), + + # whitespaces + ("hello world", "helloworld", True, 1), + ("test", " test", True, 1), + + # unicode and emoji + ("😊", "😊", True, 0), + ("😊", "😢", True, 1), + + # long string vs empty + ("a" * 100, "", True, 100), + ("", "b" * 100, True, 100), + + # prefix/suffix + ("prefix", "prefixes", True, 2), + ("suffix", "asuffix", True, 1), + + # leading/trailing whitespace + (" hello", "hello", True, 1), + ("hello", "hello ", True, 1), + + # long almost-equal string + ( + "The quick brown fox jumps over the lazy dog", + "The quick brown fox jumps over the lazy cog", + True, + 1 + ), + ( + "The quick brown fox jumps over the lazy dog", + "The quick brown fox jumps over the lazy cog", + False, + 1 + ), + + # both empty + ("", "", True, 0), + ("", "", False, 0), + + # mixed case with symbols + ("123ABC!", "123abc!", True, 3), + ("123ABC!", "123abc!", False, 0), + ], +) +def test_edit_distance(string_1, string_2, case_sensitive, expected_result): + assert edit_distance(string_1, string_2, 
case_sensitive=case_sensitive) == expected_result + + +@pytest.mark.parametrize( + "candidates, query, threshold, case_sensitive, expected_result", + [ + # exact match at index 0 + (["cat", "dog", "rat"], "cat", 0, True, 0), + # match at index 2 within threshold + (["cat", "dog", "rat"], "dat", 1, True, 2), + # no match due to high threshold + (["cat", "dog", "rat"], "bat", 0, True, None), + # multiple possible matches, returns first + (["apple", "apply", "appla"], "apple", 1, True, 0), + # case-insensitive match + (["Alpha", "beta", "Gamma"], "alpha", 0, False, 0), + # case-sensitive: no match + (["Alpha", "beta", "Gamma"], "alpha", 0, True, None), + # threshold boundary + (["alpha", "beta", "gamma"], "bata", 1, True, 1), + # no match (all distances too high) + (["one", "two", "three"], "ten", 1, True, None), + # unicode/emoji match + (["😊", "😢", "😁"], "😄", 1, True, None), + (["😊", "😢", "😁"], "😊", 0, True, 0), + # empty candidates + ([], "any", 2, True, None), + # empty query, non-empty candidates + (["", "abc"], "", 0, True, 0), + (["", "abc"], "", 1, True, 0), + (["a", "b", "c"], "", 1, True, 0), + # non-empty query, empty candidate + (["", ""], "a", 1, True, 0), + # all candidates require higher edit than threshold + (["short", "words", "only"], "longerword", 2, True, None), + # repeated candidates + (["a", "a", "a"], "b", 1, True, 0), + ] +) +def test_fuzzy_match_index(candidates, query, threshold, case_sensitive, expected_result): + assert fuzzy_match_index( + candidates=candidates, + query=query, + threshold=threshold, + case_sensitive=case_sensitive + ) == expected_result From 8262b6e64ba44e4312bc275ed04ce8cf63d8c873 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 24 Jul 2025 13:06:42 +0000 Subject: [PATCH 014/124] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- supervision/__init__.py | 2 +- supervision/detection/utils/vlms.py | 4 +-- test/detection/utils/test_vlms.py | 43 ++++++++++++++--------------- 3 files changed, 23 insertions(+), 26 deletions(-) diff --git a/supervision/__init__.py b/supervision/__init__.py index 0a80113dec..cc54fe7098 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -51,7 +51,6 @@ from supervision.detection.tools.json_sink import JSONSink from supervision.detection.tools.polygon_zone import PolygonZone, PolygonZoneAnnotator from supervision.detection.tools.smoother import DetectionsSmoother -from supervision.detection.utils.vlms import edit_distance, fuzzy_match_index from supervision.detection.utils.boxes import ( clip_boxes, denormalize_boxes, @@ -93,6 +92,7 @@ approximate_polygon, filter_polygons_by_area, ) +from supervision.detection.utils.vlms import edit_distance, fuzzy_match_index from supervision.detection.vlm import LMM, VLM from supervision.draw.color import Color, ColorPalette from supervision.draw.utils import ( diff --git a/supervision/detection/utils/vlms.py b/supervision/detection/utils/vlms.py index 94af747649..9022b0619b 100644 --- a/supervision/detection/utils/vlms.py +++ b/supervision/detection/utils/vlms.py @@ -62,7 +62,7 @@ def edit_distance(string_1: str, string_2: str, case_sensitive: bool = True) -> distance_matrix[i][j] = min( distance_matrix[i - 1][j] + 1, distance_matrix[i][j - 1] + 1, - distance_matrix[i - 1][j - 1] + substitution_cost + distance_matrix[i - 1][j - 1] + substitution_cost, ) return distance_matrix[length_1][length_2] @@ -103,4 +103,4 @@ def fuzzy_match_index( for idx, candidate in enumerate(candidates): if edit_distance(candidate, query, case_sensitive=case_sensitive) <= threshold: return idx - return None \ No newline at end of file + return None diff --git a/test/detection/utils/test_vlms.py b/test/detection/utils/test_vlms.py index 6365cf8a65..76310a227e 100644 --- 
a/test/detection/utils/test_vlms.py +++ b/test/detection/utils/test_vlms.py @@ -2,77 +2,69 @@ from supervision.detection.utils.vlms import edit_distance, fuzzy_match_index + @pytest.mark.parametrize( "string_1, string_2, case_sensitive, expected_result", [ # identical strings, various cases ("hello", "hello", True, 0), ("hello", "hello", False, 0), - # case sensitive vs insensitive ("Test", "test", True, 1), ("Test", "test", False, 0), ("CASE", "case", True, 4), ("CASE", "case", False, 0), - # completely different ("abc", "xyz", True, 3), ("abc", "xyz", False, 3), - # one string empty ("hello", "", True, 5), ("", "world", True, 5), - # single character cases ("a", "b", True, 1), ("A", "a", True, 1), ("A", "a", False, 0), - # whitespaces ("hello world", "helloworld", True, 1), ("test", " test", True, 1), - # unicode and emoji ("😊", "😊", True, 0), ("😊", "😢", True, 1), - # long string vs empty ("a" * 100, "", True, 100), ("", "b" * 100, True, 100), - # prefix/suffix ("prefix", "prefixes", True, 2), ("suffix", "asuffix", True, 1), - # leading/trailing whitespace (" hello", "hello", True, 1), ("hello", "hello ", True, 1), - # long almost-equal string ( "The quick brown fox jumps over the lazy dog", "The quick brown fox jumps over the lazy cog", True, - 1 + 1, ), ( "The quick brown fox jumps over the lazy dog", "The quick brown fox jumps over the lazy cog", False, - 1 + 1, ), - # both empty ("", "", True, 0), ("", "", False, 0), - # mixed case with symbols ("123ABC!", "123abc!", True, 3), ("123ABC!", "123abc!", False, 0), ], ) def test_edit_distance(string_1, string_2, case_sensitive, expected_result): - assert edit_distance(string_1, string_2, case_sensitive=case_sensitive) == expected_result + assert ( + edit_distance(string_1, string_2, case_sensitive=case_sensitive) + == expected_result + ) @pytest.mark.parametrize( @@ -109,12 +101,17 @@ def test_edit_distance(string_1, string_2, case_sensitive, expected_result): (["short", "words", "only"], "longerword", 2, True, 
None), # repeated candidates (["a", "a", "a"], "b", 1, True, 0), - ] + ], ) -def test_fuzzy_match_index(candidates, query, threshold, case_sensitive, expected_result): - assert fuzzy_match_index( - candidates=candidates, - query=query, - threshold=threshold, - case_sensitive=case_sensitive - ) == expected_result +def test_fuzzy_match_index( + candidates, query, threshold, case_sensitive, expected_result +): + assert ( + fuzzy_match_index( + candidates=candidates, + query=query, + threshold=threshold, + case_sensitive=case_sensitive, + ) + == expected_result + ) From 58bdce57076fe2c2c72d661aac46a42849a52a2f Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Thu, 24 Jul 2025 15:13:46 +0200 Subject: [PATCH 015/124] tests fixed --- test/detection/utils/test_vlms.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/detection/utils/test_vlms.py b/test/detection/utils/test_vlms.py index 76310a227e..a6fe649b73 100644 --- a/test/detection/utils/test_vlms.py +++ b/test/detection/utils/test_vlms.py @@ -73,7 +73,7 @@ def test_edit_distance(string_1, string_2, case_sensitive, expected_result): # exact match at index 0 (["cat", "dog", "rat"], "cat", 0, True, 0), # match at index 2 within threshold - (["cat", "dog", "rat"], "dat", 1, True, 2), + (["cat", "dog", "rat"], "dat", 1, True, 0), # no match due to high threshold (["cat", "dog", "rat"], "bat", 0, True, None), # multiple possible matches, returns first @@ -87,7 +87,7 @@ def test_edit_distance(string_1, string_2, case_sensitive, expected_result): # no match (all distances too high) (["one", "two", "three"], "ten", 1, True, None), # unicode/emoji match - (["😊", "😢", "😁"], "😄", 1, True, None), + (["😊", "😢", "😁"], "😄", 1, True, 0), (["😊", "😢", "😁"], "😊", 0, True, 0), # empty candidates ([], "any", 2, True, None), From ec189bf38895a83c92f2ea6ddc9fb665a3f26531 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Fri, 25 Jul 2025 13:41:03 +0200 Subject: [PATCH 016/124] new, more memory efficient `edit_distance` 
implementation --- supervision/detection/utils/vlms.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/supervision/detection/utils/vlms.py b/supervision/detection/utils/vlms.py index 9022b0619b..c284ac8d3b 100644 --- a/supervision/detection/utils/vlms.py +++ b/supervision/detection/utils/vlms.py @@ -44,28 +44,27 @@ def edit_distance(string_1: str, string_2: str, case_sensitive: bool = True) -> string_1 = string_1.lower() string_2 = string_2.lower() - length_1 = len(string_1) - length_2 = len(string_2) - distance_matrix = [[0] * (length_2 + 1) for _ in range(length_1 + 1)] + if len(string_1) < len(string_2): + string_1, string_2 = string_2, string_1 - for i in range(length_1 + 1): - distance_matrix[i][0] = i - for j in range(length_2 + 1): - distance_matrix[0][j] = j + prev_row = list(range(len(string_2) + 1)) + curr_row = [0] * (len(string_2) + 1) - for i in range(1, length_1 + 1): - for j in range(1, length_2 + 1): + for i in range(1, len(string_1) + 1): + curr_row[0] = i + for j in range(1, len(string_2) + 1): if string_1[i - 1] == string_2[j - 1]: substitution_cost = 0 else: substitution_cost = 1 - distance_matrix[i][j] = min( - distance_matrix[i - 1][j] + 1, - distance_matrix[i][j - 1] + 1, - distance_matrix[i - 1][j - 1] + substitution_cost, + curr_row[j] = min( + prev_row[j] + 1, + curr_row[j - 1] + 1, + prev_row[j - 1] + substitution_cost ) + prev_row, curr_row = curr_row, prev_row - return distance_matrix[length_1][length_2] + return prev_row[len(string_2)] def fuzzy_match_index( From 00817df9e23a12ca5a9f8462864c4f916654feab Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 25 Jul 2025 11:41:24 +0000 Subject: [PATCH 017/124] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
supervision/detection/utils/vlms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/supervision/detection/utils/vlms.py b/supervision/detection/utils/vlms.py index c284ac8d3b..8c4aca74aa 100644 --- a/supervision/detection/utils/vlms.py +++ b/supervision/detection/utils/vlms.py @@ -60,7 +60,7 @@ def edit_distance(string_1: str, string_2: str, case_sensitive: bool = True) -> curr_row[j] = min( prev_row[j] + 1, curr_row[j - 1] + 1, - prev_row[j - 1] + substitution_cost + prev_row[j - 1] + substitution_cost, ) prev_row, curr_row = curr_row, prev_row From bc1d73a30721471472e3e06ec03cac1f419e02c1 Mon Sep 17 00:00:00 2001 From: soumik12345 <19soumik.rakshit96@gmail.com> Date: Fri, 25 Jul 2025 20:13:28 +0530 Subject: [PATCH 018/124] chore: address feedback --- supervision/detection/core.py | 41 +++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/supervision/detection/core.py b/supervision/detection/core.py index 65429fd681..3865f5d723 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -1119,6 +1119,47 @@ def from_lmm(cls, lmm: LMM | str, result: str | dict, **kwargs: Any) -> Detectio # array([[1752.28, 818.82, 2165.72, 1229.14], # [1908.01, 1346.67, 2585.99, 2024.11]]) ``` + + !!! example "DeepSeek-VL2" + + + ??? tip "Prompt engineering" + + To get the best results from DeepSeek-VL2, use optimized prompts that leverage + its object detection and visual grounding capabilities effectively. 
+ + **For general object detection, use the following user prompt:** + + ``` + \\n<|ref|>The giraffe at the front<|/ref|> + ``` + + **For visual grounding, use the following user prompt:** + + ``` + \\n<|grounding|>Detect the giraffes + ``` + + ```python + from PIL import Image + import supervision as sv + + deepseek_vl2_result = "<|ref|>The giraffe at the back<|/ref|><|det|>[[580, 270, 999, 904]]<|/det|><|ref|>The giraffe at the front<|/ref|><|det|>[[26, 31, 632, 998]]<|/det|><|end▁of▁sentence|>" + + detections = sv.Detections.from_vlm( + vlm=sv.VLM.DEEPSEEK_VL_2, result=deepseek_vl2_result, resolution_wh=image.size + ) + + detections.xyxy + # array([[ 420, 293, 724, 982], + # [ 18, 33, 458, 1084]]) + + detections.class_id + # array([0, 1]) + + detections.data + # {'class_name': array(['The giraffe at the back', 'The giraffe at the front'], dtype=' Date: Fri, 25 Jul 2025 14:44:10 +0000 Subject: [PATCH 019/124] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/detection/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/supervision/detection/core.py b/supervision/detection/core.py index 3865f5d723..ffe5ed3fca 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -1119,7 +1119,7 @@ def from_lmm(cls, lmm: LMM | str, result: str | dict, **kwargs: Any) -> Detectio # array([[1752.28, 818.82, 2165.72, 1229.14], # [1908.01, 1346.67, 2585.99, 2024.11]]) ``` - + !!! 
example "DeepSeek-VL2" From 036a9564732c274cc630ef245f7c03e73e409fff Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Sat, 26 Jul 2025 17:46:04 +0200 Subject: [PATCH 020/124] fix for #1911 --- examples/time_in_zone/requirements.txt | 4 +++- examples/time_in_zone/scripts/download_from_youtube.py | 2 +- supervision/annotators/utils.py | 6 +++++- test/annotators/test_utils.py | 2 +- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/examples/time_in_zone/requirements.txt b/examples/time_in_zone/requirements.txt index 9f9446c836..b5ff1911b6 100644 --- a/examples/time_in_zone/requirements.txt +++ b/examples/time_in_zone/requirements.txt @@ -1,4 +1,6 @@ supervision ultralytics inference -pytube +# https://github.com/pytube/pytube/issues/2044 +# pytube +pytubefix diff --git a/examples/time_in_zone/scripts/download_from_youtube.py b/examples/time_in_zone/scripts/download_from_youtube.py index d867363175..b740095573 100644 --- a/examples/time_in_zone/scripts/download_from_youtube.py +++ b/examples/time_in_zone/scripts/download_from_youtube.py @@ -3,7 +3,7 @@ import argparse import os -from pytube import YouTube +from pytubefix import YouTube def main(url: str, output_path: str | None, file_name: str | None) -> None: diff --git a/supervision/annotators/utils.py b/supervision/annotators/utils.py index a84e2049d4..511c68dee1 100644 --- a/supervision/annotators/utils.py +++ b/supervision/annotators/utils.py @@ -142,7 +142,11 @@ def resolve_color( detection_idx=detection_idx, color_lookup=color_lookup, ) - if color_lookup == ColorLookup.TRACK and idx == PENDING_TRACK_ID: + if ( + isinstance(color_lookup, ColorLookup) + and color_lookup == ColorLookup.TRACK + and idx == PENDING_TRACK_ID + ): return PENDING_TRACK_COLOR return get_color_by_index(color=color, idx=idx) diff --git a/test/annotators/test_utils.py b/test/annotators/test_utils.py index 2fdccec116..d6abc38183 100644 --- a/test/annotators/test_utils.py +++ b/test/annotators/test_utils.py @@ -97,7 +97,7 @@ def 
test_resolve_color_idx( detections: Detections, detection_idx: int, - color_lookup: ColorLookup, + color_lookup: ColorLookup | np.ndarray, expected_result: int | None, exception: Exception, ) -> None: From 96b58c2860630def1935e6c52cb4880833d82711 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 28 Jul 2025 00:07:27 +0000 Subject: [PATCH 021/124] :arrow_up: Bump astral-sh/setup-uv from 6.4.1 to 6.4.3 Bumps [astral-sh/setup-uv](https://github.com/astral-sh/setup-uv) from 6.4.1 to 6.4.3. - [Release notes](https://github.com/astral-sh/setup-uv/releases) - [Commits](https://github.com/astral-sh/setup-uv/compare/7edac99f961f18b581bbd960d59d049f04c0002f...e92bafb6253dcd438e0484186d7669ea7a8ca1cc) --- updated-dependencies: - dependency-name: astral-sh/setup-uv dependency-version: 6.4.3 dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/publish-docs.yml | 2 +- .github/workflows/publish-pre-release.yml | 2 +- .github/workflows/publish-release.yml | 2 +- .github/workflows/publish-testpypi.yml | 2 +- .github/workflows/test-doc.yml | 2 +- .github/workflows/uv-test.yml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/publish-docs.yml b/.github/workflows/publish-docs.yml index 57a2d07f9d..af0d6b1aba 100644 --- a/.github/workflows/publish-docs.yml +++ b/.github/workflows/publish-docs.yml @@ -34,7 +34,7 @@ jobs: fetch-depth: 0 - name: 🐍 Install uv and set Python ${{ matrix.python-version }} - uses: astral-sh/setup-uv@7edac99f961f18b581bbd960d59d049f04c0002f # v6.4.1 + uses: astral-sh/setup-uv@e92bafb6253dcd438e0484186d7669ea7a8ca1cc # v6.4.3 with: python-version: ${{ matrix.python-version }} activate-environment: true diff --git a/.github/workflows/publish-pre-release.yml b/.github/workflows/publish-pre-release.yml index be0a70f07e..7253538603 100644 --- a/.github/workflows/publish-pre-release.yml +++ 
b/.github/workflows/publish-pre-release.yml @@ -29,7 +29,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: 🐍 Install uv and set Python version ${{ matrix.python-version }} - uses: astral-sh/setup-uv@7edac99f961f18b581bbd960d59d049f04c0002f # v6.4.1 + uses: astral-sh/setup-uv@e92bafb6253dcd438e0484186d7669ea7a8ca1cc # v6.4.3 with: python-version: ${{ matrix.python-version }} activate-environment: true diff --git a/.github/workflows/publish-release.yml b/.github/workflows/publish-release.yml index 2ed0b22d2d..2b90a3e9ee 100644 --- a/.github/workflows/publish-release.yml +++ b/.github/workflows/publish-release.yml @@ -27,7 +27,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: 🐍 Install uv and set Python version ${{ matrix.python-version }} - uses: astral-sh/setup-uv@7edac99f961f18b581bbd960d59d049f04c0002f # v6.4.1 + uses: astral-sh/setup-uv@e92bafb6253dcd438e0484186d7669ea7a8ca1cc # v6.4.3 with: python-version: ${{ matrix.python-version }} activate-environment: true diff --git a/.github/workflows/publish-testpypi.yml b/.github/workflows/publish-testpypi.yml index 1d963431c8..c15a71f9b8 100644 --- a/.github/workflows/publish-testpypi.yml +++ b/.github/workflows/publish-testpypi.yml @@ -24,7 +24,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: 🐍 Install uv and set Python version ${{ matrix.python-version }} - uses: astral-sh/setup-uv@7edac99f961f18b581bbd960d59d049f04c0002f # v6.4.1 + uses: astral-sh/setup-uv@e92bafb6253dcd438e0484186d7669ea7a8ca1cc # v6.4.3 with: python-version: ${{ matrix.python-version }} activate-environment: true diff --git a/.github/workflows/test-doc.yml b/.github/workflows/test-doc.yml index 6e069b10e9..54d9339713 100644 --- a/.github/workflows/test-doc.yml +++ b/.github/workflows/test-doc.yml @@ -24,7 +24,7 @@ jobs: fetch-depth: 0 - name: 🐍 Install uv and set Python ${{ matrix.python-version }} - uses: 
astral-sh/setup-uv@7edac99f961f18b581bbd960d59d049f04c0002f # v6.4.1 + uses: astral-sh/setup-uv@e92bafb6253dcd438e0484186d7669ea7a8ca1cc # v6.4.3 with: python-version: ${{ matrix.python-version }} activate-environment: true diff --git a/.github/workflows/uv-test.yml b/.github/workflows/uv-test.yml index 42b673cfeb..8e6bb65968 100644 --- a/.github/workflows/uv-test.yml +++ b/.github/workflows/uv-test.yml @@ -19,7 +19,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: 🐍 Install uv and set Python version ${{ matrix.python-version }} - uses: astral-sh/setup-uv@7edac99f961f18b581bbd960d59d049f04c0002f # v6.4.1 + uses: astral-sh/setup-uv@e92bafb6253dcd438e0484186d7669ea7a8ca1cc # v6.4.3 with: python-version: ${{ matrix.python-version }} activate-environment: true From beaf89cb22f0aee82f71774fddb06beda7449293 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Mon, 28 Jul 2025 08:24:43 +0200 Subject: [PATCH 022/124] initial version of advanced key points filtering and slicing + some naming refactor --- docs/how_to/track_objects.md | 4 +- docs/keypoint/annotators.md | 12 +- docs/keypoint/core.md | 2 +- supervision/__init__.py | 4 +- .../{keypoint => key_points}/__init__.py | 0 .../{keypoint => key_points}/annotators.py | 4 +- supervision/{keypoint => key_points}/core.py | 192 +++++--- .../{keypoint => key_points}/skeletons.py | 0 supervision/validators/__init__.py | 6 +- test/key_points/__init__.py | 0 test/key_points/test_core.py | 444 ++++++++++++++++++ test/test_utils.py | 11 +- 12 files changed, 595 insertions(+), 84 deletions(-) rename supervision/{keypoint => key_points}/__init__.py (100%) rename supervision/{keypoint => key_points}/annotators.py (99%) rename supervision/{keypoint => key_points}/core.py (81%) rename supervision/{keypoint => key_points}/skeletons.py (100%) create mode 100644 test/key_points/__init__.py create mode 100644 test/key_points/test_core.py diff --git a/docs/how_to/track_objects.md 
b/docs/how_to/track_objects.md index c2be3cef6a..9bf17e8651 100644 --- a/docs/how_to/track_objects.md +++ b/docs/how_to/track_objects.md @@ -345,7 +345,7 @@ Supervision is versatile and compatible with various models. Check this [link](/ We will define a `callback` function, which will process each frame of the video by obtaining model predictions and then annotating the frame based on these predictions. -Let's immediately visualize the results with our [`EdgeAnnotator`](/latest/keypoint/annotators/#supervision.keypoint.annotators.EdgeAnnotator) and [`VertexAnnotator`](https://supervision.roboflow.com/latest/keypoint/annotators/#supervision.keypoint.annotators.VertexAnnotator). +Let's immediately visualize the results with our [`EdgeAnnotator`](/latest/keypoint/annotators/#supervision.key_points.annotators.EdgeAnnotator) and [`VertexAnnotator`](https://supervision.roboflow.com/latest/keypoint/annotators/#supervision.key_points.annotators.VertexAnnotator). === "Ultralytics" @@ -408,7 +408,7 @@ Let's immediately visualize the results with our [`EdgeAnnotator`](/latest/keypo ### Convert to Detections -Keypoint tracking is currently supported via the conversion of `KeyPoints` to `Detections`. This is achieved with the [`KeyPoints.as_detections()`](/latest/keypoint/core/#supervision.keypoint.core.KeyPoints.as_detections) function. +Keypoint tracking is currently supported via the conversion of `KeyPoints` to `Detections`. This is achieved with the [`KeyPoints.as_detections()`](/latest/keypoint/core/#supervision.key_points.core.KeyPoints.as_detections) function. Let's convert to detections and visualize the results with our [`BoxAnnotator`](/latest/detection/annotators/#supervision.annotators.core.BoxAnnotator). 
diff --git a/docs/keypoint/annotators.md b/docs/keypoint/annotators.md index 32f30626bb..92c7cebaf5 100644 --- a/docs/keypoint/annotators.md +++ b/docs/keypoint/annotators.md @@ -78,19 +78,19 @@ comments: true -:::supervision.keypoint.annotators.VertexAnnotator +:::supervision.key_points.annotators.VertexAnnotator -:::supervision.keypoint.annotators.EdgeAnnotator +:::supervision.key_points.annotators.EdgeAnnotator -:::supervision.keypoint.annotators.VertexLabelAnnotator +:::supervision.key_points.annotators.VertexLabelAnnotator diff --git a/docs/keypoint/core.md b/docs/keypoint/core.md index 7354babab0..acb13e156c 100644 --- a/docs/keypoint/core.md +++ b/docs/keypoint/core.md @@ -5,4 +5,4 @@ status: new # Keypoint Detection -:::supervision.keypoint.core.KeyPoints +:::supervision.key_points.core.KeyPoints diff --git a/supervision/__init__.py b/supervision/__init__.py index cc54fe7098..ab45651ac9 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -108,12 +108,12 @@ ) from supervision.geometry.core import Point, Position, Rect from supervision.geometry.utils import get_polygon_center -from supervision.keypoint.annotators import ( +from supervision.key_points.annotators import ( EdgeAnnotator, VertexAnnotator, VertexLabelAnnotator, ) -from supervision.keypoint.core import KeyPoints +from supervision.key_points.core import KeyPoints from supervision.metrics.detection import ConfusionMatrix, MeanAveragePrecision from supervision.tracker.byte_tracker.core import ByteTrack from supervision.utils.conversion import cv2_to_pillow, pillow_to_cv2 diff --git a/supervision/keypoint/__init__.py b/supervision/key_points/__init__.py similarity index 100% rename from supervision/keypoint/__init__.py rename to supervision/key_points/__init__.py diff --git a/supervision/keypoint/annotators.py b/supervision/key_points/annotators.py similarity index 99% rename from supervision/keypoint/annotators.py rename to supervision/key_points/annotators.py index 
d83d7d5e0f..ab9f04d171 100644 --- a/supervision/keypoint/annotators.py +++ b/supervision/key_points/annotators.py @@ -11,8 +11,8 @@ from supervision.draw.color import Color from supervision.draw.utils import draw_rounded_rectangle from supervision.geometry.core import Rect -from supervision.keypoint.core import KeyPoints -from supervision.keypoint.skeletons import SKELETONS_BY_VERTEX_COUNT +from supervision.key_points.core import KeyPoints +from supervision.key_points.skeletons import SKELETONS_BY_VERTEX_COUNT from supervision.utils.conversion import ensure_cv2_image_for_annotation diff --git a/supervision/keypoint/core.py b/supervision/key_points/core.py similarity index 81% rename from supervision/keypoint/core.py rename to supervision/key_points/core.py index 0d57c56183..fb37e6289d 100644 --- a/supervision/keypoint/core.py +++ b/supervision/key_points/core.py @@ -10,7 +10,7 @@ from supervision.config import CLASS_NAME_DATA_FIELD from supervision.detection.core import Detections from supervision.detection.utils.internal import get_data_item, is_data_equal -from supervision.validators import validate_keypoints_fields +from supervision.validators import validate_key_points_fields @dataclass @@ -23,7 +23,7 @@ class simplifies data manipulation and filtering, providing a uniform API for === "Ultralytics" - Use [`sv.KeyPoints.from_ultralytics`](/latest/keypoint/core/#supervision.keypoint.core.KeyPoints.from_ultralytics) + Use [`sv.KeyPoints.from_ultralytics`](/latest/keypoint/core/#supervision.key_points.core.KeyPoints.from_ultralytics) method, which accepts [YOLOv8-pose](https://docs.ultralytics.com/models/yolov8/), [YOLO11-pose](https://docs.ultralytics.com/models/yolo11/) [pose](https://docs.ultralytics.com/tasks/pose/) result. 
@@ -41,7 +41,7 @@ class simplifies data manipulation and filtering, providing a uniform API for === "Inference" - Use [`sv.KeyPoints.from_inference`](/latest/keypoint/core/#supervision.keypoint.core.KeyPoints.from_inference) + Use [`sv.KeyPoints.from_inference`](/latest/keypoint/core/#supervision.key_points.core.KeyPoints.from_inference) method, which accepts [Inference](https://inference.roboflow.com/) pose result. ```python @@ -58,7 +58,7 @@ class simplifies data manipulation and filtering, providing a uniform API for === "MediaPipe" - Use [`sv.KeyPoints.from_mediapipe`](/latest/keypoint/core/#supervision.keypoint.core.KeyPoints.from_mediapipe) + Use [`sv.KeyPoints.from_mediapipe`](/latest/keypoint/core/#supervision.key_points.core.KeyPoints.from_mediapipe) method, which accepts [MediaPipe](https://github.com/google-ai-edge/mediapipe) pose result. @@ -88,11 +88,62 @@ class simplifies data manipulation and filtering, providing a uniform API for key_points = sv.KeyPoints.from_mediapipe( pose_landmarker_result, (image_width, image_height)) ``` + + === "Transformers" + + Use [`sv.KeyPoints.from_transformers`](/latest/keypoint/core/#supervision.key_points.core.KeyPoints.from_transformers) + method, which accepts [ViTPose](https://huggingface.co/docs/transformers/en/model_doc/vitpose) result. 
+ + ```python + from PIL import Image + import requests + import supervision as sv + import torch + from transformers import ( + AutoProcessor, + RTDetrForObjectDetection, + VitPoseForPoseEstimation, + ) + + device = "cuda" if torch.cuda.is_available() else "cpu" + image = Image.open() + + DETECTION_MODEL_ID = "PekingU/rtdetr_r50vd_coco_o365" + + detection_processor = AutoProcessor.from_pretrained(DETECTION_MODEL_ID, use_fast=True) + detection_model = RTDetrForObjectDetection.from_pretrained(DETECTION_MODEL_ID, device_map=DEVICE) + + inputs = detection_processor(images=frame, return_tensors="pt").to(DEVICE) + + with torch.no_grad(): + outputs = detection_model(**inputs) + + target_size = torch.tensor([(frame.height, frame.width)]) + results = detection_processor.post_process_object_detection( + outputs, target_sizes=target_size, threshold=0.3) + + detections = sv.Detections.from_transformers(results[0]) + boxes = sv.xyxy_to_xywh(detections[detections.class_id == 0].xyxy) + + POSE_ESTIMATION_MODEL_ID = "usyd-community/vitpose-base-simple" + + pose_estimation_processor = AutoProcessor.from_pretrained(POSE_ESTIMATION_MODEL_ID) + pose_estimation_model = VitPoseForPoseEstimation.from_pretrained( + POSE_ESTIMATION_MODEL_ID, device_map=DEVICE) + + inputs = pose_estimation_processor(frame, boxes=[boxes], return_tensors="pt").to(DEVICE) + + with torch.no_grad(): + outputs = pose_estimation_model(**inputs) + + results = pose_estimation_processor.post_process_pose_estimation(outputs, boxes=[boxes]) + key_point = sv.KeyPoints.from_transformers(results[0]) + ``` Attributes: xy (np.ndarray): An array of shape `(n, m, 2)` containing `n` detected objects, each composed of `m` equally-sized - sets of keypoints, where each point is `[x, y]`. + sets of key points, where each point is `[x, y]`. class_id (Optional[np.ndarray]): An array of shape `(n,)` containing the class ids of the detected objects. 
confidence (Optional[np.ndarray]): An array of shape @@ -109,7 +160,7 @@ class simplifies data manipulation and filtering, providing a uniform API for data: dict[str, npt.NDArray[Any] | list] = field(default_factory=dict) def __post_init__(self): - validate_keypoints_fields( + validate_key_points_fields( xy=self.xy, confidence=self.confidence, class_id=self.class_id, @@ -514,13 +565,13 @@ def from_detectron2(cls, detectron2_results: Any) -> KeyPoints: return cls.empty() @classmethod - def from_transformers(cls, transfomers_results: Any) -> KeyPoints: + def from_transformers(cls, transformers_results: Any) -> KeyPoints: """ Create a `sv.KeyPoints` object from the [Transformers](https://github.com/huggingface/transformers) inference result. Args: - transfomers_results (Any): The output of a + transformers_results (Any): The output of a Transformers model containing instances with prediction data. Returns: @@ -576,8 +627,8 @@ def from_transformers(cls, transfomers_results: Any) -> KeyPoints: """ # noqa: E501 // docs - if "keypoints" in transfomers_results[0]: - if transfomers_results[0]["keypoints"].cpu().numpy().size == 0: + if "keypoints" in transformers_results[0]: + if transformers_results[0]["keypoints"].cpu().numpy().size == 0: return cls.empty() result_data = [ @@ -585,7 +636,7 @@ def from_transformers(cls, transfomers_results: Any) -> KeyPoints: result["keypoints"].cpu().numpy(), result["scores"].cpu().numpy(), ) - for result in transfomers_results + for result in transformers_results ] xy, scores = zip(*result_data) @@ -599,55 +650,72 @@ def from_transformers(cls, transfomers_results: Any) -> KeyPoints: return cls.empty() def __getitem__( - self, index: int | slice | list[int] | np.ndarray | str - ) -> KeyPoints | list | np.ndarray | None: - """ - Get a subset of the `sv.KeyPoints` object or access an item from its data field. 
+ self, index: int | slice | list[int] | np.ndarray | tuple | str + ) -> KeyPoints | np.ndarray | list | None: + if isinstance(index, str): + return self.data.get(index) - When provided with an integer, slice, list of integers, or a numpy array, this - method returns a new `sv.KeyPoints` object that represents a subset of the - original `sv.KeyPoints`. When provided with a string, it accesses the - corresponding item in the data dictionary. + if not isinstance(index, tuple): + index = (index, slice(None)) - Args: - index (Union[int, slice, List[int], np.ndarray, str]): The index, indices, - or key to access a subset of the `sv.KeyPoints` or an item from the - data. + i, j = index - Returns: - A subset of the `sv.KeyPoints` object or an item from the data field. + if isinstance(i, int): + i = [i] - Examples: - ```python - import supervision as sv + if isinstance(i, list) and all(isinstance(x, bool) for x in i): + i = np.array(i) + if isinstance(j, list) and all(isinstance(x, bool) for x in j): + j = np.array(j) - key_points = sv.KeyPoints() + if isinstance(i, np.ndarray) and i.dtype == bool: + i = np.flatnonzero(i) + if isinstance(j, np.ndarray) and j.dtype == bool: + j = np.flatnonzero(j) - # access the first keypoint using an integer index - key_points[0] + if ( + isinstance(i, (list, np.ndarray)) + and isinstance(j, (list, np.ndarray)) + and not np.isscalar(i) + and not np.isscalar(j) + ): + i, j = np.ix_(i, j) - # access the first 10 keypoints using index slice - key_points[0:10] + xy_selected = self.xy[i, j] - # access selected keypoints using a list of indices - key_points[[0, 2, 4]] + conf_selected = ( + self.confidence[i, j] if self.confidence is not None else None + ) - # access keypoints with selected class_id - key_points[key_points.class_id == 0] + class_id_selected = ( + self.class_id[i] if self.class_id is not None else None + ) + + data_selected = get_data_item(self.data, i) + + if xy_selected.ndim == 1: + xy_selected = xy_selected.reshape(1, 1, 2) + 
if conf_selected is not None: + conf_selected = conf_selected.reshape(1, 1) + elif xy_selected.ndim == 2: + if np.isscalar(index[0]) or ( + isinstance(index[0], np.ndarray) and index[0].ndim == 0 + ): + xy_selected = xy_selected[np.newaxis, ...] + if conf_selected is not None: + conf_selected = conf_selected[np.newaxis, ...] + elif np.isscalar(index[1]) or ( + isinstance(index[1], np.ndarray) and index[1].ndim == 0 + ): + xy_selected = xy_selected[:, np.newaxis, :] + if conf_selected is not None: + conf_selected = conf_selected[:, np.newaxis] - # access keypoints with confidence greater than 0.5 - key_points[key_points.confidence > 0.5] - ``` - """ - if isinstance(index, str): - return self.data.get(index) - if isinstance(index, int): - index = [index] return KeyPoints( - xy=self.xy[index], - confidence=self.confidence[index] if self.confidence is not None else None, - class_id=self.class_id[index] if self.class_id is not None else None, - data=get_data_item(self.data, index), + xy=xy_selected, + confidence=conf_selected, + class_id=class_id_selected, + data=data_selected, ) def __setitem__(self, key: str, value: np.ndarray | list): @@ -668,12 +736,12 @@ def __setitem__(self, key: str, value: np.ndarray | list): model = YOLO('yolov8s.pt') result = model(image)[0] - keypoints = sv.KeyPoints.from_ultralytics(result) + key_points = sv.KeyPoints.from_ultralytics(result) - keypoints['class_name'] = [ + key_points['class_name'] = [ model.model.names[class_id] for class_id - in keypoints.class_id + in key_points.class_id ] ``` """ @@ -688,7 +756,7 @@ def __setitem__(self, key: str, value: np.ndarray | list): @classmethod def empty(cls) -> KeyPoints: """ - Create an empty Keypoints object with no keypoints. + Create an empty KeyPoints object with no key points. Returns: An empty `sv.KeyPoints` object. @@ -706,9 +774,9 @@ def is_empty(self) -> bool: """ Returns `True` if the `KeyPoints` object is considered empty. 
""" - empty_keypoints = KeyPoints.empty() - empty_keypoints.data = self.data - return self == empty_keypoints + empty_key_points = KeyPoints.empty() + empty_key_points.data = self.data + return self == empty_key_points def as_detections( self, selected_keypoint_indices: Iterable[int] | None = None @@ -716,21 +784,21 @@ def as_detections( """ Convert a KeyPoints object to a Detections object. This approximates the bounding box of the detected object by - taking the bounding box that fits all keypoints. + taking the bounding box that fits all key points. Arguments: selected_keypoint_indices (Optional[Iterable[int]]): The - indices of the keypoints to include in the bounding box - calculation. This helps focus on a subset of keypoints, - e.g. when some are occluded. Captures all keypoints by default. + indices of the key points to include in the bounding box + calculation. This helps focus on a subset of key points, + e.g. when some are occluded. Captures all key points by default. Returns: detections (Detections): The converted detections object. Examples: ```python - keypoints = sv.KeyPoints.from_inference(...) - detections = keypoints.as_detections() + key_points = sv.KeyPoints.from_inference(...) 
+ detections = key_points.as_detections() ``` """ if self.is_empty(): diff --git a/supervision/keypoint/skeletons.py b/supervision/key_points/skeletons.py similarity index 100% rename from supervision/keypoint/skeletons.py rename to supervision/key_points/skeletons.py diff --git a/supervision/validators/__init__.py b/supervision/validators/__init__.py index 97fedabdd9..f051d89d7a 100644 --- a/supervision/validators/__init__.py +++ b/supervision/validators/__init__.py @@ -53,7 +53,7 @@ def validate_confidence(confidence: Any, n: int) -> None: ) -def validate_keypoint_confidence(confidence: Any, n: int, m: int) -> None: +def validate_key_point_confidence(confidence: Any, n: int, m: int) -> None: expected_shape = f"({n, m})" actual_shape = str(getattr(confidence, "shape", None)) @@ -126,7 +126,7 @@ def validate_detections_fields( validate_data(data, n) -def validate_keypoints_fields( +def validate_key_points_fields( xy: Any, class_id: Any, confidence: Any, @@ -136,7 +136,7 @@ def validate_keypoints_fields( m = len(xy[0]) if len(xy) > 0 else 0 validate_xy(xy, n, m) validate_class_id(class_id, n) - validate_keypoint_confidence(confidence, n, m) + validate_key_point_confidence(confidence, n, m) validate_data(data, n) diff --git a/test/key_points/__init__.py b/test/key_points/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/key_points/test_core.py b/test/key_points/test_core.py new file mode 100644 index 0000000000..f12cc2fdbb --- /dev/null +++ b/test/key_points/test_core.py @@ -0,0 +1,444 @@ +import numpy as np +import pytest +from contextlib import nullcontext as DoesNotRaise +from supervision.key_points.core import KeyPoints +from test.test_utils import mock_key_points + +KEY_POINTS = mock_key_points( + xy=[ + [[0, 1], + [2, 3], + [4, 5], + [6, 7], + [8, 9]], + + [[10, 11], + [12, 13], + [14, 15], + [16, 17], + [18, 19]], + + [[20, 21], + [22, 23], + [24, 25], + [26, 27], + [28, 29]] + ], + confidence=[ + [0.8, 0.2, 0.6, 0.1, 0.5], + 
[0.7, 0.9, 0.3, 0.4, 0.0], + [0.1, 0.6, 0.8, 0.2, 0.7] + ], + class_id=[0, 1, 2], +) + + +@pytest.mark.parametrize( + "key_points, index, expected_result, exception", + [ + ( + KeyPoints.empty(), + slice(None), + KeyPoints.empty(), + DoesNotRaise(), + ), # slice all key points when key points object empty + ( + KEY_POINTS, + slice(None), + KEY_POINTS, + DoesNotRaise(), + ), # slice all key points when key points object nonempty + ( + KEY_POINTS, + slice(0, 1), + mock_key_points( + xy=[ + [[0, 1], + [2, 3], + [4, 5], + [6, 7], + [8, 9]] + ], + confidence=[ + [0.8, 0.2, 0.6, 0.1, 0.5] + ], + class_id=[0], + ), + DoesNotRaise(), + ), # select the first skeleton by slice + ( + KEY_POINTS, + slice(0, 2), + mock_key_points( + xy=[ + [[0, 1], + [2, 3], + [4, 5], + [6, 7], + [8, 9]], + + [[10, 11], + [12, 13], + [14, 15], + [16, 17], + [18, 19]], + ], + confidence=[ + [0.8, 0.2, 0.6, 0.1, 0.5], + [0.7, 0.9, 0.3, 0.4, 0.0], + ], + class_id=[0, 1], + ), + DoesNotRaise(), + ), # select the first skeleton by slice + ( + KEY_POINTS, + 0, + mock_key_points( + xy=[ + [[0, 1], + [2, 3], + [4, 5], + [6, 7], + [8, 9]] + ], + confidence=[ + [0.8, 0.2, 0.6, 0.1, 0.5] + ], + class_id=[0], + ), + DoesNotRaise(), + ), # select the first skeleton by index + ( + KEY_POINTS, + -1, + mock_key_points( + xy=[ + [[20, 21], + [22, 23], + [24, 25], + [26, 27], + [28, 29]] + ], + confidence=[ + [0.1, 0.6, 0.8, 0.2, 0.7] + ], + class_id=[2], + ), + DoesNotRaise(), + ), # select the last skeleton by index + ( + KEY_POINTS, + [0, 1], + mock_key_points( + xy=[ + [[0, 1], + [2, 3], + [4, 5], + [6, 7], + [8, 9]], + + [[10, 11], + [12, 13], + [14, 15], + [16, 17], + [18, 19]], + ], + confidence=[ + [0.8, 0.2, 0.6, 0.1, 0.5], + [0.7, 0.9, 0.3, 0.4, 0.0], + ], + class_id=[0, 1], + ), + DoesNotRaise(), + ), # select the first two skeletons by index; list + ( + KEY_POINTS, + np.array([0, 1]), + mock_key_points( + xy=[ + [[0, 1], + [2, 3], + [4, 5], + [6, 7], + [8, 9]], + + [[10, 11], + [12, 13], + [14, 15], 
+ [16, 17], + [18, 19]], + ], + confidence=[ + [0.8, 0.2, 0.6, 0.1, 0.5], + [0.7, 0.9, 0.3, 0.4, 0.0], + ], + class_id=[0, 1], + ), + DoesNotRaise(), + ), # select the first two skeletons by index; np.array + ( + KEY_POINTS, + [True, True, False], + mock_key_points( + xy=[ + [[0, 1], + [2, 3], + [4, 5], + [6, 7], + [8, 9]], + + [[10, 11], + [12, 13], + [14, 15], + [16, 17], + [18, 19]], + ], + confidence=[ + [0.8, 0.2, 0.6, 0.1, 0.5], + [0.7, 0.9, 0.3, 0.4, 0.0], + ], + class_id=[0, 1], + ), + DoesNotRaise(), + ), # select only skeletons associated with positive filter; list + ( + KEY_POINTS, + np.array([True, True, False]), + mock_key_points( + xy=[ + [[0, 1], + [2, 3], + [4, 5], + [6, 7], + [8, 9]], + + [[10, 11], + [12, 13], + [14, 15], + [16, 17], + [18, 19]], + ], + confidence=[ + [0.8, 0.2, 0.6, 0.1, 0.5], + [0.7, 0.9, 0.3, 0.4, 0.0], + ], + class_id=[0, 1], + ), + DoesNotRaise(), + ), # select only skeletons associated with positive filter; list + ( + KEY_POINTS, + (slice(None), slice(None)), + KEY_POINTS, + DoesNotRaise(), + ), # slice all anchors from all skeletons + ( + KEY_POINTS, + (slice(None), slice(0, 1)), + mock_key_points( + xy=[ + [[0, 1]], + + [[10, 11]], + + [[20, 21]] + ], + confidence=[ + [0.8], + [0.7], + [0.1] + ], + class_id=[0, 1, 2], + ), + DoesNotRaise(), + ), # slice the first anchor from every skeleton + ( + KEY_POINTS, + (slice(None), slice(0, 2)), + mock_key_points( + xy=[ + [[0, 1], + [2, 3]], + + [[10, 11], + [12, 13]], + + [[20, 21], + [22, 23]] + ], + confidence=[ + [0.8, 0.2], + [0.7, 0.9], + [0.1, 0.6] + ], + class_id=[0, 1, 2], + ), + DoesNotRaise(), + ), # slice the first anchor two anchors from every skeleton + ( + KEY_POINTS, + (slice(None), 0), + mock_key_points( + xy=[ + [[0, 1]], + + [[10, 11]], + + [[20, 21]] + ], + confidence=[ + [0.8], + [0.7], + [0.1] + ], + class_id=[0, 1, 2], + ), + DoesNotRaise(), + ), # select the first anchor from every skeleton by index + ( + KEY_POINTS, + (slice(None), -1), + mock_key_points( 
+ xy=[ + [[8, 9]], + + [[18, 19]], + + [[28, 29]] + ], + confidence=[ + [0.5], + [0.0], + [0.7] + ], + class_id=[0, 1, 2], + ), + DoesNotRaise(), + ), # select the last anchor from every skeleton by index + ( + KEY_POINTS, + (slice(None), [0, 1]), + mock_key_points( + xy=[ + [[0, 1], + [2, 3]], + + [[10, 11], + [12, 13]], + + [[20, 21], + [22, 23]] + ], + confidence=[ + [0.8, 0.2], + [0.7, 0.9], + [0.1, 0.6] + ], + class_id=[0, 1, 2], + ), + DoesNotRaise(), + ), # select the first two anchors from every skeleton by index; list + ( + KEY_POINTS, + (slice(None), np.array([0, 1])), + mock_key_points( + xy=[ + [[0, 1], + [2, 3]], + + [[10, 11], + [12, 13]], + + [[20, 21], + [22, 23]] + ], + confidence=[ + [0.8, 0.2], + [0.7, 0.9], + [0.1, 0.6] + ], + class_id=[0, 1, 2], + ), + DoesNotRaise(), + ), # select the first two anchors from every skeleton by index; np.array + ( + KEY_POINTS, + (slice(None), [True, True, False, False, False]), + mock_key_points( + xy=[ + [[0, 1], + [2, 3]], + + [[10, 11], + [12, 13]], + + [[20, 21], + [22, 23]] + ], + confidence=[ + [0.8, 0.2], + [0.7, 0.9], + [0.1, 0.6] + ], + class_id=[0, 1, 2], + ), + DoesNotRaise(), + ), # select only anchors associated with positive filter; list + ( + KEY_POINTS, + (slice(None), np.array([True, True, False, False, False])), + mock_key_points( + xy=[ + [[0, 1], + [2, 3]], + + [[10, 11], + [12, 13]], + + [[20, 21], + [22, 23]] + ], + confidence=[ + [0.8, 0.2], + [0.7, 0.9], + [0.1, 0.6] + ], + class_id=[0, 1, 2], + ), + DoesNotRaise(), + ), # select only anchors associated with positive filter; np.array + ( + KEY_POINTS, + (0, 0), + mock_key_points( + xy=[ + [[0, 1]], + ], + confidence=[ + [0.8], + ], + class_id=[0], + ), + DoesNotRaise(), + ), # select the first anchor from the first skeleton by index +( + KEY_POINTS, + (0, -1), + mock_key_points( + xy=[ + [[8, 9]], + ], + confidence=[ + [0.5], + ], + class_id=[0], + ), + DoesNotRaise(), + ), # select the last anchor from the first skeleton by index + ], +) 
+def test_key_points_getitem(key_points, index, expected_result, exception): + with exception: + result = key_points[index] + assert result == expected_result diff --git a/test/test_utils.py b/test/test_utils.py index 19fffad5bb..0a97bf4bff 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -3,14 +3,13 @@ from typing import Any import numpy as np -import numpy.typing as npt from supervision.detection.core import Detections -from supervision.keypoint.core import KeyPoints +from supervision.key_points.core import KeyPoints def mock_detections( - xyxy: npt.NDArray[np.float32], + xyxy: list[list[float]], mask: list[np.ndarray] | None = None, confidence: list[float] | None = None, class_id: list[int] | None = None, @@ -34,9 +33,9 @@ def convert_data(data: dict[str, list[Any]]): ) -def mock_keypoints( - xy: npt.NDArray[np.float32], - confidence: list[float] | None = None, +def mock_key_points( + xy: list[list[list[float]]], + confidence: list[list[float]] | None = None, class_id: list[int] | None = None, data: dict[str, list[Any]] | None = None, ) -> KeyPoints: From f4561f9ba7dcbd1c8e373cf2170c3b7edb6a0437 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 28 Jul 2025 06:29:02 +0000 Subject: [PATCH 023/124] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/key_points/core.py | 28 ++-- test/key_points/test_core.py | 256 ++++++--------------------------- 2 files changed, 52 insertions(+), 232 deletions(-) diff --git a/supervision/key_points/core.py b/supervision/key_points/core.py index fb37e6289d..d8214170bf 100644 --- a/supervision/key_points/core.py +++ b/supervision/key_points/core.py @@ -88,12 +88,12 @@ class simplifies data manipulation and filtering, providing a uniform API for key_points = sv.KeyPoints.from_mediapipe( pose_landmarker_result, 
(image_width, image_height)) ``` - + === "Transformers" - + Use [`sv.KeyPoints.from_transformers`](/latest/keypoint/core/#supervision.key_points.core.KeyPoints.from_transformers) method, which accepts [ViTPose](https://huggingface.co/docs/transformers/en/model_doc/vitpose) result. - + ```python from PIL import Image import requests @@ -650,7 +650,7 @@ def from_transformers(cls, transformers_results: Any) -> KeyPoints: return cls.empty() def __getitem__( - self, index: int | slice | list[int] | np.ndarray | tuple | str + self, index: int | slice | list[int] | np.ndarray | tuple | str ) -> KeyPoints | np.ndarray | list | None: if isinstance(index, str): return self.data.get(index) @@ -674,22 +674,18 @@ def __getitem__( j = np.flatnonzero(j) if ( - isinstance(i, (list, np.ndarray)) - and isinstance(j, (list, np.ndarray)) - and not np.isscalar(i) - and not np.isscalar(j) + isinstance(i, (list, np.ndarray)) + and isinstance(j, (list, np.ndarray)) + and not np.isscalar(i) + and not np.isscalar(j) ): i, j = np.ix_(i, j) xy_selected = self.xy[i, j] - conf_selected = ( - self.confidence[i, j] if self.confidence is not None else None - ) + conf_selected = self.confidence[i, j] if self.confidence is not None else None - class_id_selected = ( - self.class_id[i] if self.class_id is not None else None - ) + class_id_selected = self.class_id[i] if self.class_id is not None else None data_selected = get_data_item(self.data, i) @@ -699,13 +695,13 @@ def __getitem__( conf_selected = conf_selected.reshape(1, 1) elif xy_selected.ndim == 2: if np.isscalar(index[0]) or ( - isinstance(index[0], np.ndarray) and index[0].ndim == 0 + isinstance(index[0], np.ndarray) and index[0].ndim == 0 ): xy_selected = xy_selected[np.newaxis, ...] if conf_selected is not None: conf_selected = conf_selected[np.newaxis, ...] 
elif np.isscalar(index[1]) or ( - isinstance(index[1], np.ndarray) and index[1].ndim == 0 + isinstance(index[1], np.ndarray) and index[1].ndim == 0 ): xy_selected = xy_selected[:, np.newaxis, :] if conf_selected is not None: diff --git a/test/key_points/test_core.py b/test/key_points/test_core.py index f12cc2fdbb..5a8244cd87 100644 --- a/test/key_points/test_core.py +++ b/test/key_points/test_core.py @@ -1,33 +1,21 @@ +from contextlib import nullcontext as DoesNotRaise + import numpy as np import pytest -from contextlib import nullcontext as DoesNotRaise + from supervision.key_points.core import KeyPoints from test.test_utils import mock_key_points KEY_POINTS = mock_key_points( xy=[ - [[0, 1], - [2, 3], - [4, 5], - [6, 7], - [8, 9]], - - [[10, 11], - [12, 13], - [14, 15], - [16, 17], - [18, 19]], - - [[20, 21], - [22, 23], - [24, 25], - [26, 27], - [28, 29]] + [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]], + [[10, 11], [12, 13], [14, 15], [16, 17], [18, 19]], + [[20, 21], [22, 23], [24, 25], [26, 27], [28, 29]], ], confidence=[ [0.8, 0.2, 0.6, 0.1, 0.5], [0.7, 0.9, 0.3, 0.4, 0.0], - [0.1, 0.6, 0.8, 0.2, 0.7] + [0.1, 0.6, 0.8, 0.2, 0.7], ], class_id=[0, 1, 2], ) @@ -52,16 +40,8 @@ KEY_POINTS, slice(0, 1), mock_key_points( - xy=[ - [[0, 1], - [2, 3], - [4, 5], - [6, 7], - [8, 9]] - ], - confidence=[ - [0.8, 0.2, 0.6, 0.1, 0.5] - ], + xy=[[[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]]], + confidence=[[0.8, 0.2, 0.6, 0.1, 0.5]], class_id=[0], ), DoesNotRaise(), @@ -71,17 +51,8 @@ slice(0, 2), mock_key_points( xy=[ - [[0, 1], - [2, 3], - [4, 5], - [6, 7], - [8, 9]], - - [[10, 11], - [12, 13], - [14, 15], - [16, 17], - [18, 19]], + [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]], + [[10, 11], [12, 13], [14, 15], [16, 17], [18, 19]], ], confidence=[ [0.8, 0.2, 0.6, 0.1, 0.5], @@ -95,16 +66,8 @@ KEY_POINTS, 0, mock_key_points( - xy=[ - [[0, 1], - [2, 3], - [4, 5], - [6, 7], - [8, 9]] - ], - confidence=[ - [0.8, 0.2, 0.6, 0.1, 0.5] - ], + xy=[[[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]]], + 
confidence=[[0.8, 0.2, 0.6, 0.1, 0.5]], class_id=[0], ), DoesNotRaise(), @@ -113,16 +76,8 @@ KEY_POINTS, -1, mock_key_points( - xy=[ - [[20, 21], - [22, 23], - [24, 25], - [26, 27], - [28, 29]] - ], - confidence=[ - [0.1, 0.6, 0.8, 0.2, 0.7] - ], + xy=[[[20, 21], [22, 23], [24, 25], [26, 27], [28, 29]]], + confidence=[[0.1, 0.6, 0.8, 0.2, 0.7]], class_id=[2], ), DoesNotRaise(), @@ -132,17 +87,8 @@ [0, 1], mock_key_points( xy=[ - [[0, 1], - [2, 3], - [4, 5], - [6, 7], - [8, 9]], - - [[10, 11], - [12, 13], - [14, 15], - [16, 17], - [18, 19]], + [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]], + [[10, 11], [12, 13], [14, 15], [16, 17], [18, 19]], ], confidence=[ [0.8, 0.2, 0.6, 0.1, 0.5], @@ -157,17 +103,8 @@ np.array([0, 1]), mock_key_points( xy=[ - [[0, 1], - [2, 3], - [4, 5], - [6, 7], - [8, 9]], - - [[10, 11], - [12, 13], - [14, 15], - [16, 17], - [18, 19]], + [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]], + [[10, 11], [12, 13], [14, 15], [16, 17], [18, 19]], ], confidence=[ [0.8, 0.2, 0.6, 0.1, 0.5], @@ -182,17 +119,8 @@ [True, True, False], mock_key_points( xy=[ - [[0, 1], - [2, 3], - [4, 5], - [6, 7], - [8, 9]], - - [[10, 11], - [12, 13], - [14, 15], - [16, 17], - [18, 19]], + [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]], + [[10, 11], [12, 13], [14, 15], [16, 17], [18, 19]], ], confidence=[ [0.8, 0.2, 0.6, 0.1, 0.5], @@ -207,17 +135,8 @@ np.array([True, True, False]), mock_key_points( xy=[ - [[0, 1], - [2, 3], - [4, 5], - [6, 7], - [8, 9]], - - [[10, 11], - [12, 13], - [14, 15], - [16, 17], - [18, 19]], + [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]], + [[10, 11], [12, 13], [14, 15], [16, 17], [18, 19]], ], confidence=[ [0.8, 0.2, 0.6, 0.1, 0.5], @@ -237,18 +156,8 @@ KEY_POINTS, (slice(None), slice(0, 1)), mock_key_points( - xy=[ - [[0, 1]], - - [[10, 11]], - - [[20, 21]] - ], - confidence=[ - [0.8], - [0.7], - [0.1] - ], + xy=[[[0, 1]], [[10, 11]], [[20, 21]]], + confidence=[[0.8], [0.7], [0.1]], class_id=[0, 1, 2], ), DoesNotRaise(), @@ -257,21 +166,8 @@ KEY_POINTS, (slice(None), 
slice(0, 2)), mock_key_points( - xy=[ - [[0, 1], - [2, 3]], - - [[10, 11], - [12, 13]], - - [[20, 21], - [22, 23]] - ], - confidence=[ - [0.8, 0.2], - [0.7, 0.9], - [0.1, 0.6] - ], + xy=[[[0, 1], [2, 3]], [[10, 11], [12, 13]], [[20, 21], [22, 23]]], + confidence=[[0.8, 0.2], [0.7, 0.9], [0.1, 0.6]], class_id=[0, 1, 2], ), DoesNotRaise(), @@ -280,18 +176,8 @@ KEY_POINTS, (slice(None), 0), mock_key_points( - xy=[ - [[0, 1]], - - [[10, 11]], - - [[20, 21]] - ], - confidence=[ - [0.8], - [0.7], - [0.1] - ], + xy=[[[0, 1]], [[10, 11]], [[20, 21]]], + confidence=[[0.8], [0.7], [0.1]], class_id=[0, 1, 2], ), DoesNotRaise(), @@ -300,18 +186,8 @@ KEY_POINTS, (slice(None), -1), mock_key_points( - xy=[ - [[8, 9]], - - [[18, 19]], - - [[28, 29]] - ], - confidence=[ - [0.5], - [0.0], - [0.7] - ], + xy=[[[8, 9]], [[18, 19]], [[28, 29]]], + confidence=[[0.5], [0.0], [0.7]], class_id=[0, 1, 2], ), DoesNotRaise(), @@ -320,21 +196,8 @@ KEY_POINTS, (slice(None), [0, 1]), mock_key_points( - xy=[ - [[0, 1], - [2, 3]], - - [[10, 11], - [12, 13]], - - [[20, 21], - [22, 23]] - ], - confidence=[ - [0.8, 0.2], - [0.7, 0.9], - [0.1, 0.6] - ], + xy=[[[0, 1], [2, 3]], [[10, 11], [12, 13]], [[20, 21], [22, 23]]], + confidence=[[0.8, 0.2], [0.7, 0.9], [0.1, 0.6]], class_id=[0, 1, 2], ), DoesNotRaise(), @@ -343,21 +206,8 @@ KEY_POINTS, (slice(None), np.array([0, 1])), mock_key_points( - xy=[ - [[0, 1], - [2, 3]], - - [[10, 11], - [12, 13]], - - [[20, 21], - [22, 23]] - ], - confidence=[ - [0.8, 0.2], - [0.7, 0.9], - [0.1, 0.6] - ], + xy=[[[0, 1], [2, 3]], [[10, 11], [12, 13]], [[20, 21], [22, 23]]], + confidence=[[0.8, 0.2], [0.7, 0.9], [0.1, 0.6]], class_id=[0, 1, 2], ), DoesNotRaise(), @@ -366,21 +216,8 @@ KEY_POINTS, (slice(None), [True, True, False, False, False]), mock_key_points( - xy=[ - [[0, 1], - [2, 3]], - - [[10, 11], - [12, 13]], - - [[20, 21], - [22, 23]] - ], - confidence=[ - [0.8, 0.2], - [0.7, 0.9], - [0.1, 0.6] - ], + xy=[[[0, 1], [2, 3]], [[10, 11], [12, 13]], [[20, 21], [22, 
23]]], + confidence=[[0.8, 0.2], [0.7, 0.9], [0.1, 0.6]], class_id=[0, 1, 2], ), DoesNotRaise(), @@ -389,21 +226,8 @@ KEY_POINTS, (slice(None), np.array([True, True, False, False, False])), mock_key_points( - xy=[ - [[0, 1], - [2, 3]], - - [[10, 11], - [12, 13]], - - [[20, 21], - [22, 23]] - ], - confidence=[ - [0.8, 0.2], - [0.7, 0.9], - [0.1, 0.6] - ], + xy=[[[0, 1], [2, 3]], [[10, 11], [12, 13]], [[20, 21], [22, 23]]], + confidence=[[0.8, 0.2], [0.7, 0.9], [0.1, 0.6]], class_id=[0, 1, 2], ), DoesNotRaise(), @@ -422,7 +246,7 @@ ), DoesNotRaise(), ), # select the first anchor from the first skeleton by index -( + ( KEY_POINTS, (0, -1), mock_key_points( From 9a0483446aa0a60421975298befad721adc1be25 Mon Sep 17 00:00:00 2001 From: Alex Bodner Date: Mon, 28 Jul 2025 09:53:44 -0300 Subject: [PATCH 024/124] fixed docs --- supervision/keypoint/core.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/supervision/keypoint/core.py b/supervision/keypoint/core.py index 0d57c56183..be023d269b 100644 --- a/supervision/keypoint/core.py +++ b/supervision/keypoint/core.py @@ -545,9 +545,9 @@ def from_transformers(cls, transfomers_results: Any) -> KeyPoints: DETECTION_MODEL_ID = "PekingU/rtdetr_r50vd_coco_o365" detection_processor = AutoProcessor.from_pretrained(DETECTION_MODEL_ID, use_fast=True) - detection_model = RTDetrForObjectDetection.from_pretrained(DETECTION_MODEL_ID, device_map=DEVICE) + detection_model = RTDetrForObjectDetection.from_pretrained(DETECTION_MODEL_ID, device_map=device) - inputs = detection_processor(images=frame, return_tensors="pt").to(DEVICE) + inputs = detection_processor(images=frame, return_tensors="pt").to(device) with torch.no_grad(): outputs = detection_model(**inputs) @@ -563,9 +563,9 @@ def from_transformers(cls, transfomers_results: Any) -> KeyPoints: pose_estimation_processor = AutoProcessor.from_pretrained(POSE_ESTIMATION_MODEL_ID) pose_estimation_model = VitPoseForPoseEstimation.from_pretrained( - 
POSE_ESTIMATION_MODEL_ID, device_map=DEVICE) + POSE_ESTIMATION_MODEL_ID, device_map=device) - inputs = pose_estimation_processor(frame, boxes=[boxes], return_tensors="pt").to(DEVICE) + inputs = pose_estimation_processor(frame, boxes=[boxes], return_tensors="pt").to(device) with torch.no_grad(): outputs = pose_estimation_model(**inputs) From cb215d26f3bd09e8a707b751b32c30120ae58731 Mon Sep 17 00:00:00 2001 From: soumik12345 <19soumik.rakshit96@gmail.com> Date: Mon, 28 Jul 2025 19:03:17 +0530 Subject: [PATCH 025/124] add: support for an additional parameter text_anchor_offset in _BaseLabelAnnotator --- supervision/annotators/core.py | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/supervision/annotators/core.py b/supervision/annotators/core.py index 0b7d4b7632..7fd47852ee 100644 --- a/supervision/annotators/core.py +++ b/supervision/annotators/core.py @@ -61,7 +61,9 @@ class _BaseLabelAnnotator(BaseAnnotator): avoid overlapping with other elements. max_line_length (Optional[int]): Maximum number of characters per line before wrapping the text. None means no wrapping. - """ + text_anchor_offset (Tuple[int, int]): A tuple of 2D coordinates (x, y) to + offset the text position from the anchor point, in pixels. + """ # noqa: E501 def __init__( self, @@ -73,6 +75,7 @@ def __init__( border_radius: int = 0, smart_position: bool = False, max_line_length: int | None = None, + text_anchor_offset: tuple[int, int] = (0, 0), ): """ Initializes the _BaseLabelAnnotator. @@ -93,8 +96,9 @@ def __init__( position to avoid overlapping with other elements. max_line_length (Optional[int], optional): Maximum number of characters per line before wrapping the text. None means no wrapping. - - """ + text_anchor_offset (Tuple[int, int], optional): A tuple of 2D coordinates + (x, y) to offset the text position from the anchor point, in pixels. 
+ """ # noqa: E501 self.color: Color | ColorPalette = color self.text_color: Color | ColorPalette = text_color self.text_padding: int = text_padding @@ -103,6 +107,7 @@ def __init__( self.border_radius: int = border_radius self.smart_position = smart_position self.max_line_length: int | None = max_line_length + self.text_anchor_offset: tuple[int, int] = text_anchor_offset def _adjust_labels_in_frame( self, @@ -1085,6 +1090,7 @@ def __init__( border_radius: int = 0, smart_position: bool = False, max_line_length: int | None = None, + text_anchor_offset: tuple[int, int] = (0, 0), ): self.text_scale: float = text_scale self.text_thickness: int = text_thickness @@ -1097,6 +1103,7 @@ def __init__( border_radius=border_radius, smart_position=smart_position, max_line_length=max_line_length, + text_anchor_offset=text_anchor_offset, ) @ensure_cv2_image_for_annotation @@ -1145,6 +1152,12 @@ def _get_label_properties( ).astype(int) for label, center_coords in zip(labels, anchors_coordinates): + # Apply the text anchor offset + offset_coords = ( + center_coords[0] + self.text_anchor_offset[0], + center_coords[1] + self.text_anchor_offset[1], + ) + wrapped_lines = wrap_text(label, self.max_line_length) line_heights = [] line_widths = [] @@ -1170,7 +1183,7 @@ def _get_label_properties( height_padded = total_height + 2 * self.text_padding text_background_xyxy = resolve_text_background_xyxy( - center_coordinates=tuple(center_coords), + center_coordinates=tuple(offset_coords), text_wh=(width_padded, height_padded), position=self.text_anchor, ) @@ -1326,6 +1339,7 @@ def __init__( border_radius: int = 0, smart_position: bool = False, max_line_length: int | None = None, + text_anchor_offset: tuple[int, int] = (0, 0), ): self.font_path = font_path self.font_size = font_size @@ -1339,6 +1353,7 @@ def __init__( border_radius=border_radius, smart_position=smart_position, max_line_length=max_line_length, + text_anchor_offset=text_anchor_offset, ) @ensure_pil_image_for_annotation @@ -1387,6 
+1402,12 @@ def _get_label_properties( ).astype(int) for label, center_coords in zip(labels, anchor_coordinates): + # Apply the text anchor offset + offset_coords = ( + center_coords[0] + self.text_anchor_offset[0], + center_coords[1] + self.text_anchor_offset[1], + ) + wrapped_lines = wrap_text(label, self.max_line_length) # Calculate the total text height and maximum width @@ -1409,7 +1430,7 @@ def _get_label_properties( height_padded = int(total_height + 2 * self.text_padding) text_background_xyxy = resolve_text_background_xyxy( - center_coordinates=tuple(center_coords), + center_coordinates=tuple(offset_coords), text_wh=(width_padded, height_padded), position=self.text_anchor, ) From 063e2092c856c3f25399548d75074fb61ac9545f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 28 Jul 2025 17:28:44 +0000 Subject: [PATCH 026/124] =?UTF-8?q?chore(pre=5Fcommit):=20=E2=AC=86=20pre?= =?UTF-8?q?=5Fcommit=20autoupdate?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.12.4 → v0.12.5](https://github.com/astral-sh/ruff-pre-commit/compare/v0.12.4...v0.12.5) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 69fb2a520b..87528b7e40 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -32,7 +32,7 @@ repos: additional_dependencies: ["bandit[toml]"] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.12.4 + rev: v0.12.5 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] From 71a14812fe819c77e725b188486746b5d6a7a201 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Tue, 29 Jul 2025 14:39:41 +0200 Subject: [PATCH 027/124] `LabelAnnotator` and `RichLabelAnnotator` docs improvement --- supervision/annotators/core.py | 200 ++++++++++++++++++++++++++------- 1 file changed, 159 insertions(+), 41 
deletions(-) diff --git a/supervision/annotators/core.py b/supervision/annotators/core.py index 7fd47852ee..178f3fbcce 100644 --- a/supervision/annotators/core.py +++ b/supervision/annotators/core.py @@ -51,63 +51,63 @@ class _BaseLabelAnnotator(BaseAnnotator): Attributes: color (Union[Color, ColorPalette]): The color to use for the label background. + color_lookup (ColorLookup): The method used to determine the color of the label. text_color (Union[Color, ColorPalette]): The color to use for the label text. text_padding (int): The padding around the label text, in pixels. text_anchor (Position): The position of the text relative to the detection - bounding box. - color_lookup (ColorLookup): The method used to determine the color of the label. + bounding box. + text_offset (Tuple[int, int]): A tuple of 2D coordinates `(x, y)` to + offset the text position from the anchor point, in pixels. border_radius (int): The radius of the label background corners, in pixels. smart_position (bool): Whether to intelligently adjust the label position to - avoid overlapping with other elements. + avoid overlapping with other elements. max_line_length (Optional[int]): Maximum number of characters per line before - wrapping the text. None means no wrapping. - text_anchor_offset (Tuple[int, int]): A tuple of 2D coordinates (x, y) to - offset the text position from the anchor point, in pixels. + wrapping the text. None means no wrapping. """ # noqa: E501 def __init__( self, color: Color | ColorPalette = ColorPalette.DEFAULT, + color_lookup: ColorLookup = ColorLookup.CLASS, text_color: Color | ColorPalette = Color.WHITE, text_padding: int = 10, text_position: Position = Position.TOP_LEFT, - color_lookup: ColorLookup = ColorLookup.CLASS, + text_offset: tuple[int, int] = (0, 0), border_radius: int = 0, smart_position: bool = False, max_line_length: int | None = None, - text_anchor_offset: tuple[int, int] = (0, 0), ): """ Initializes the _BaseLabelAnnotator. 
Args: color (Union[Color, ColorPalette], optional): The color to use for the label - background. + background. + color_lookup (ColorLookup, optional): The method used to determine the color + of the label text_color (Union[Color, ColorPalette], optional): The color to use for the - label text. + label text. text_padding (int, optional): The padding around the label text, in pixels. text_position (Position, optional): The position of the text relative to the - detection bounding box. - color_lookup (ColorLookup, optional): The method used to determine the color - of the label + detection bounding box. + text_offset (Tuple[int, int], optional): A tuple of 2D coordinates + `(x, y)` to offset the text position from the anchor point, in pixels. border_radius (int, optional): The radius of the label background corners, - in pixels. + in pixels. smart_position (bool, optional): Whether to intelligently adjust the label - position to avoid overlapping with other elements. + position to avoid overlapping with other elements. max_line_length (Optional[int], optional): Maximum number of characters per - line before wrapping the text. None means no wrapping. - text_anchor_offset (Tuple[int, int], optional): A tuple of 2D coordinates - (x, y) to offset the text position from the anchor point, in pixels. + line before wrapping the text. None means no wrapping. 
""" # noqa: E501 self.color: Color | ColorPalette = color + self.color_lookup: ColorLookup = color_lookup self.text_color: Color | ColorPalette = text_color self.text_padding: int = text_padding self.text_anchor: Position = text_position - self.color_lookup: ColorLookup = color_lookup + self.text_offset: tuple[int, int] = text_offset self.border_radius: int = border_radius self.smart_position = smart_position self.max_line_length: int | None = max_line_length - self.text_anchor_offset: tuple[int, int] = text_anchor_offset def _adjust_labels_in_frame( self, @@ -1081,39 +1081,100 @@ class LabelAnnotator(_BaseLabelAnnotator): def __init__( self, color: Color | ColorPalette = ColorPalette.DEFAULT, + color_lookup: ColorLookup = ColorLookup.CLASS, text_color: Color | ColorPalette = Color.WHITE, text_scale: float = 0.5, text_thickness: int = 1, text_padding: int = 10, text_position: Position = Position.TOP_LEFT, - color_lookup: ColorLookup = ColorLookup.CLASS, + text_offset: tuple[int, int] = (0, 0), border_radius: int = 0, smart_position: bool = False, max_line_length: int | None = None, - text_anchor_offset: tuple[int, int] = (0, 0), ): + """ + Args: + color (Union[Color, ColorPalette]): The color or color palette to use for + annotating the text background. + color_lookup (ColorLookup): Strategy for mapping colors to annotations. + Options are `INDEX`, `CLASS`, `TRACK`. + text_color (Union[Color, ColorPalette]): The color or color palette to use + for the text. + text_scale (float): Font scale for the text. + text_thickness (int): Thickness of the text characters. + text_padding (int): Padding around the text within its background box. + text_position (Position): Position of the text relative to the detection. + Possible values are defined in the `Position` enum. + text_offset (Tuple[int, int]): A tuple of 2D coordinates `(x, y)` to + offset the text position from the anchor point, in pixels. + border_radius (int): The radius to apply round edges. 
If the selected + value is higher than the lower dimension, width or height, is clipped. + smart_position (bool): Spread out the labels to avoid overlapping. + max_line_length (Optional[int]): Maximum number of characters per line before + wrapping the text. None means no wrapping. + """ self.text_scale: float = text_scale self.text_thickness: int = text_thickness super().__init__( color=color, + color_lookup=color_lookup, text_color=text_color, text_padding=text_padding, text_position=text_position, - color_lookup=color_lookup, + text_offset=text_offset, border_radius=border_radius, smart_position=smart_position, max_line_length=max_line_length, - text_anchor_offset=text_anchor_offset, ) @ensure_cv2_image_for_annotation def annotate( self, - scene: ImageType, # Ensure scene is initially a NumPy array here + scene: ImageType, detections: Detections, labels: list[str] | None = None, custom_color_lookup: np.ndarray | None = None, ) -> np.ndarray: + """ + Annotates the given scene with labels based on the provided detections. + + Args: + scene (ImageType): The image where labels will be drawn. + `ImageType` is a flexible type, accepting either `numpy.ndarray` + or `PIL.Image.Image`. + detections (Detections): Object detections to annotate. + labels (Optional[List[str]]): Custom labels for each detection. + custom_color_lookup (Optional[np.ndarray]): Custom color lookup array. + Allows to override the default color mapping strategy. + + Returns: + The annotated image, matching the type of `scene` (`numpy.ndarray` + or `PIL.Image.Image`) + + Example: + ```python + import supervision as sv + + image = ... + detections = sv.Detections(...) 
+ + labels = [ + f"{class_name} {confidence:.2f}" + for class_name, confidence + in zip(detections['class_name'], detections.confidence) + ] + + label_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER) + annotated_frame = label_annotator.annotate( + scene=image.copy(), + detections=detections, + labels=labels + ) + ``` + + ![label-annotator-example](https://media.roboflow.com/ + supervision-annotator-examples/label-annotator-example-purple.png) + """ assert isinstance(scene, np.ndarray) validate_labels(labels, detections) @@ -1151,11 +1212,10 @@ def _get_label_properties( anchor=self.text_anchor ).astype(int) - for label, center_coords in zip(labels, anchors_coordinates): - # Apply the text anchor offset - offset_coords = ( - center_coords[0] + self.text_anchor_offset[0], - center_coords[1] + self.text_anchor_offset[1], + for label, center_coordinates in zip(labels, anchors_coordinates): + center_coordinates = ( + center_coordinates[0] + self.text_offset[0], + center_coordinates[1] + self.text_offset[1], ) wrapped_lines = wrap_text(label, self.max_line_length) @@ -1183,7 +1243,7 @@ def _get_label_properties( height_padded = total_height + 2 * self.text_padding text_background_xyxy = resolve_text_background_xyxy( - center_coordinates=tuple(offset_coords), + center_coordinates=center_coordinates, text_wh=(width_padded, height_padded), position=self.text_anchor, ) @@ -1330,30 +1390,51 @@ class RichLabelAnnotator(_BaseLabelAnnotator): def __init__( self, color: Color | ColorPalette = ColorPalette.DEFAULT, + color_lookup: ColorLookup = ColorLookup.CLASS, text_color: Color | ColorPalette = Color.WHITE, font_path: str | None = None, font_size: int = 10, text_padding: int = 10, text_position: Position = Position.TOP_LEFT, - color_lookup: ColorLookup = ColorLookup.CLASS, + text_offset: tuple[int, int] = (0, 0), border_radius: int = 0, smart_position: bool = False, max_line_length: int | None = None, - text_anchor_offset: tuple[int, int] = (0, 0), ): + """ + 
Args: + color (Union[Color, ColorPalette]): The color or color palette to use for + annotating the text background. + color_lookup (ColorLookup): Strategy for mapping colors to annotations. + Options are `INDEX`, `CLASS`, `TRACK`. + text_color (Union[Color, ColorPalette]): The color to use for the text. + font_path (Optional[str]): Path to the font file (e.g., ".ttf" or ".otf") + to use for rendering text. If `None`, the default PIL font will be used. + font_size (int): Font size for the text. + text_padding (int): Padding around the text within its background box. + text_position (Position): Position of the text relative to the detection. + Possible values are defined in the `Position` enum. + text_offset (Tuple[int, int]): A tuple of 2D coordinates `(x, y)` to + offset the text position from the anchor point, in pixels. + border_radius (int): The radius to apply round edges. If the selected + value is higher than the lower dimension, width or height, is clipped. + smart_position (bool): Spread out the labels to avoid overlapping. + max_line_length (Optional[int]): Maximum number of characters per line before + wrapping the text. None means no wrapping. + """ self.font_path = font_path self.font_size = font_size self.font = self._load_font(font_size, font_path) super().__init__( color=color, + color_lookup=color_lookup, text_color=text_color, text_padding=text_padding, text_position=text_position, - color_lookup=color_lookup, + text_offset=text_offset, border_radius=border_radius, smart_position=smart_position, max_line_length=max_line_length, - text_anchor_offset=text_anchor_offset, ) @ensure_pil_image_for_annotation @@ -1364,6 +1445,44 @@ def annotate( labels: list[str] | None = None, custom_color_lookup: np.ndarray | None = None, ) -> ImageType: + """ + Annotates the given scene with labels based on the provided + detections, with support for Unicode characters. + + Args: + scene (ImageType): The image where labels will be drawn. 
+ `ImageType` is a flexible type, accepting either `numpy.ndarray` + or `PIL.Image.Image`. + detections (Detections): Object detections to annotate. + labels (Optional[List[str]]): Custom labels for each detection. + custom_color_lookup (Optional[np.ndarray]): Custom color lookup array. + Allows to override the default color mapping strategy. + + Returns: + The annotated image, matching the type of `scene` (`numpy.ndarray` + or `PIL.Image.Image`) + + Example: + ```python + import supervision as sv + + image = ... + detections = sv.Detections(...) + + labels = [ + f"{class_name} {confidence:.2f}" + for class_name, confidence + in zip(detections['class_name'], detections.confidence) + ] + + rich_label_annotator = sv.RichLabelAnnotator(font_path="path/to/font.ttf") + annotated_frame = label_annotator.annotate( + scene=image.copy(), + detections=detections, + labels=labels + ) + ``` + """ assert isinstance(scene, Image.Image) validate_labels(labels, detections) @@ -1401,11 +1520,10 @@ def _get_label_properties( anchor=self.text_anchor ).astype(int) - for label, center_coords in zip(labels, anchor_coordinates): - # Apply the text anchor offset - offset_coords = ( - center_coords[0] + self.text_anchor_offset[0], - center_coords[1] + self.text_anchor_offset[1], + for label, center_coordinates in zip(labels, anchor_coordinates): + center_coordinates = ( + center_coordinates[0] + self.text_offset[0], + center_coordinates[1] + self.text_offset[1], ) wrapped_lines = wrap_text(label, self.max_line_length) @@ -1430,7 +1548,7 @@ def _get_label_properties( height_padded = int(total_height + 2 * self.text_padding) text_background_xyxy = resolve_text_background_xyxy( - center_coordinates=tuple(offset_coords), + center_coordinates=center_coordinates, text_wh=(width_padded, height_padded), position=self.text_anchor, ) From 21e311a319e03f9274986e1e63a735aa1cc8abc1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 29 
Jul 2025 12:40:05 +0000 Subject: [PATCH 028/124] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/annotators/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/supervision/annotators/core.py b/supervision/annotators/core.py index 178f3fbcce..8b0d854cbf 100644 --- a/supervision/annotators/core.py +++ b/supervision/annotators/core.py @@ -63,7 +63,7 @@ class _BaseLabelAnnotator(BaseAnnotator): avoid overlapping with other elements. max_line_length (Optional[int]): Maximum number of characters per line before wrapping the text. None means no wrapping. - """ # noqa: E501 + """ def __init__( self, @@ -98,7 +98,7 @@ def __init__( position to avoid overlapping with other elements. max_line_length (Optional[int], optional): Maximum number of characters per line before wrapping the text. None means no wrapping. - """ # noqa: E501 + """ self.color: Color | ColorPalette = color self.color_lookup: ColorLookup = color_lookup self.text_color: Color | ColorPalette = text_color From 6f2490cc2360e20469e0643f1429f14f0bb73694 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Tue, 29 Jul 2025 14:43:38 +0200 Subject: [PATCH 029/124] fix formatting --- supervision/annotators/core.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/supervision/annotators/core.py b/supervision/annotators/core.py index 8b0d854cbf..b77d2c0bd0 100644 --- a/supervision/annotators/core.py +++ b/supervision/annotators/core.py @@ -1110,8 +1110,8 @@ def __init__( border_radius (int): The radius to apply round edges. If the selected value is higher than the lower dimension, width or height, is clipped. smart_position (bool): Spread out the labels to avoid overlapping. - max_line_length (Optional[int]): Maximum number of characters per line before - wrapping the text. None means no wrapping. 
+ max_line_length (Optional[int]): Maximum number of characters per line + before wrapping the text. None means no wrapping. """ self.text_scale: float = text_scale self.text_thickness: int = text_thickness @@ -1419,8 +1419,8 @@ def __init__( border_radius (int): The radius to apply round edges. If the selected value is higher than the lower dimension, width or height, is clipped. smart_position (bool): Spread out the labels to avoid overlapping. - max_line_length (Optional[int]): Maximum number of characters per line before - wrapping the text. None means no wrapping. + max_line_length (Optional[int]): Maximum number of characters per line + before wrapping the text. None means no wrapping. """ self.font_path = font_path self.font_size = font_size From 22b6d3d464fd23033a94f925f56eea852e2dce6b Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Wed, 30 Jul 2025 02:04:24 -0400 Subject: [PATCH 030/124] ADD: Added a spline annotator --- supervision/__init__.py | 1 + supervision/annotators/core.py | 82 ++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/supervision/__init__.py b/supervision/__init__.py index ab45651ac9..2a3a642d4c 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -29,6 +29,7 @@ RichLabelAnnotator, RoundBoxAnnotator, TraceAnnotator, + SplineAnnotator, TriangleAnnotator, ) from supervision.annotators.utils import ColorLookup diff --git a/supervision/annotators/core.py b/supervision/annotators/core.py index b77d2c0bd0..09073728eb 100644 --- a/supervision/annotators/core.py +++ b/supervision/annotators/core.py @@ -7,6 +7,7 @@ import numpy as np import numpy.typing as npt from PIL import Image, ImageDraw, ImageFont +from scipy.interpolate import splprep, splev from supervision.annotators.base import BaseAnnotator, ImageType from supervision.annotators.utils import ( @@ -1908,6 +1909,7 @@ def annotate( else custom_color_lookup, ) xy = self.trace.get(tracker_id=tracker_id) + if len(xy) > 1: scene = cv2.polylines( 
scene, @@ -1918,6 +1920,86 @@ def annotate( ) return scene +class SplineAnnotator(BaseAnnotator): + """ + A class for drawing trace paths on an image based on detection coordinates. + + !!! warning + + This annotator uses the `sv.Detections.tracker_id`. Read + [here](/latest/trackers/) to learn how to plug + tracking into your inference pipeline. + """ + + def __init__( + self, + color: Color | ColorPalette = ColorPalette.DEFAULT, + position: Position = Position.CENTER, + trace_length: int = 30, + thickness: int = 2, + color_lookup: ColorLookup = ColorLookup.CLASS, + ): + """ + Args: + color (Union[Color, ColorPalette]): The color to draw the trace, can be + a single color or a color palette. + position (Position): The position of the trace. + Defaults to `CENTER`. + trace_length (int): The maximum length of the trace in terms of historical + points. Defaults to `30`. + thickness (int): The thickness of the trace lines. Defaults to `2`. + color_lookup (ColorLookup): Strategy for mapping colors to annotations. + Options are `INDEX`, `CLASS`, `TRACK`. + """ + self.color: Color | ColorPalette = color + self.trace = Trace(max_size=trace_length, anchor=position) + self.thickness = thickness + self.color_lookup: ColorLookup = color_lookup + + @ensure_cv2_image_for_annotation + def annotate(self, scene: ImageType, detections: Detections, custom_color_lookup: np.ndarray | None = None) -> ImageType: + assert isinstance(scene, np.ndarray) + + if detections.tracker_id is None: + raise ValueError( + "The `tracker_id` field is missing in the provided detections." 
+ " See more: https://supervision.roboflow.com/latest/how_to/track_objects" + ) + + detections = detections[detections.tracker_id != PENDING_TRACK_ID] + + self.trace.put(detections) + + for detection_idx in range(len(detections)): + tracker_id = int(detections.tracker_id[detection_idx]) + color = resolve_color( + color=self.color, + detections=detections, + detection_idx=detection_idx, + color_lookup=self.color_lookup + if custom_color_lookup is None + else custom_color_lookup, + ) + xy = self.trace.get(tracker_id=tracker_id) + spline_points = None; + + if len(xy) > 3: + x, y = xy[:, 0], xy[:, 1] + tck, u = splprep([x, y], s=100) + x_new, y_new = splev(np.linspace(0, 1, 100), tck) + spline_points = np.stack([x_new, y_new], axis=1).astype(np.int32) + else: + spline_points = xy.astype(np.int32) + + if len(xy) > 1: + scene = cv2.polylines( + scene, + [spline_points], + False, + color=color.as_bgr(), + thickness=self.thickness, + ) + return scene class HeatMapAnnotator(BaseAnnotator): """ From 8c6ac2dd2970fe12565f6ad83beb767e4641ba57 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Wed, 30 Jul 2025 02:53:58 -0400 Subject: [PATCH 031/124] UPDATE: Made smoothing and spline_order as hyperparameters --- supervision/annotators/core.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/supervision/annotators/core.py b/supervision/annotators/core.py index 09073728eb..61632a047b 100644 --- a/supervision/annotators/core.py +++ b/supervision/annotators/core.py @@ -1937,25 +1937,34 @@ def __init__( position: Position = Position.CENTER, trace_length: int = 30, thickness: int = 2, + smoothing_factor: int = 20, + spline_order: int = 3, color_lookup: ColorLookup = ColorLookup.CLASS, ): """ Args: - color (Union[Color, ColorPalette]): The color to draw the trace, can be + color (Union[Color, ColorPalette]): The color to draw the spline, can be a single color or a color palette. - position (Position): The position of the trace. 
+ position (Position): The position of the spline. Defaults to `CENTER`. - trace_length (int): The maximum length of the trace in terms of historical + trace_length (int): The maximum length of the spline in terms of historical points. Defaults to `30`. - thickness (int): The thickness of the trace lines. Defaults to `2`. + thickness (int): The thickness of the spline lines. Defaults to `2`. + smoothing_factor (int): The smoothing factor of the spline. + spline_order (int): The order of the spline. Use odd numbers that are 1 <= x <= 5. color_lookup (ColorLookup): Strategy for mapping colors to annotations. Options are `INDEX`, `CLASS`, `TRACK`. """ self.color: Color | ColorPalette = color self.trace = Trace(max_size=trace_length, anchor=position) self.thickness = thickness + self.smoothing_factor = smoothing_factor self.color_lookup: ColorLookup = color_lookup + if spline_order % 2 == 0 or spline_order < 1 or spline_order > 5: + raise ValueError("Spline order must be an odd number between 1 and 5.") + self.spline_order = spline_order + @ensure_cv2_image_for_annotation def annotate(self, scene: ImageType, detections: Detections, custom_color_lookup: np.ndarray | None = None) -> ImageType: assert isinstance(scene, np.ndarray) @@ -1985,7 +1994,7 @@ def annotate(self, scene: ImageType, detections: Detections, custom_color_lookup if len(xy) > 3: x, y = xy[:, 0], xy[:, 1] - tck, u = splprep([x, y], s=100) + tck, u = splprep([x, y], s=self.smoothing_factor, k=self.spline_order) x_new, y_new = splev(np.linspace(0, 1, 100), tck) spline_points = np.stack([x_new, y_new], axis=1).astype(np.int32) else: From f447b6fa95690f58e4c95dded17754605eb9dd39 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 30 Jul 2025 07:10:13 +0000 Subject: [PATCH 032/124] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- supervision/__init__.py | 2 +- supervision/annotators/core.py | 17 ++++++++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/supervision/__init__.py b/supervision/__init__.py index 2a3a642d4c..1b2266cd76 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -28,8 +28,8 @@ PolygonAnnotator, RichLabelAnnotator, RoundBoxAnnotator, - TraceAnnotator, SplineAnnotator, + TraceAnnotator, TriangleAnnotator, ) from supervision.annotators.utils import ColorLookup diff --git a/supervision/annotators/core.py b/supervision/annotators/core.py index 61632a047b..b1ecc674eb 100644 --- a/supervision/annotators/core.py +++ b/supervision/annotators/core.py @@ -7,7 +7,7 @@ import numpy as np import numpy.typing as npt from PIL import Image, ImageDraw, ImageFont -from scipy.interpolate import splprep, splev +from scipy.interpolate import splev, splprep from supervision.annotators.base import BaseAnnotator, ImageType from supervision.annotators.utils import ( @@ -1920,6 +1920,7 @@ def annotate( ) return scene + class SplineAnnotator(BaseAnnotator): """ A class for drawing trace paths on an image based on detection coordinates. @@ -1966,7 +1967,12 @@ def __init__( self.spline_order = spline_order @ensure_cv2_image_for_annotation - def annotate(self, scene: ImageType, detections: Detections, custom_color_lookup: np.ndarray | None = None) -> ImageType: + def annotate( + self, + scene: ImageType, + detections: Detections, + custom_color_lookup: np.ndarray | None = None, + ) -> ImageType: assert isinstance(scene, np.ndarray) if detections.tracker_id is None: @@ -1974,11 +1980,11 @@ def annotate(self, scene: ImageType, detections: Detections, custom_color_lookup "The `tracker_id` field is missing in the provided detections." 
" See more: https://supervision.roboflow.com/latest/how_to/track_objects" ) - + detections = detections[detections.tracker_id != PENDING_TRACK_ID] self.trace.put(detections) - + for detection_idx in range(len(detections)): tracker_id = int(detections.tracker_id[detection_idx]) color = resolve_color( @@ -1990,7 +1996,7 @@ def annotate(self, scene: ImageType, detections: Detections, custom_color_lookup else custom_color_lookup, ) xy = self.trace.get(tracker_id=tracker_id) - spline_points = None; + spline_points = None if len(xy) > 3: x, y = xy[:, 0], xy[:, 1] @@ -2010,6 +2016,7 @@ def annotate(self, scene: ImageType, detections: Detections, custom_color_lookup ) return scene + class HeatMapAnnotator(BaseAnnotator): """ A class for drawing heatmaps on an image based on provided detections. From eaf6cdd47701bebd2a6e9157d080c40a08a1790b Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Wed, 30 Jul 2025 03:14:00 -0400 Subject: [PATCH 033/124] Precommit --- supervision/annotators/core.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/supervision/annotators/core.py b/supervision/annotators/core.py index b1ecc674eb..303b2ded51 100644 --- a/supervision/annotators/core.py +++ b/supervision/annotators/core.py @@ -1952,7 +1952,8 @@ def __init__( points. Defaults to `30`. thickness (int): The thickness of the spline lines. Defaults to `2`. smoothing_factor (int): The smoothing factor of the spline. - spline_order (int): The order of the spline. Use odd numbers that are 1 <= x <= 5. + spline_order (int): The order of the spline. Use odd numbers that are + between 1 <= x <= 5. color_lookup (ColorLookup): Strategy for mapping colors to annotations. Options are `INDEX`, `CLASS`, `TRACK`. 
""" From 8407d5a6923bc7ba556cd720721df69892cdb7b4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 30 Jul 2025 07:14:56 +0000 Subject: [PATCH 034/124] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/annotators/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/supervision/annotators/core.py b/supervision/annotators/core.py index 303b2ded51..26c08e8036 100644 --- a/supervision/annotators/core.py +++ b/supervision/annotators/core.py @@ -1952,7 +1952,7 @@ def __init__( points. Defaults to `30`. thickness (int): The thickness of the spline lines. Defaults to `2`. smoothing_factor (int): The smoothing factor of the spline. - spline_order (int): The order of the spline. Use odd numbers that are + spline_order (int): The order of the spline. Use odd numbers that are between 1 <= x <= 5. color_lookup (ColorLookup): Strategy for mapping colors to annotations. Options are `INDEX`, `CLASS`, `TRACK`. 
From cdca2e6b51eee680fd1858392290c91999244dbe Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Wed, 30 Jul 2025 13:48:38 -0400 Subject: [PATCH 035/124] UPDATE: Removed SplineAnnotator and added smooth flag for TraceAnnotator --- supervision/__init__.py | 1 - supervision/annotators/core.py | 97 ++-------------------------------- 2 files changed, 5 insertions(+), 93 deletions(-) diff --git a/supervision/__init__.py b/supervision/__init__.py index 1b2266cd76..ab45651ac9 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -28,7 +28,6 @@ PolygonAnnotator, RichLabelAnnotator, RoundBoxAnnotator, - SplineAnnotator, TraceAnnotator, TriangleAnnotator, ) diff --git a/supervision/annotators/core.py b/supervision/annotators/core.py index 26c08e8036..41189e0756 100644 --- a/supervision/annotators/core.py +++ b/supervision/annotators/core.py @@ -1821,6 +1821,7 @@ def __init__( position: Position = Position.CENTER, trace_length: int = 30, thickness: int = 2, + smooth: int = 0, color_lookup: ColorLookup = ColorLookup.CLASS, ): """ @@ -1832,12 +1833,14 @@ def __init__( trace_length (int): The maximum length of the trace in terms of historical points. Defaults to `30`. thickness (int): The thickness of the trace lines. Defaults to `2`. + smooth (int): The smoothing factor of the trace lines. Defaults to `0` color_lookup (ColorLookup): Strategy for mapping colors to annotations. Options are `INDEX`, `CLASS`, `TRACK`. 
""" self.color: Color | ColorPalette = color self.trace = Trace(max_size=trace_length, anchor=position) self.thickness = thickness + self.smooth = smooth self.color_lookup: ColorLookup = color_lookup @ensure_cv2_image_for_annotation @@ -1909,103 +1912,13 @@ def annotate( else custom_color_lookup, ) xy = self.trace.get(tracker_id=tracker_id) - - if len(xy) > 1: - scene = cv2.polylines( - scene, - [xy.astype(np.int32)], - False, - color=color.as_bgr(), - thickness=self.thickness, - ) - return scene - - -class SplineAnnotator(BaseAnnotator): - """ - A class for drawing trace paths on an image based on detection coordinates. - - !!! warning - - This annotator uses the `sv.Detections.tracker_id`. Read - [here](/latest/trackers/) to learn how to plug - tracking into your inference pipeline. - """ - - def __init__( - self, - color: Color | ColorPalette = ColorPalette.DEFAULT, - position: Position = Position.CENTER, - trace_length: int = 30, - thickness: int = 2, - smoothing_factor: int = 20, - spline_order: int = 3, - color_lookup: ColorLookup = ColorLookup.CLASS, - ): - """ - Args: - color (Union[Color, ColorPalette]): The color to draw the spline, can be - a single color or a color palette. - position (Position): The position of the spline. - Defaults to `CENTER`. - trace_length (int): The maximum length of the spline in terms of historical - points. Defaults to `30`. - thickness (int): The thickness of the spline lines. Defaults to `2`. - smoothing_factor (int): The smoothing factor of the spline. - spline_order (int): The order of the spline. Use odd numbers that are - between 1 <= x <= 5. - color_lookup (ColorLookup): Strategy for mapping colors to annotations. - Options are `INDEX`, `CLASS`, `TRACK`. 
- """ - self.color: Color | ColorPalette = color - self.trace = Trace(max_size=trace_length, anchor=position) - self.thickness = thickness - self.smoothing_factor = smoothing_factor - self.color_lookup: ColorLookup = color_lookup - - if spline_order % 2 == 0 or spline_order < 1 or spline_order > 5: - raise ValueError("Spline order must be an odd number between 1 and 5.") - self.spline_order = spline_order - - @ensure_cv2_image_for_annotation - def annotate( - self, - scene: ImageType, - detections: Detections, - custom_color_lookup: np.ndarray | None = None, - ) -> ImageType: - assert isinstance(scene, np.ndarray) - - if detections.tracker_id is None: - raise ValueError( - "The `tracker_id` field is missing in the provided detections." - " See more: https://supervision.roboflow.com/latest/how_to/track_objects" - ) - - detections = detections[detections.tracker_id != PENDING_TRACK_ID] - - self.trace.put(detections) - - for detection_idx in range(len(detections)): - tracker_id = int(detections.tracker_id[detection_idx]) - color = resolve_color( - color=self.color, - detections=detections, - detection_idx=detection_idx, - color_lookup=self.color_lookup - if custom_color_lookup is None - else custom_color_lookup, - ) - xy = self.trace.get(tracker_id=tracker_id) - spline_points = None + spline_points = xy.astype(np.int32) if len(xy) > 3: x, y = xy[:, 0], xy[:, 1] - tck, u = splprep([x, y], s=self.smoothing_factor, k=self.spline_order) + tck, u = splprep([x, y], s=self.smooth) x_new, y_new = splev(np.linspace(0, 1, 100), tck) spline_points = np.stack([x_new, y_new], axis=1).astype(np.int32) - else: - spline_points = xy.astype(np.int32) if len(xy) > 1: scene = cv2.polylines( From 0c9fea4d9fd80366998be2f400dac91f58ee7998 Mon Sep 17 00:00:00 2001 From: Ashp116 Date: Wed, 30 Jul 2025 18:24:14 -0400 Subject: [PATCH 036/124] UPDATE: Change Smooth to bool --- supervision/annotators/core.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/supervision/annotators/core.py b/supervision/annotators/core.py index 41189e0756..f951e68177 100644 --- a/supervision/annotators/core.py +++ b/supervision/annotators/core.py @@ -1821,7 +1821,7 @@ def __init__( position: Position = Position.CENTER, trace_length: int = 30, thickness: int = 2, - smooth: int = 0, + smooth: bool = False, color_lookup: ColorLookup = ColorLookup.CLASS, ): """ @@ -1833,7 +1833,7 @@ def __init__( trace_length (int): The maximum length of the trace in terms of historical points. Defaults to `30`. thickness (int): The thickness of the trace lines. Defaults to `2`. - smooth (int): The smoothing factor of the trace lines. Defaults to `0` + smooth (bool): Smooth the trace lines. color_lookup (ColorLookup): Strategy for mapping colors to annotations. Options are `INDEX`, `CLASS`, `TRACK`. """ @@ -1914,9 +1914,9 @@ def annotate( xy = self.trace.get(tracker_id=tracker_id) spline_points = xy.astype(np.int32) - if len(xy) > 3: + if len(xy) > 3 and self.smooth: x, y = xy[:, 0], xy[:, 1] - tck, u = splprep([x, y], s=self.smooth) + tck, u = splprep([x, y], s=20) x_new, y_new = splev(np.linspace(0, 1, 100), tck) spline_points = np.stack([x_new, y_new], axis=1).astype(np.int32) From e1393691a4ecd297ee9b014c9b56b043d0978d83 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Thu, 31 Jul 2025 17:49:06 +0200 Subject: [PATCH 037/124] bump version from `0.26.1` to `0.27.0rc0` --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index cae78492ac..46f7631edc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "supervision" description = "A set of easy-to-use utils that will come in handy in any Computer Vision project" license = { text = "MIT" } -version = "0.26.1" +version = "0.27.0rc0" readme = "README.md" requires-python = ">=3.9" authors = [ From 6f87e2639eb1052ae2af4382190da34e21516d46 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" 
<49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 4 Aug 2025 00:50:49 +0000 Subject: [PATCH 038/124] :arrow_up: Update mkdocstrings requirement Updates the requirements on [mkdocstrings](https://github.com/mkdocstrings/mkdocstrings) to permit the latest version. - [Release notes](https://github.com/mkdocstrings/mkdocstrings/releases) - [Changelog](https://github.com/mkdocstrings/mkdocstrings/blob/main/CHANGELOG.md) - [Commits](https://github.com/mkdocstrings/mkdocstrings/compare/0.25.2...0.30.0) --- updated-dependencies: - dependency-name: mkdocstrings dependency-version: 0.30.0 dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 46f7631edc..e8ff22d0ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,7 +71,7 @@ dev = [ ] docs = [ "mkdocs-material[imaging]>=9.5.5", - "mkdocstrings>=0.25.2,<0.30.0", + "mkdocstrings>=0.25.2,<0.31.0", "mkdocstrings-python>=1.10.9", "mike>=2.0.0", "mkdocs-jupyter>=0.24.3", From 652b0a190f6fe445e91b673c5e5317f41a0d0fde Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 4 Aug 2025 00:52:10 +0000 Subject: [PATCH 039/124] :arrow_up: Update build requirement from <1.3,>=0.10 to >=0.10,<1.4 Updates the requirements on [build](https://github.com/pypa/build) to permit the latest version. - [Release notes](https://github.com/pypa/build/releases) - [Changelog](https://github.com/pypa/build/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pypa/build/compare/0.10.0...1.3.0) --- updated-dependencies: - dependency-name: build dependency-version: 1.3.0 dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 46f7631edc..8690253032 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -81,7 +81,7 @@ docs = [ build = [ "twine>=5.1.1,<7.0.0", "wheel>=0.40,<0.46", - "build>=0.10,<1.3" + "build>=0.10,<1.4" ] [tool.bandit] From a08cda30fb5476ea1b9c60839ce95adbfd6edf16 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Wed, 6 Aug 2025 12:02:51 +0200 Subject: [PATCH 040/124] ready for review --- supervision/annotators/utils.py | 30 +++++---- test/annotators/test_utils.py | 116 +++++++++++++++++++++++++++++++- 2 files changed, 132 insertions(+), 14 deletions(-) diff --git a/supervision/annotators/utils.py b/supervision/annotators/utils.py index 511c68dee1..f3f38fc341 100644 --- a/supervision/annotators/utils.py +++ b/supervision/annotators/utils.py @@ -2,6 +2,7 @@ import textwrap from enum import Enum +from typing import Any import numpy as np @@ -151,30 +152,36 @@ def resolve_color( return get_color_by_index(color=color, idx=idx) -def wrap_text(text: str, max_line_length=None) -> list[str]: +def wrap_text(text: Any, max_line_length=None) -> list[str]: """ - Wraps text to the specified maximum line length, respecting existing newlines. - Uses the textwrap library for robust text wrapping. + Wrap `text` to the specified maximum line length, respecting existing + newlines. Falls back to str() if `text` is not already a string. Args: - text (str): The text to wrap. + text (Any): The text (or object) to wrap. + max_line_length (int | None): Maximum width for each wrapped line. Returns: - List[str]: A list of text lines after wrapping. + list[str]: Wrapped lines. 
""" if not text: return [""] + if not isinstance(text, str): + text = str(text) + if max_line_length is None: return text.splitlines() or [""] + if max_line_length <= 0: + raise ValueError(f"max_line_length must be a positive integer") + paragraphs = text.split("\n") - all_lines = [] + all_lines: list[str] = [] for paragraph in paragraphs: - if not paragraph: - # Keep empty lines + if paragraph == "": all_lines.append("") continue @@ -186,12 +193,9 @@ def wrap_text(text: str, max_line_length=None) -> list[str]: drop_whitespace=True, ) - if wrapped: - all_lines.extend(wrapped) - else: - all_lines.append("") + all_lines.extend(wrapped or [""]) - return all_lines if all_lines else [""] + return all_lines or [""] def validate_labels(labels: list[str] | None, detections: Detections): diff --git a/test/annotators/test_utils.py b/test/annotators/test_utils.py index d6abc38183..9b19874a3d 100644 --- a/test/annotators/test_utils.py +++ b/test/annotators/test_utils.py @@ -5,7 +5,7 @@ import numpy as np import pytest -from supervision.annotators.utils import ColorLookup, resolve_color_idx +from supervision.annotators.utils import ColorLookup, resolve_color_idx, wrap_text from supervision.detection.core import Detections from test.test_utils import mock_detections @@ -108,3 +108,117 @@ def test_resolve_color_idx( color_lookup=color_lookup, ) assert result == expected_result + + +@pytest.mark.parametrize( + "text, max_line_length, expected_result, exception", + [ + ( + None, + None, + [""], + DoesNotRaise() + ), # text is None + ( + "", + None, + [""], + DoesNotRaise() + ), # empty string + ( + " \t ", + 3, + [""], + DoesNotRaise() + ), # whitespace-only (spaces + tab) + + ( + 12345, + None, + ["12345"], + DoesNotRaise() + ), # plain integer + ( + -6789, + None, + ["-6789"], + DoesNotRaise() + ), # negative integer + ( + np.int64(1000), + None, + ["1000"], + DoesNotRaise() + ), # NumPy int64 + ( + [1, 2, 3], + None, + ["[1, 2, 3]"], + DoesNotRaise() + ), # list to string + + ( 
+ "When you play the game of thrones, you win or you die.\nFear cuts deeper than swords.\nA mind needs books as a sword needs a whetstone.", # noqa: E501 + None, + [ + "When you play the game of thrones, you win or you die.", + "Fear cuts deeper than swords.", + "A mind needs books as a sword needs a whetstone.", + ], + DoesNotRaise() + ), # Game-of-Thrones quotes, multiline + ( + "\n", + None, + [""], + DoesNotRaise() + ), # single newline + + ( + "valarmorghulisvalardoharis", + 6, + ["valarm", "orghul", "isvala", "rdohar", "is"], + DoesNotRaise() + ), # long Valyrian phrase, wrapped + ( + "Winter is coming\nFire and blood", + 10, + [ + "Winter is", + "coming", + "Fire and", + "blood", + ], + DoesNotRaise() + ), # mix of short/long with newline + + ( + "What is dead may never die", + 0, + None, + pytest.raises(ValueError) + ), # width 0 – invalid + ( + "A Lannister always pays his debts", + -1, + None, + pytest.raises(ValueError) + ), # width -1 – invalid + + ( + None, + 10, + [""], + DoesNotRaise() + ), # text None, width set + ], +) +def test_wrap_text( + text: object, + max_line_length: int | None, + expected_result: list[str], + exception: Exception, +) -> None: + with exception: + result = wrap_text(text=text, max_line_length=max_line_length) + assert result == expected_result \ No newline at end of file From 13ab215fdfda808985e0c6d83a814f9d790126a4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 6 Aug 2025 10:05:41 +0000 Subject: [PATCH 041/124] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/annotators/utils.py | 2 +- test/annotators/test_utils.py | 82 +++++++-------------------------- 2 files changed, 17 insertions(+), 67 deletions(-) diff --git a/supervision/annotators/utils.py b/supervision/annotators/utils.py index 
f3f38fc341..42a436c9bb 100644 --- a/supervision/annotators/utils.py +++ b/supervision/annotators/utils.py @@ -175,7 +175,7 @@ def wrap_text(text: Any, max_line_length=None) -> list[str]: return text.splitlines() or [""] if max_line_length <= 0: - raise ValueError(f"max_line_length must be a positive integer") + raise ValueError("max_line_length must be a positive integer") paragraphs = text.split("\n") all_lines: list[str] = [] diff --git a/test/annotators/test_utils.py b/test/annotators/test_utils.py index 9b19874a3d..89ae81bb7d 100644 --- a/test/annotators/test_utils.py +++ b/test/annotators/test_utils.py @@ -113,72 +113,29 @@ def test_resolve_color_idx( @pytest.mark.parametrize( "text, max_line_length, expected_result, exception", [ + (None, None, [""], DoesNotRaise()), # text is None + ("", None, [""], DoesNotRaise()), # empty string + (" \t ", 3, [""], DoesNotRaise()), # whitespace-only (spaces + tab) + (12345, None, ["12345"], DoesNotRaise()), # plain integer + (-6789, None, ["-6789"], DoesNotRaise()), # negative integer + (np.int64(1000), None, ["1000"], DoesNotRaise()), # NumPy int64 + ([1, 2, 3], None, ["[1, 2, 3]"], DoesNotRaise()), # list to string ( - None, - None, - [""], - DoesNotRaise() - ), # text is None - ( - "", - None, - [""], - DoesNotRaise() - ), # empty string - ( - " \t ", - 3, - [""], - DoesNotRaise() - ), # whitespace-only (spaces + tab) - - ( - 12345, - None, - ["12345"], - DoesNotRaise() - ), # plain integer - ( - -6789, - None, - ["-6789"], - DoesNotRaise() - ), # negative integer - ( - np.int64(1000), - None, - ["1000"], - DoesNotRaise() - ), # NumPy int64 - ( - [1, 2, 3], - None, - ["[1, 2, 3]"], - DoesNotRaise() - ), # list to string - - ( - "When you play the game of thrones, you win or you die.\nFear cuts deeper than swords.\nA mind needs books as a sword needs a whetstone.", # noqa: E501 + "When you play the game of thrones, you win or you die.\nFear cuts deeper than swords.\nA mind needs books as a sword needs a whetstone.", # 
noqa: E501 None, [ "When you play the game of thrones, you win or you die.", "Fear cuts deeper than swords.", "A mind needs books as a sword needs a whetstone.", ], - DoesNotRaise() + DoesNotRaise(), ), # Game-of-Thrones quotes, multiline - ( - "\n", - None, - [""], - DoesNotRaise() - ), # single newline - + ("\n", None, [""], DoesNotRaise()), # single newline ( "valarmorghulisvalardoharis", 6, ["valarm", "orghul", "isvala", "rdohar", "is"], - DoesNotRaise() + DoesNotRaise(), ), # long Valyrian phrase, wrapped ( "Winter is coming\nFire and blood", @@ -189,28 +146,21 @@ def test_resolve_color_idx( "Fire and", "blood", ], - DoesNotRaise() + DoesNotRaise(), ), # mix of short/long with newline - ( "What is dead may never die", 0, None, - pytest.raises(ValueError) + pytest.raises(ValueError), ), # width 0 – invalid ( "A Lannister always pays his debts", -1, None, - pytest.raises(ValueError) + pytest.raises(ValueError), ), # width -1 – invalid - - ( - None, - 10, - [""], - DoesNotRaise() - ), # text None, width set + (None, 10, [""], DoesNotRaise()), # text None, width set ], ) def test_wrap_text( @@ -221,4 +171,4 @@ def test_wrap_text( ) -> None: with exception: result = wrap_text(text=text, max_line_length=max_line_length) - assert result == expected_result \ No newline at end of file + assert result == expected_result From aab3160cd73b42107dcd12a3219238f4a105f0c0 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Wed, 6 Aug 2025 12:17:04 +0200 Subject: [PATCH 042/124] make ruff happy --- test/annotators/test_utils.py | 53 +++++++++++++++++++++++++++++------ 1 file changed, 44 insertions(+), 9 deletions(-) diff --git a/test/annotators/test_utils.py b/test/annotators/test_utils.py index 89ae81bb7d..32e3ec2a2e 100644 --- a/test/annotators/test_utils.py +++ b/test/annotators/test_utils.py @@ -113,13 +113,48 @@ def test_resolve_color_idx( @pytest.mark.parametrize( "text, max_line_length, expected_result, exception", [ - (None, None, [""], DoesNotRaise()), # text is None - ("", 
None, [""], DoesNotRaise()), # empty string - (" \t ", 3, [""], DoesNotRaise()), # whitespace-only (spaces + tab) - (12345, None, ["12345"], DoesNotRaise()), # plain integer - (-6789, None, ["-6789"], DoesNotRaise()), # negative integer - (np.int64(1000), None, ["1000"], DoesNotRaise()), # NumPy int64 - ([1, 2, 3], None, ["[1, 2, 3]"], DoesNotRaise()), # list to string + ( + None, + None, + [""], + DoesNotRaise() + ), # text is None + ( + "", + None, + [""], + DoesNotRaise() + ), # empty string + ( + " \t ", + 3, + [""], + DoesNotRaise() + ), # whitespace-only (spaces + tab) + ( + 12345, + None, + ["12345"], + DoesNotRaise() + ), # plain integer + ( + -6789, + None, + ["-6789"], + DoesNotRaise() + ), # negative integer + ( + np.int64(1000), + None, + ["1000"], + DoesNotRaise() + ), # NumPy int64 + ( + [1, 2, 3], + None, + ["[1, 2, 3]"], + DoesNotRaise() + ), # list to string ( "When you play the game of thrones, you win or you die.\nFear cuts deeper than swords.\nA mind needs books as a sword needs a whetstone.", # noqa: E501 None, @@ -153,13 +188,13 @@ def test_resolve_color_idx( 0, None, pytest.raises(ValueError), - ), # width 0 – invalid + ), # width 0 - invalid ( "A Lannister always pays his debts", -1, None, pytest.raises(ValueError), - ), # width -1 – invalid + ), # width -1 - invalid (None, 10, [""], DoesNotRaise()), # text None, width set ], ) From 46e59656809964479d83dd35b917754aa84395e0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 6 Aug 2025 10:17:30 +0000 Subject: [PATCH 043/124] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/annotators/test_utils.py | 49 +++++------------------------------ 1 file changed, 7 insertions(+), 42 deletions(-) diff --git a/test/annotators/test_utils.py b/test/annotators/test_utils.py index 
32e3ec2a2e..3ab0f9b902 100644 --- a/test/annotators/test_utils.py +++ b/test/annotators/test_utils.py @@ -113,48 +113,13 @@ def test_resolve_color_idx( @pytest.mark.parametrize( "text, max_line_length, expected_result, exception", [ - ( - None, - None, - [""], - DoesNotRaise() - ), # text is None - ( - "", - None, - [""], - DoesNotRaise() - ), # empty string - ( - " \t ", - 3, - [""], - DoesNotRaise() - ), # whitespace-only (spaces + tab) - ( - 12345, - None, - ["12345"], - DoesNotRaise() - ), # plain integer - ( - -6789, - None, - ["-6789"], - DoesNotRaise() - ), # negative integer - ( - np.int64(1000), - None, - ["1000"], - DoesNotRaise() - ), # NumPy int64 - ( - [1, 2, 3], - None, - ["[1, 2, 3]"], - DoesNotRaise() - ), # list to string + (None, None, [""], DoesNotRaise()), # text is None + ("", None, [""], DoesNotRaise()), # empty string + (" \t ", 3, [""], DoesNotRaise()), # whitespace-only (spaces + tab) + (12345, None, ["12345"], DoesNotRaise()), # plain integer + (-6789, None, ["-6789"], DoesNotRaise()), # negative integer + (np.int64(1000), None, ["1000"], DoesNotRaise()), # NumPy int64 + ([1, 2, 3], None, ["[1, 2, 3]"], DoesNotRaise()), # list to string ( "When you play the game of thrones, you win or you die.\nFear cuts deeper than swords.\nA mind needs books as a sword needs a whetstone.", # noqa: E501 None, From 005cdbd37abec947301ed005d34d07a4c97f80bc Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Wed, 6 Aug 2025 12:27:16 +0200 Subject: [PATCH 044/124] bump version from `0.27.0rc0` to `0.27.0rc1` --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 46f7631edc..9bf3b24aa4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "supervision" description = "A set of easy-to-use utils that will come in handy in any Computer Vision project" license = { text = "MIT" } -version = "0.27.0rc0" +version = "0.27.0rc1" readme = "README.md" requires-python = ">=3.9" authors = [ 
From 83d490dadcdda50d29248f7e82d28dc4683efaa0 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Wed, 6 Aug 2025 23:19:21 +0200 Subject: [PATCH 045/124] initial support for `pycon` docs examples --- docs/stylesheets/code_select.css | 3 ++ ...{cookbooks-card.css => cookbooks_card.css} | 0 mkdocs.yml | 6 ++- supervision/draw/utils.py | 42 ++++++++++++++----- 4 files changed, 39 insertions(+), 12 deletions(-) create mode 100644 docs/stylesheets/code_select.css rename docs/stylesheets/{cookbooks-card.css => cookbooks_card.css} (100%) diff --git a/docs/stylesheets/code_select.css b/docs/stylesheets/code_select.css new file mode 100644 index 0000000000..dce599a2cf --- /dev/null +++ b/docs/stylesheets/code_select.css @@ -0,0 +1,3 @@ +.language-pycon .gp, .language-pycon .go { + user-select: none; +} \ No newline at end of file diff --git a/docs/stylesheets/cookbooks-card.css b/docs/stylesheets/cookbooks_card.css similarity index 100% rename from docs/stylesheets/cookbooks-card.css rename to docs/stylesheets/cookbooks_card.css diff --git a/mkdocs.yml b/mkdocs.yml index 394d5ddd3b..612f8901ce 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -26,7 +26,8 @@ extra: extra_css: - stylesheets/extra.css - - stylesheets/cookbooks-card.css + - stylesheets/cookbooks_card.css + - stylesheets/code_select.css nav: - Home: index.md @@ -169,6 +170,7 @@ markdown_extensions: - pymdownx.snippets: check_paths: true - pymdownx.highlight: + use_pygments: true anchor_linenums: true line_spans: __span pygments_lang_class: true @@ -190,4 +192,4 @@ validation: nav: absolute_links: ignore links: - absolute_links: ignore + absolute_links: ignore \ No newline at end of file diff --git a/supervision/draw/utils.py b/supervision/draw/utils.py index ed4a903746..536e493fb2 100644 --- a/supervision/draw/utils.py +++ b/supervision/draw/utils.py @@ -346,28 +346,50 @@ def draw_image( def calculate_optimal_text_scale(resolution_wh: tuple[int, int]) -> float: """ - Calculate font scale based on the resolution of an image. 
+ Calculate optimal font scale based on image resolution. - Parameters: - resolution_wh (Tuple[int, int]): A tuple representing the width and height - of the image. + Adjusts font scale proportionally to the smallest dimension of the given image + resolution for consistent readability. + + Args: + resolution_wh (tuple[int, int]): (width, height) of the image in pixels Returns: - float: The calculated font scale factor. + float: recommended font scale factor + + Examples: + ```pycon + >>> from supervision import calculate_optimal_text_scale + >>> calculate_optimal_text_scale((1920, 1080)) + 1.08 + >>> calculate_optimal_text_scale((640, 480)) + 0.48 + ``` """ return min(resolution_wh) * 1e-3 def calculate_optimal_line_thickness(resolution_wh: tuple[int, int]) -> int: """ - Calculate line thickness based on the resolution of an image. + Calculate optimal line thickness based on image resolution. - Parameters: - resolution_wh (Tuple[int, int]): A tuple representing the width and height - of the image. + Adjusts the line thickness for readability depending on the smallest dimension + of the provided image resolution. + + Args: + resolution_wh (tuple[int, int]): (width, height) of the image in pixels Returns: - int: The calculated line thickness in pixels. 
+ int: recommended line thickness in pixels + + Examples: + ```pycon + >>> from supervision import calculate_optimal_line_thickness + >>> calculate_optimal_line_thickness((1920, 1080)) + 4 + >>> calculate_optimal_line_thickness((640, 480)) + 2 + ``` """ if min(resolution_wh) < 1080: return 2 From 5c6745291b401983d846596fbed84829ef0c6b94 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Thu, 7 Aug 2025 07:41:52 +0200 Subject: [PATCH 046/124] after few tests drop support for `pycon` in docs examples --- docs/stylesheets/code_select.css | 3 --- mkdocs.yml | 2 -- supervision/draw/utils.py | 24 ++++++++++++------------ 3 files changed, 12 insertions(+), 17 deletions(-) delete mode 100644 docs/stylesheets/code_select.css diff --git a/docs/stylesheets/code_select.css b/docs/stylesheets/code_select.css deleted file mode 100644 index dce599a2cf..0000000000 --- a/docs/stylesheets/code_select.css +++ /dev/null @@ -1,3 +0,0 @@ -.language-pycon .gp, .language-pycon .go { - user-select: none; -} \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index 612f8901ce..131b7aadae 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -27,7 +27,6 @@ extra: extra_css: - stylesheets/extra.css - stylesheets/cookbooks_card.css - - stylesheets/code_select.css nav: - Home: index.md @@ -170,7 +169,6 @@ markdown_extensions: - pymdownx.snippets: check_paths: true - pymdownx.highlight: - use_pygments: true anchor_linenums: true line_spans: __span pygments_lang_class: true diff --git a/supervision/draw/utils.py b/supervision/draw/utils.py index 536e493fb2..d72ac2c4e9 100644 --- a/supervision/draw/utils.py +++ b/supervision/draw/utils.py @@ -358,12 +358,12 @@ def calculate_optimal_text_scale(resolution_wh: tuple[int, int]) -> float: float: recommended font scale factor Examples: - ```pycon - >>> from supervision import calculate_optimal_text_scale - >>> calculate_optimal_text_scale((1920, 1080)) - 1.08 - >>> calculate_optimal_text_scale((640, 480)) - 0.48 + ```python + from supervision import 
calculate_optimal_text_scale + calculate_optimal_text_scale((1920, 1080)) + # 1.08 + calculate_optimal_text_scale((640, 480)) + # 0.48 ``` """ return min(resolution_wh) * 1e-3 @@ -383,12 +383,12 @@ def calculate_optimal_line_thickness(resolution_wh: tuple[int, int]) -> int: int: recommended line thickness in pixels Examples: - ```pycon - >>> from supervision import calculate_optimal_line_thickness - >>> calculate_optimal_line_thickness((1920, 1080)) - 4 - >>> calculate_optimal_line_thickness((640, 480)) - 2 + ```python + from supervision import calculate_optimal_line_thickness + calculate_optimal_line_thickness((1920, 1080)) + # 4 + calculate_optimal_line_thickness((640, 480)) + # 2 ``` """ if min(resolution_wh) < 1080: From 4b7b347aeabf45ea1508e0ecfa6fbf1b9a9eb50e Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Thu, 7 Aug 2025 07:45:55 +0200 Subject: [PATCH 047/124] drop create tiles from public API --- docs/how_to/process_datasets.md | 5 +- supervision/__init__.py | 2 - supervision/utils/image.py | 352 -------------------------------- test/utils/test_image.py | 148 +------------- 4 files changed, 2 insertions(+), 505 deletions(-) diff --git a/docs/how_to/process_datasets.md b/docs/how_to/process_datasets.md index 36c122df41..acfd941c47 100644 --- a/docs/how_to/process_datasets.md +++ b/docs/how_to/process_datasets.md @@ -331,12 +331,9 @@ for i in range(16): annotated_image = label_annotator.annotate(annotated_image, annotations, labels) annotated_images.append(annotated_image) -grid = sv.create_tiles( +sv.plot_images_grid( annotated_images, grid_size=(4, 4), - single_tile_size=(400, 400), - tile_padding_color=sv.Color.WHITE, - tile_margin_color=sv.Color.WHITE ) ``` diff --git a/supervision/__init__.py b/supervision/__init__.py index ab45651ac9..0b3df6c033 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -120,7 +120,6 @@ from supervision.utils.file import list_files_with_extensions from supervision.utils.image import ( ImageSink, - create_tiles, 
crop_image, letterbox_image, overlay_image, @@ -206,7 +205,6 @@ "clip_boxes", "contains_holes", "contains_multiple_segments", - "create_tiles", "crop_image", "cv2_to_pillow", "draw_filled_polygon", diff --git a/supervision/utils/image.py b/supervision/utils/image.py index 6960986768..5b51b628bc 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -435,355 +435,3 @@ def save_image(self, image: np.ndarray, image_name: str | None = None): def __exit__(self, exc_type, exc_value, exc_traceback): pass - - -def create_tiles( - images: list[ImageType], - grid_size: tuple[int | None, int | None] | None = None, - single_tile_size: tuple[int, int] | None = None, - tile_scaling: Literal["min", "max", "avg"] = "avg", - tile_padding_color: tuple[int, int, int] | Color = Color.from_hex("#D9D9D9"), - tile_margin: int = 10, - tile_margin_color: tuple[int, int, int] | Color = Color.from_hex("#BFBEBD"), - return_type: Literal["auto", "cv2", "pillow"] = "auto", - titles: list[str | None] | None = None, - titles_anchors: Point | list[Point | None] | None = None, - titles_color: tuple[int, int, int] | Color = Color.from_hex("#262523"), - titles_scale: float | None = None, - titles_thickness: int = 1, - titles_padding: int = 10, - titles_text_font: int = cv2.FONT_HERSHEY_SIMPLEX, - titles_background_color: tuple[int, int, int] | Color = Color.from_hex("#D9D9D9"), - default_title_placement: RelativePosition = "top", -) -> ImageType: - """ - Creates tiles mosaic from input images, automating grid placement and - converting images to common resolution maintaining aspect ratio. It is - also possible to render text titles on tiles, using optional set of - parameters specifying text drawing (see parameters description). 
- - Automated grid placement will try to maintain square shape of grid - (with size being the nearest integer square root of #images), up to two exceptions: - * if there are up to 3 images - images will be displayed in single row - * if square-grid placement causes last row to be empty - number of rows is trimmed - until last row has at least one image - - Args: - images (List[ImageType]): Images to create tiles. Elements can be either - np.ndarray or PIL.Image, common representation will be agreed by the - function. - grid_size (Optional[Tuple[Optional[int], Optional[int]]]): Expected grid - size in format (n_rows, n_cols). If not given - automated grid placement - will be applied. One may also provide only one out of two elements of the - tuple - then grid will be created with either n_rows or n_cols fixed, - leaving the other dimension to be adjusted by the number of images - single_tile_size (Optional[Tuple[int, int]]): sizeof a single tile element - provided in (width, height) format. If not given - size of tile will be - automatically calculated based on `tile_scaling` parameter. - tile_scaling (Literal["min", "max", "avg"]): If `single_tile_size` is not - given - parameter will be used to calculate tile size - using - min / max / avg size of image provided in `images` list. - tile_padding_color (Union[Tuple[int, int, int], sv.Color]): Color to be used in - images letterbox procedure (while standardising tiles sizes) as a padding. - If tuple provided - should be BGR. - tile_margin (int): size of margin between tiles (in pixels) - tile_margin_color (Union[Tuple[int, int, int], sv.Color]): Color of tile margin. - If tuple provided - should be BGR. - return_type (Literal["auto", "cv2", "pillow"]): Parameter dictates the format of - return image. One may choose specific type ("cv2" or "pillow") to enforce - conversion. "auto" mode takes a majority vote between types of elements in - `images` list - resolving draws in favour of OpenCV format. 
"auto" can be - safely used when all input images are of the same type. - titles (Optional[List[Optional[str]]]): Optional titles to be added to tiles. - Elements of that list may be empty - then specific tile (in order presented - in `images` parameter) will not be filled with title. It is possible to - provide list of titles shorter than `images` - then remaining titles will - be assumed empty. - titles_anchors (Optional[Union[Point, List[Optional[Point]]]]): Parameter to - specify anchor points for titles. It is possible to specify anchor either - globally or for specific tiles (following order of `images`). - If not given (either globally, or for specific element of the list), - it will be calculated automatically based on `default_title_placement`. - titles_color (Union[Tuple[int, int, int], Color]): Color of titles text. - If tuple provided - should be BGR. - titles_scale (Optional[float]): Scale of titles. If not provided - value will - be calculated using `calculate_optimal_text_scale(...)`. - titles_thickness (int): Thickness of titles text. - titles_padding (int): Size of titles padding. - titles_text_font (int): Font to be used to render titles. Must be integer - constant representing OpenCV font. - (See docs: https://docs.opencv.org/4.x/d6/d6e/group__imgproc__draw.html) - titles_background_color (Union[Tuple[int, int, int], Color]): Color of title - text padding. - default_title_placement (Literal["top", "bottom"]): Parameter specifies title - anchor placement in case if explicit anchor is not provided. - - Returns: - ImageType: Image with all input images located in tails grid. The output type is - determined by `return_type` parameter. - - Raises: - ValueError: In case when input images list is empty, provided `grid_size` is too - small to fit all images, `tile_scaling` mode is invalid. 
- """ - if len(images) == 0: - raise ValueError("Could not create image tiles from empty list of images.") - if return_type == "auto": - return_type = _negotiate_tiles_format(images=images) - tile_padding_color = unify_to_bgr(color=tile_padding_color) - tile_margin_color = unify_to_bgr(color=tile_margin_color) - images = images_to_cv2(images=images) - if single_tile_size is None: - single_tile_size = _aggregate_images_shape(images=images, mode=tile_scaling) - resized_images = [ - letterbox_image( - image=i, resolution_wh=single_tile_size, color=tile_padding_color - ) - for i in images - ] - grid_size = _establish_grid_size(images=images, grid_size=grid_size) - if len(images) > grid_size[0] * grid_size[1]: - raise ValueError( - f"Could not place {len(images)} in grid with size: {grid_size}." - ) - if titles is not None: - titles = fill(sequence=titles, desired_size=len(images), content=None) - titles_anchors = ( - [titles_anchors] - if not issubclass(type(titles_anchors), list) - else titles_anchors - ) - titles_anchors = fill( - sequence=titles_anchors, desired_size=len(images), content=None - ) - titles_color = unify_to_bgr(color=titles_color) - titles_background_color = unify_to_bgr(color=titles_background_color) - tiles = _generate_tiles( - images=resized_images, - grid_size=grid_size, - single_tile_size=single_tile_size, - tile_padding_color=tile_padding_color, - tile_margin=tile_margin, - tile_margin_color=tile_margin_color, - titles=titles, - titles_anchors=titles_anchors, - titles_color=titles_color, - titles_scale=titles_scale, - titles_thickness=titles_thickness, - titles_padding=titles_padding, - titles_text_font=titles_text_font, - titles_background_color=titles_background_color, - default_title_placement=default_title_placement, - ) - if return_type == "pillow": - tiles = cv2_to_pillow(image=tiles) - return tiles - - -def _negotiate_tiles_format(images: list[ImageType]) -> Literal["cv2", "pillow"]: - number_of_np_arrays = sum(issubclass(type(i), 
np.ndarray) for i in images) - if number_of_np_arrays >= (len(images) // 2): - return "cv2" - return "pillow" - - -def _calculate_aggregated_images_shape( - images: list[np.ndarray], aggregator: Callable[[list[int]], float] -) -> tuple[int, int]: - height = round(aggregator([i.shape[0] for i in images])) - width = round(aggregator([i.shape[1] for i in images])) - return width, height - - -SHAPE_AGGREGATION_FUN = { - "min": partial(_calculate_aggregated_images_shape, aggregator=np.min), - "max": partial(_calculate_aggregated_images_shape, aggregator=np.max), - "avg": partial(_calculate_aggregated_images_shape, aggregator=np.average), -} - - -def _aggregate_images_shape( - images: list[np.ndarray], mode: Literal["min", "max", "avg"] -) -> tuple[int, int]: - if mode not in SHAPE_AGGREGATION_FUN: - raise ValueError( - f"Could not aggregate images shape - provided unknown mode: {mode}. " - f"Supported modes: {list(SHAPE_AGGREGATION_FUN.keys())}." - ) - return SHAPE_AGGREGATION_FUN[mode](images) - - -def _establish_grid_size( - images: list[np.ndarray], grid_size: tuple[int | None, int | None] | None -) -> tuple[int, int]: - if grid_size is None or all(e is None for e in grid_size): - return _negotiate_grid_size(images=images) - if grid_size[0] is None: - return math.ceil(len(images) / grid_size[1]), grid_size[1] - if grid_size[1] is None: - return grid_size[0], math.ceil(len(images) / grid_size[0]) - return grid_size - - -def _negotiate_grid_size(images: list[np.ndarray]) -> tuple[int, int]: - if len(images) <= MAX_COLUMNS_FOR_SINGLE_ROW_GRID: - return 1, len(images) - nearest_sqrt = math.ceil(np.sqrt(len(images))) - proposed_columns = nearest_sqrt - proposed_rows = nearest_sqrt - while proposed_columns * (proposed_rows - 1) >= len(images): - proposed_rows -= 1 - return proposed_rows, proposed_columns - - -def _generate_tiles( - images: list[np.ndarray], - grid_size: tuple[int, int], - single_tile_size: tuple[int, int], - tile_padding_color: tuple[int, int, int], - 
tile_margin: int, - tile_margin_color: tuple[int, int, int], - titles: list[str | None] | None, - titles_anchors: list[Point | None], - titles_color: tuple[int, int, int], - titles_scale: float | None, - titles_thickness: int, - titles_padding: int, - titles_text_font: int, - titles_background_color: tuple[int, int, int], - default_title_placement: RelativePosition, -) -> np.ndarray: - images = _draw_texts( - images=images, - titles=titles, - titles_anchors=titles_anchors, - titles_color=titles_color, - titles_scale=titles_scale, - titles_thickness=titles_thickness, - titles_padding=titles_padding, - titles_text_font=titles_text_font, - titles_background_color=titles_background_color, - default_title_placement=default_title_placement, - ) - rows, columns = grid_size - tiles_elements = list(create_batches(sequence=images, batch_size=columns)) - while len(tiles_elements[-1]) < columns: - tiles_elements[-1].append( - _generate_color_image(shape=single_tile_size, color=tile_padding_color) - ) - while len(tiles_elements) < rows: - tiles_elements.append( - [_generate_color_image(shape=single_tile_size, color=tile_padding_color)] - * columns - ) - return _merge_tiles_elements( - tiles_elements=tiles_elements, - grid_size=grid_size, - single_tile_size=single_tile_size, - tile_margin=tile_margin, - tile_margin_color=tile_margin_color, - ) - - -def _draw_texts( - images: list[np.ndarray], - titles: list[str | None] | None, - titles_anchors: list[Point | None], - titles_color: tuple[int, int, int], - titles_scale: float | None, - titles_thickness: int, - titles_padding: int, - titles_text_font: int, - titles_background_color: tuple[int, int, int], - default_title_placement: RelativePosition, -) -> list[np.ndarray]: - if titles is None: - return images - titles_anchors = _prepare_default_titles_anchors( - images=images, - titles_anchors=titles_anchors, - default_title_placement=default_title_placement, - ) - if titles_scale is None: - image_height, image_width = 
images[0].shape[:2] - titles_scale = calculate_optimal_text_scale( - resolution_wh=(image_width, image_height) - ) - result = [] - for image, text, anchor in zip(images, titles, titles_anchors): - if text is None: - result.append(image) - continue - processed_image = draw_text( - scene=image, - text=text, - text_anchor=anchor, - text_color=Color.from_bgr_tuple(titles_color), - text_scale=titles_scale, - text_thickness=titles_thickness, - text_padding=titles_padding, - text_font=titles_text_font, - background_color=Color.from_bgr_tuple(titles_background_color), - ) - result.append(processed_image) - return result - - -def _prepare_default_titles_anchors( - images: list[np.ndarray], - titles_anchors: list[Point | None], - default_title_placement: RelativePosition, -) -> list[Point]: - result = [] - for image, anchor in zip(images, titles_anchors): - if anchor is not None: - result.append(anchor) - continue - image_height, image_width = image.shape[:2] - if default_title_placement == "top": - default_anchor = Point(x=image_width / 2, y=image_height * 0.1) - else: - default_anchor = Point(x=image_width / 2, y=image_height * 0.9) - result.append(default_anchor) - return result - - -def _merge_tiles_elements( - tiles_elements: list[list[np.ndarray]], - grid_size: tuple[int, int], - single_tile_size: tuple[int, int], - tile_margin: int, - tile_margin_color: tuple[int, int, int], -) -> np.ndarray: - vertical_padding = ( - np.ones((single_tile_size[1], tile_margin, 3)) * tile_margin_color - ) - merged_rows = [ - np.concatenate( - list( - itertools.chain.from_iterable( - zip(row, [vertical_padding] * grid_size[1]) - ) - )[:-1], - axis=1, - ) - for row in tiles_elements - ] - row_width = merged_rows[0].shape[1] - horizontal_padding = ( - np.ones((tile_margin, row_width, 3), dtype=np.uint8) * tile_margin_color - ) - rows_with_paddings = [] - for row in merged_rows: - rows_with_paddings.append(row) - rows_with_paddings.append(horizontal_padding) - return np.concatenate( - 
rows_with_paddings[:-1], - axis=0, - ).astype(np.uint8) - - -def _generate_color_image( - shape: tuple[int, int], color: tuple[int, int, int] -) -> np.ndarray: - return np.ones((*shape[::-1], 3), dtype=np.uint8) * color diff --git a/test/utils/test_image.py b/test/utils/test_image.py index 39640330e7..6ae9567b99 100644 --- a/test/utils/test_image.py +++ b/test/utils/test_image.py @@ -1,9 +1,7 @@ import numpy as np -import pytest from PIL import Image, ImageChops -from supervision import Color, Point -from supervision.utils.image import create_tiles, letterbox_image, resize_image +from supervision.utils.image import letterbox_image, resize_image def test_resize_image_for_opencv_image() -> None: @@ -96,147 +94,3 @@ def test_letterbox_image_for_pillow_image() -> None: assert difference.getbbox() is None, ( "Expected padding to be added top and bottom with padding added top and bottom" ) - - -def test_create_tiles_with_one_image( - one_image: np.ndarray, single_image_tile: np.ndarray -) -> None: - # when - result = create_tiles(images=[one_image], single_tile_size=(240, 240)) - - # # then - assert np.allclose(result, single_image_tile, atol=5.0) - - -def test_create_tiles_with_one_image_and_enforced_grid( - one_image: np.ndarray, single_image_tile_enforced_grid: np.ndarray -) -> None: - # when - result = create_tiles( - images=[one_image], - grid_size=(None, 3), - single_tile_size=(240, 240), - ) - - # then - assert np.allclose(result, single_image_tile_enforced_grid, atol=5.0) - - -def test_create_tiles_with_two_images( - two_images: list[np.ndarray], two_images_tile: np.ndarray -) -> None: - # when - result = create_tiles(images=two_images, single_tile_size=(240, 240)) - - # then - assert np.allclose(result, two_images_tile, atol=5.0) - - -def test_create_tiles_with_three_images( - three_images: list[np.ndarray], three_images_tile: np.ndarray -) -> None: - # when - result = create_tiles(images=three_images, single_tile_size=(240, 240)) - - # then - assert 
np.allclose(result, three_images_tile, atol=5.0) - - -def test_create_tiles_with_four_images( - four_images: list[np.ndarray], - four_images_tile: np.ndarray, -) -> None: - # when - result = create_tiles(images=four_images, single_tile_size=(240, 240)) - - # then - assert np.allclose(result, four_images_tile, atol=5.0) - - -def test_create_tiles_with_all_images( - all_images: list[np.ndarray], - all_images_tile: np.ndarray, -) -> None: - # when - result = create_tiles(images=all_images, single_tile_size=(240, 240)) - - # then - assert np.allclose(result, all_images_tile, atol=5.0) - - -def test_create_tiles_with_all_images_and_custom_grid( - all_images: list[np.ndarray], all_images_tile_and_custom_grid: np.ndarray -) -> None: - # when - result = create_tiles( - images=all_images, - grid_size=(3, 3), - single_tile_size=(240, 240), - ) - - # then - assert np.allclose(result, all_images_tile_and_custom_grid, atol=5.0) - - -def test_create_tiles_with_all_images_and_custom_colors( - all_images: list[np.ndarray], all_images_tile_and_custom_colors: np.ndarray -) -> None: - # when - result = create_tiles( - images=all_images, - tile_margin_color=(127, 127, 127), - tile_padding_color=(224, 224, 224), - single_tile_size=(240, 240), - ) - - # then - assert np.allclose(result, all_images_tile_and_custom_colors, atol=5.0) - - -def test_create_tiles_with_all_images_and_titles( - all_images: list[np.ndarray], - all_images_tile_and_custom_colors_and_titles: np.ndarray, -) -> None: - # when - result = create_tiles( - images=all_images, - titles=["Image 1", None, "Image 3", "Image 4"], - single_tile_size=(240, 240), - ) - - # then - assert np.allclose(result, all_images_tile_and_custom_colors_and_titles, atol=5.0) - - -def test_create_tiles_with_all_images_and_titles_with_custom_configs( - all_images: list[np.ndarray], - all_images_tile_and_titles_with_custom_configs: np.ndarray, -) -> None: - # when - result = create_tiles( - images=all_images, - titles=["Image 1", None, "Image 3", 
"Image 4"], - single_tile_size=(240, 240), - titles_anchors=[ - Point(x=200, y=300), - Point(x=300, y=400), - None, - Point(x=300, y=400), - ], - titles_color=Color.RED, - titles_scale=1.5, - titles_thickness=3, - titles_padding=20, - titles_background_color=Color.BLACK, - default_title_placement="bottom", - ) - - # then - assert np.allclose(result, all_images_tile_and_titles_with_custom_configs, atol=5.0) - - -def test_create_tiles_with_all_images_and_custom_grid_to_small_to_fit_images( - all_images: list[np.ndarray], -) -> None: - with pytest.raises(ValueError): - _ = create_tiles(images=all_images, grid_size=(2, 2)) From 8fc22f0281e855d9a0611609918c17f250d6974a Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Thu, 7 Aug 2025 07:54:50 +0200 Subject: [PATCH 048/124] move `ImageType` to a more suitable location --- supervision/annotators/base.py | 14 +------------- supervision/annotators/core.py | 3 ++- supervision/draw/base.py | 13 +++++++++++++ supervision/key_points/annotators.py | 2 +- supervision/utils/conversion.py | 2 +- supervision/utils/notebook.py | 2 +- 6 files changed, 19 insertions(+), 17 deletions(-) create mode 100644 supervision/draw/base.py diff --git a/supervision/annotators/base.py b/supervision/annotators/base.py index 159ad5567e..9b4bbcbe27 100644 --- a/supervision/annotators/base.py +++ b/supervision/annotators/base.py @@ -1,19 +1,7 @@ from abc import ABC, abstractmethod -from typing import TypeVar - -import numpy as np -from PIL import Image from supervision.detection.core import Detections - -ImageType = TypeVar("ImageType", np.ndarray, Image.Image) -""" -An image of type `np.ndarray` or `PIL.Image.Image`. - -Unlike a `Union`, ensures the type remains consistent. If a function -takes an `ImageType` argument and returns an `ImageType`, when you -pass an `np.ndarray`, you will get an `np.ndarray` back. 
-""" +from supervision.draw.base import ImageType class BaseAnnotator(ABC): diff --git a/supervision/annotators/core.py b/supervision/annotators/core.py index f951e68177..780d175438 100644 --- a/supervision/annotators/core.py +++ b/supervision/annotators/core.py @@ -9,7 +9,8 @@ from PIL import Image, ImageDraw, ImageFont from scipy.interpolate import splev, splprep -from supervision.annotators.base import BaseAnnotator, ImageType +from supervision.draw.base import ImageType +from supervision.annotators.base import BaseAnnotator from supervision.annotators.utils import ( PENDING_TRACK_ID, ColorLookup, diff --git a/supervision/draw/base.py b/supervision/draw/base.py new file mode 100644 index 0000000000..e27c1d3c6b --- /dev/null +++ b/supervision/draw/base.py @@ -0,0 +1,13 @@ +from typing import TypeVar + +import numpy as np +from PIL import Image + +ImageType = TypeVar("ImageType", np.ndarray, Image.Image) +""" +An image of type `np.ndarray` or `PIL.Image.Image`. + +Unlike a `Union`, ensures the type remains consistent. If a function +takes an `ImageType` argument and returns an `ImageType`, when you +pass an `np.ndarray`, you will get an `np.ndarray` back. 
+""" diff --git a/supervision/key_points/annotators.py b/supervision/key_points/annotators.py index ab9f04d171..7cc752ebe1 100644 --- a/supervision/key_points/annotators.py +++ b/supervision/key_points/annotators.py @@ -6,7 +6,7 @@ import cv2 import numpy as np -from supervision.annotators.base import ImageType +from supervision.draw.base import ImageType from supervision.detection.utils.boxes import pad_boxes, spread_out_boxes from supervision.draw.color import Color from supervision.draw.utils import draw_rounded_rectangle diff --git a/supervision/utils/conversion.py b/supervision/utils/conversion.py index 79ec500300..30a9465e26 100644 --- a/supervision/utils/conversion.py +++ b/supervision/utils/conversion.py @@ -4,7 +4,7 @@ import numpy as np from PIL import Image -from supervision.annotators.base import ImageType +from supervision.draw.base import ImageType def ensure_cv2_image_for_annotation(annotate_func): diff --git a/supervision/utils/notebook.py b/supervision/utils/notebook.py index 9262f12bc4..3af09ebbec 100644 --- a/supervision/utils/notebook.py +++ b/supervision/utils/notebook.py @@ -4,7 +4,7 @@ import matplotlib.pyplot as plt from PIL import Image -from supervision.annotators.base import ImageType +from supervision.draw.base import ImageType from supervision.utils.conversion import pillow_to_cv2 From cbb58a8c6fff7ca4ea8f88d9a5e8425397656071 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Thu, 7 Aug 2025 10:01:50 +0200 Subject: [PATCH 049/124] `tint_image` and `grayscale_image` functions added --- docs/utils/image.md | 12 +++ supervision/__init__.py | 4 + supervision/annotators/core.py | 50 ++++++------ supervision/draw/utils.py | 28 +++---- supervision/key_points/annotators.py | 6 +- supervision/utils/conversion.py | 6 +- supervision/utils/image.py | 110 +++++++++++++++++++++++---- test/utils/test_conversion.py | 6 +- 8 files changed, 160 insertions(+), 62 deletions(-) diff --git a/docs/utils/image.md b/docs/utils/image.md index 8e39136a8b..b1daa9c20b 
100644 --- a/docs/utils/image.md +++ b/docs/utils/image.md @@ -34,6 +34,18 @@ comments: true :::supervision.utils.image.overlay_image + + +:::supervision.utils.image.tint_image + + + +:::supervision.utils.image.grayscale_image + diff --git a/supervision/__init__.py b/supervision/__init__.py index 0b3df6c033..9f16f0bba8 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -125,6 +125,8 @@ overlay_image, resize_image, scale_image, + tint_image, + grayscale_image ) from supervision.utils.notebook import plot_image, plot_images_grid from supervision.utils.video import ( @@ -248,4 +250,6 @@ "xyxy_to_polygons", "xyxy_to_xcycarh", "xyxy_to_xywh", + "tint_image", + "grayscale_image" ] diff --git a/supervision/annotators/core.py b/supervision/annotators/core.py index 780d175438..555363077a 100644 --- a/supervision/annotators/core.py +++ b/supervision/annotators/core.py @@ -34,8 +34,8 @@ from supervision.draw.utils import draw_polygon, draw_rounded_rectangle, draw_text from supervision.geometry.core import Point, Position, Rect from supervision.utils.conversion import ( - ensure_cv2_image_for_annotation, - ensure_pil_image_for_annotation, + ensure_cv2_image_for_class_method, + ensure_pil_image_for_class_method, ) from supervision.utils.image import ( crop_image, @@ -178,7 +178,7 @@ def __init__( self.thickness: int = thickness self.color_lookup: ColorLookup = color_lookup - @ensure_cv2_image_for_annotation + @ensure_cv2_image_for_class_method def annotate( self, scene: ImageType, @@ -261,7 +261,7 @@ def __init__( self.thickness: int = thickness self.color_lookup: ColorLookup = color_lookup - @ensure_cv2_image_for_annotation + @ensure_cv2_image_for_class_method def annotate( self, scene: ImageType, @@ -350,7 +350,7 @@ def __init__( self.opacity = opacity self.color_lookup: ColorLookup = color_lookup - @ensure_cv2_image_for_annotation + @ensure_cv2_image_for_class_method def annotate( self, scene: ImageType, @@ -440,7 +440,7 @@ def __init__( self.thickness: 
int = thickness self.color_lookup: ColorLookup = color_lookup - @ensure_cv2_image_for_annotation + @ensure_cv2_image_for_class_method def annotate( self, scene: ImageType, @@ -527,7 +527,7 @@ def __init__( self.color_lookup: ColorLookup = color_lookup self.opacity = opacity - @ensure_cv2_image_for_annotation + @ensure_cv2_image_for_class_method def annotate( self, scene: ImageType, @@ -623,7 +623,7 @@ def __init__( self.color_lookup: ColorLookup = color_lookup self.kernel_size: int = kernel_size - @ensure_cv2_image_for_annotation + @ensure_cv2_image_for_class_method def annotate( self, scene: ImageType, @@ -723,7 +723,7 @@ def __init__( self.end_angle: int = end_angle self.color_lookup: ColorLookup = color_lookup - @ensure_cv2_image_for_annotation + @ensure_cv2_image_for_class_method def annotate( self, scene: ImageType, @@ -815,7 +815,7 @@ def __init__( self.corner_length: int = corner_length self.color_lookup: ColorLookup = color_lookup - @ensure_cv2_image_for_annotation + @ensure_cv2_image_for_class_method def annotate( self, scene: ImageType, @@ -904,7 +904,7 @@ def __init__( self.thickness: int = thickness self.color_lookup: ColorLookup = color_lookup - @ensure_cv2_image_for_annotation + @ensure_cv2_image_for_class_method def annotate( self, scene: ImageType, @@ -1003,7 +1003,7 @@ def __init__( self.outline_thickness = outline_thickness self.outline_color: Color | ColorPalette = outline_color - @ensure_cv2_image_for_annotation + @ensure_cv2_image_for_class_method def annotate( self, scene: ImageType, @@ -1129,7 +1129,7 @@ def __init__( max_line_length=max_line_length, ) - @ensure_cv2_image_for_annotation + @ensure_cv2_image_for_class_method def annotate( self, scene: ImageType, @@ -1439,7 +1439,7 @@ def __init__( max_line_length=max_line_length, ) - @ensure_pil_image_for_annotation + @ensure_pil_image_for_class_method def annotate( self, scene: ImageType, @@ -1666,7 +1666,7 @@ def __init__( self.position = icon_position self.offset_xy = offset_xy - 
@ensure_cv2_image_for_annotation + @ensure_cv2_image_for_class_method def annotate( self, scene: ImageType, detections: Detections, icon_path: str | list[str] ) -> ImageType: @@ -1755,7 +1755,7 @@ def __init__(self, kernel_size: int = 15): """ self.kernel_size: int = kernel_size - @ensure_cv2_image_for_annotation + @ensure_cv2_image_for_class_method def annotate( self, scene: ImageType, @@ -1844,7 +1844,7 @@ def __init__( self.smooth = smooth self.color_lookup: ColorLookup = color_lookup - @ensure_cv2_image_for_annotation + @ensure_cv2_image_for_class_method def annotate( self, scene: ImageType, @@ -1966,7 +1966,7 @@ def __init__( self.low_hue = low_hue self.heat_mask: npt.NDArray[np.float32] | None = None - @ensure_cv2_image_for_annotation + @ensure_cv2_image_for_class_method def annotate(self, scene: ImageType, detections: Detections) -> ImageType: """ Annotates the scene with a heatmap based on the provided detections. @@ -2048,7 +2048,7 @@ def __init__(self, pixel_size: int = 20): """ self.pixel_size: int = pixel_size - @ensure_cv2_image_for_annotation + @ensure_cv2_image_for_class_method def annotate( self, scene: ImageType, @@ -2145,7 +2145,7 @@ def __init__( self.outline_thickness: int = outline_thickness self.outline_color: Color | ColorPalette = outline_color - @ensure_cv2_image_for_annotation + @ensure_cv2_image_for_class_method def annotate( self, scene: ImageType, @@ -2257,7 +2257,7 @@ def __init__( raise ValueError("roundness attribute must be float between (0, 1.0]") self.roundness: float = roundness - @ensure_cv2_image_for_annotation + @ensure_cv2_image_for_class_method def annotate( self, scene: ImageType, @@ -2397,7 +2397,7 @@ def __init__( else int(0.15 * self.height) ) - @ensure_cv2_image_for_annotation + @ensure_cv2_image_for_class_method def annotate( self, scene: ImageType, @@ -2578,7 +2578,7 @@ def __init__( self.border_thickness: int = border_thickness self.border_color_lookup: ColorLookup = border_color_lookup - 
@ensure_cv2_image_for_annotation + @ensure_cv2_image_for_class_method def annotate( self, scene: ImageType, @@ -2727,7 +2727,7 @@ def __init__( self.opacity = opacity self.force_box = force_box - @ensure_cv2_image_for_annotation + @ensure_cv2_image_for_class_method def annotate(self, scene: ImageType, detections: Detections) -> ImageType: """ Applies a colored overlay to the scene outside of the detected regions. @@ -2825,7 +2825,7 @@ def __init__( self.label_scale = label_scale self.text_thickness = int(self.label_scale + 1.2) - @ensure_cv2_image_for_annotation + @ensure_cv2_image_for_class_method def annotate( self, scene: ImageType, detections_1: Detections, detections_2: Detections ) -> ImageType: diff --git a/supervision/draw/utils.py b/supervision/draw/utils.py index d72ac2c4e9..0d9ffe1241 100644 --- a/supervision/draw/utils.py +++ b/supervision/draw/utils.py @@ -346,10 +346,9 @@ def draw_image( def calculate_optimal_text_scale(resolution_wh: tuple[int, int]) -> float: """ - Calculate optimal font scale based on image resolution. - - Adjusts font scale proportionally to the smallest dimension of the given image - resolution for consistent readability. + Calculate optimal font scale based on image resolution. Adjusts font scale + proportionally to the smallest dimension of the given image resolution for + consistent readability. 
Args: resolution_wh (tuple[int, int]): (width, height) of the image in pixels @@ -359,10 +358,11 @@ def calculate_optimal_text_scale(resolution_wh: tuple[int, int]) -> float: Examples: ```python - from supervision import calculate_optimal_text_scale - calculate_optimal_text_scale((1920, 1080)) + import supervision as sv + + sv.calculate_optimal_text_scale((1920, 1080)) # 1.08 - calculate_optimal_text_scale((640, 480)) + sv.calculate_optimal_text_scale((640, 480)) # 0.48 ``` """ @@ -371,10 +371,9 @@ def calculate_optimal_text_scale(resolution_wh: tuple[int, int]) -> float: def calculate_optimal_line_thickness(resolution_wh: tuple[int, int]) -> int: """ - Calculate optimal line thickness based on image resolution. - - Adjusts the line thickness for readability depending on the smallest dimension - of the provided image resolution. + Calculate optimal line thickness based on image resolution. Adjusts the line + thickness for readability depending on the smallest dimension of the provided + image resolution. 
Args: resolution_wh (tuple[int, int]): (width, height) of the image in pixels @@ -384,10 +383,11 @@ def calculate_optimal_line_thickness(resolution_wh: tuple[int, int]) -> int: Examples: ```python - from supervision import calculate_optimal_line_thickness - calculate_optimal_line_thickness((1920, 1080)) + import supervision as sv + + sv.calculate_optimal_line_thickness((1920, 1080)) # 4 - calculate_optimal_line_thickness((640, 480)) + sv.calculate_optimal_line_thickness((640, 480)) # 2 ``` """ diff --git a/supervision/key_points/annotators.py b/supervision/key_points/annotators.py index 7cc752ebe1..5b649ab427 100644 --- a/supervision/key_points/annotators.py +++ b/supervision/key_points/annotators.py @@ -13,7 +13,7 @@ from supervision.geometry.core import Rect from supervision.key_points.core import KeyPoints from supervision.key_points.skeletons import SKELETONS_BY_VERTEX_COUNT -from supervision.utils.conversion import ensure_cv2_image_for_annotation +from supervision.utils.conversion import ensure_cv2_image_for_class_method class BaseKeyPointAnnotator(ABC): @@ -43,7 +43,7 @@ def __init__( self.color = color self.radius = radius - @ensure_cv2_image_for_annotation + @ensure_cv2_image_for_class_method def annotate(self, scene: ImageType, key_points: KeyPoints) -> ImageType: """ Annotates the given scene with skeleton vertices based on the provided key @@ -120,7 +120,7 @@ def __init__( self.thickness = thickness self.edges = edges - @ensure_cv2_image_for_annotation + @ensure_cv2_image_for_class_method def annotate(self, scene: ImageType, key_points: KeyPoints) -> ImageType: """ Annotates the given scene by drawing lines between specified key points to form diff --git a/supervision/utils/conversion.py b/supervision/utils/conversion.py index 30a9465e26..b1c8f16bfd 100644 --- a/supervision/utils/conversion.py +++ b/supervision/utils/conversion.py @@ -7,7 +7,7 @@ from supervision.draw.base import ImageType -def ensure_cv2_image_for_annotation(annotate_func): +def 
ensure_cv2_image_for_class_method(annotate_func): """ Decorates `BaseAnnotator.annotate` implementations, converts scene to an image type used internally by the annotators, converts back when annotation @@ -32,7 +32,7 @@ def wrapper(self, scene: ImageType, *args, **kwargs): return wrapper -def ensure_cv2_image_for_processing(image_processing_fun): +def ensure_cv2_image_for_standalone_function(image_processing_fun): """ Decorates image processing functions that accept np.ndarray, converting `image` to np.ndarray, converts back when processing is complete. @@ -55,7 +55,7 @@ def wrapper(image: ImageType, *args, **kwargs): return wrapper -def ensure_pil_image_for_annotation(annotate_func): +def ensure_pil_image_for_class_method(annotate_func): """ Decorates image processing functions that accept np.ndarray, converting `image` to PIL image, converts back when processing is complete. diff --git a/supervision/utils/image.py b/supervision/utils/image.py index 5b51b628bc..f8de981845 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -1,11 +1,7 @@ from __future__ import annotations -import itertools -import math import os import shutil -from collections.abc import Callable -from functools import partial from typing import Literal import cv2 @@ -14,21 +10,16 @@ from supervision.annotators.base import ImageType from supervision.draw.color import Color, unify_to_bgr -from supervision.draw.utils import calculate_optimal_text_scale, draw_text -from supervision.geometry.core import Point from supervision.utils.conversion import ( - cv2_to_pillow, - ensure_cv2_image_for_processing, - images_to_cv2, + ensure_cv2_image_for_standalone_function, ) -from supervision.utils.iterables import create_batches, fill RelativePosition = Literal["top", "bottom"] MAX_COLUMNS_FOR_SINGLE_ROW_GRID = 3 -@ensure_cv2_image_for_processing +@ensure_cv2_image_for_standalone_function def crop_image( image: ImageType, xyxy: npt.NDArray[int] | list[int] | tuple[int, int, int, int], @@ 
-89,7 +80,7 @@ def crop_image( return image[y_min:y_max, x_min:x_max] -@ensure_cv2_image_for_processing +@ensure_cv2_image_for_standalone_function def scale_image(image: ImageType, scale_factor: float) -> ImageType: """ Scales the given image based on the given scale factor. @@ -146,7 +137,7 @@ def scale_image(image: ImageType, scale_factor: float) -> ImageType: return cv2.resize(image, (width_new, height_new), interpolation=cv2.INTER_LINEAR) -@ensure_cv2_image_for_processing +@ensure_cv2_image_for_standalone_function def resize_image( image: ImageType, resolution_wh: tuple[int, int], @@ -219,7 +210,7 @@ def resize_image( return cv2.resize(image, (width_new, height_new), interpolation=cv2.INTER_LINEAR) -@ensure_cv2_image_for_processing +@ensure_cv2_image_for_standalone_function def letterbox_image( image: ImageType, resolution_wh: tuple[int, int], @@ -371,6 +362,97 @@ def overlay_image( return image +@ensure_cv2_image_for_standalone_function +def tint_image( + scene: ImageType, + color: Color = Color.BLACK, + opacity: float = 0.5, +) -> ImageType: + """ + Blend a solid-color overlay onto an image. Create a tinted effect by blending a + uniform color overlay with the input image at a specified opacity. 
+ + Args: + scene (ImageType): input image to be tinted (`numpy.ndarray` or `PIL.Image.Image`) + color (Color): overlay tint color + opacity (float): blend ratio between overlay and image (0.0–1.0, inclusive) + + Returns: + ImageType: tinted image in the same format as the input + + Raises: + ValueError: if opacity is outside the range [0.0, 1.0] + + Examples: + ```python + import cv2 + import supervision as sv + + image = cv2.imread("source.jpg") + tinted = sv.tint_image(scene=image, color=sv.Color.BLACK, opacity=0.5) + cv2.imwrite("result.jpg", tinted) + ``` + + ```python + from PIL import Image + import supervision as sv + + image = Image.open("source.jpg") + tinted = sv.tint_image(scene=image, color=Color.BLACK, opacity=0.5) + tinted.save("result.jpg") + ``` + """ # noqa: E501 // docs + if not 0.0 <= opacity <= 1.0: + raise ValueError("opacity must be between 0.0 and 1.0") + + overlay = np.full_like(scene, fill_value=color.as_bgr(), dtype=scene.dtype) + cv2.addWeighted( + src1=overlay, + alpha=opacity, + src2=scene, + beta=1 - opacity, + gamma=0, + dst=scene + ) + return scene + + +@ensure_cv2_image_for_standalone_function +def grayscale_image(scene: ImageType) -> ImageType: + """ + Convert an RGB or BGR image to 3-channel grayscale. The luminance channel is + broadcast to all three channels, ensuring compatibility with color-based drawing + helpers that expect 3-channel input. 
+ + Args: + scene (ImageType): input image to be converted (`numpy.ndarray` or `PIL.Image.Image`) + + Returns: + ImageType: 3-channel grayscale version in the same format as input + + Examples: + ```python + import cv2 + import supervision as sv + + image = cv2.imread("source.jpg") + grayscaled = sv.grayscale_image(scene=image) + cv2.imwrite("result.jpg", grayscaled) + ``` + + ```python + from PIL import Image + import supervision as sv + + image = Image.open("source.jpg") + grayscaled = sv.grayscale_image(scene=image) + grayscaled.save("result.jpg") + ``` + """ # noqa: E501 // docs + grayscaled = cv2.cvtColor(scene, cv2.COLOR_BGR2GRAY) + return cv2.cvtColor(grayscaled, cv2.COLOR_GRAY2BGR) + + class ImageSink: def __init__( self, diff --git a/test/utils/test_conversion.py b/test/utils/test_conversion.py index 65cbd8a1ca..e9fabb0d81 100644 --- a/test/utils/test_conversion.py +++ b/test/utils/test_conversion.py @@ -3,7 +3,7 @@ from supervision.utils.conversion import ( cv2_to_pillow, - ensure_cv2_image_for_processing, + ensure_cv2_image_for_standalone_function, images_to_cv2, pillow_to_cv2, ) @@ -16,7 +16,7 @@ def test_ensure_cv2_image_for_processing_when_pillow_image_submitted( param_a_value = 3 param_b_value = "some" - @ensure_cv2_image_for_processing + @ensure_cv2_image_for_standalone_function def my_custom_processing_function( image: np.ndarray, param_a: int, @@ -55,7 +55,7 @@ def test_ensure_cv2_image_for_processing_when_cv2_image_submitted( param_a_value = 3 param_b_value = "some" - @ensure_cv2_image_for_processing + @ensure_cv2_image_for_standalone_function def my_custom_processing_function( image: np.ndarray, param_a: int, From a933b93234e0bbd6d9571bb6f0e1ff3146c7b6b7 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Thu, 7 Aug 2025 11:51:59 +0200 Subject: [PATCH 050/124] update image utils docs; deprecate `overlay_image` image --- docs/utils/image.md | 6 - supervision/utils/image.py | 295 +++++++++++++++++++++---------------- 2 files changed, 169 insertions(+), 
132 deletions(-) diff --git a/docs/utils/image.md b/docs/utils/image.md index b1daa9c20b..a24d19dd29 100644 --- a/docs/utils/image.md +++ b/docs/utils/image.md @@ -28,12 +28,6 @@ comments: true :::supervision.utils.image.letterbox_image - - -:::supervision.utils.image.overlay_image - diff --git a/supervision/utils/image.py b/supervision/utils/image.py index f8de981845..96a13f2035 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -2,7 +2,6 @@ import os import shutil -from typing import Literal import cv2 import numpy as np @@ -13,10 +12,7 @@ from supervision.utils.conversion import ( ensure_cv2_image_for_standalone_function, ) - -RelativePosition = Literal["top", "bottom"] - -MAX_COLUMNS_FOR_SINGLE_ROW_GRID = 3 +from supervision.utils.internal import deprecated @ensure_cv2_image_for_standalone_function @@ -25,54 +21,48 @@ def crop_image( xyxy: npt.NDArray[int] | list[int] | tuple[int, int, int, int], ) -> ImageType: """ - Crops the given image based on the given bounding box. + Crop image based on bounding box coordinates. Args: - image (ImageType): The image to be cropped. `ImageType` is a flexible type, - accepting either `numpy.ndarray` or `PIL.Image.Image`. - xyxy (Union[np.ndarray, List[int], Tuple[int, int, int, int]]): A bounding box - coordinates in the format `(x_min, y_min, x_max, y_max)`, accepted as either - a `numpy.ndarray`, a `list`, or a `tuple`. + image (`numpy.ndarray` or `PIL.Image.Image`): The image to crop. + xyxy (`numpy.array`, `list[int]`, or `tuple[int, int, int, int]`): + Bounding box coordinates in `(x_min, y_min, x_max, y_max)` format. Returns: - (ImageType): The cropped image. The type is determined by the input type and - may be either a `numpy.ndarray` or `PIL.Image.Image`. - - === "OpenCV" + (`numpy.ndarray` or `PIL.Image.Image`): Cropped image matching input + type. 
+ Examples: ```python import cv2 import supervision as sv - image = cv2.imread() + image = cv2.imread("source.png") image.shape # (1080, 1920, 3) - xyxy = [200, 400, 600, 800] + xyxy = (200, 400, 600, 800) cropped_image = sv.crop_image(image=image, xyxy=xyxy) cropped_image.shape # (400, 400, 3) ``` - === "Pillow" - ```python from PIL import Image import supervision as sv - image = Image.open() + image = Image.open("source.png") image.size # (1920, 1080) - xyxy = [200, 400, 600, 800] + xyxy = (200, 400, 600, 800) cropped_image = sv.crop_image(image=image, xyxy=xyxy) cropped_image.size # (400, 400) ``` - + ![crop_image](https://media.roboflow.com/supervision-docs/crop-image.png){ align=center width="800" } """ # noqa E501 // docs - if isinstance(xyxy, (list, tuple)): xyxy = np.array(xyxy) xyxy = np.round(xyxy).astype(int) @@ -83,28 +73,25 @@ def crop_image( @ensure_cv2_image_for_standalone_function def scale_image(image: ImageType, scale_factor: float) -> ImageType: """ - Scales the given image based on the given scale factor. + Scale image by given factor. Scale factor > 1.0 zooms in, < 1.0 zooms out. Args: - image (ImageType): The image to be scaled. `ImageType` is a flexible type, - accepting either `numpy.ndarray` or `PIL.Image.Image`. - scale_factor (float): The factor by which the image will be scaled. Scale - factor > `1.0` zooms in, < `1.0` zooms out. + image (`numpy.ndarray` or `PIL.Image.Image`): The image to scale. + scale_factor (`float`): Factor by which to scale the image. Returns: - (ImageType): The scaled image. The type is determined by the input type and - may be either a `numpy.ndarray` or `PIL.Image.Image`. + (`numpy.ndarray` or `PIL.Image.Image`): Scaled image matching input + type. Raises: - ValueError: If the scale factor is non-positive. - - === "OpenCV" + ValueError: If scale factor is non-positive. 
+ Examples: ```python import cv2 import supervision as sv - image = cv2.imread() + image = cv2.imread("source.png") image.shape # (1080, 1920, 3) @@ -113,13 +100,11 @@ def scale_image(image: ImageType, scale_factor: float) -> ImageType: # (540, 960, 3) ``` - === "Pillow" - ```python from PIL import Image import supervision as sv - image = Image.open() + image = Image.open("source.png") image.size # (1920, 1080) @@ -144,28 +129,24 @@ def resize_image( keep_aspect_ratio: bool = False, ) -> ImageType: """ - Resizes the given image to a specified resolution. Can maintain the original aspect - ratio or resize directly to the desired dimensions. + Resize image to specified resolution. Can optionally maintain aspect ratio. Args: - image (ImageType): The image to be resized. `ImageType` is a flexible type, - accepting either `numpy.ndarray` or `PIL.Image.Image`. - resolution_wh (Tuple[int, int]): The target resolution as - `(width, height)`. - keep_aspect_ratio (bool): Flag to maintain the image's original - aspect ratio. Defaults to `False`. + image (`numpy.ndarray` or `PIL.Image.Image`): The image to resize. + resolution_wh (`tuple[int, int]`): Target resolution as `(width, height)`. + keep_aspect_ratio (`bool`): Flag to maintain original aspect ratio. + Defaults to `False`. Returns: - (ImageType): The resized image. The type is determined by the input type and - may be either a `numpy.ndarray` or `PIL.Image.Image`. - - === "OpenCV" + (`numpy.ndarray` or `PIL.Image.Image`): Resized image matching input + type. 
+ Examples: ```python import cv2 import supervision as sv - image = cv2.imread() + image = cv2.imread("source.png") image.shape # (1080, 1920, 3) @@ -176,13 +157,11 @@ def resize_image( # (562, 1000, 3) ``` - === "Pillow" - ```python from PIL import Image import supervision as sv - image = Image.open() + image = Image.open("source.png") image.size # (1920, 1080) @@ -192,7 +171,7 @@ def resize_image( resized_image.size # (1000, 562) ``` - + ![resize_image](https://media.roboflow.com/supervision-docs/resize-image.png){ align=center width="800" } """ # noqa E501 // docs if keep_aspect_ratio: @@ -217,51 +196,50 @@ def letterbox_image( color: tuple[int, int, int] | Color = Color.BLACK, ) -> ImageType: """ - Resizes and pads an image to a specified resolution with a given color, maintaining - the original aspect ratio. + Resize image and pad with color to achieve desired resolution while + maintaining aspect ratio. Args: - image (ImageType): The image to be resized. `ImageType` is a flexible type, - accepting either `numpy.ndarray` or `PIL.Image.Image`. - resolution_wh (Tuple[int, int]): The target resolution as - `(width, height)`. - color (Union[Tuple[int, int, int], Color]): The color to pad with. If tuple - provided it should be in BGR format. + image (`numpy.ndarray` or `PIL.Image.Image`): The image to resize and pad. + resolution_wh (`tuple[int, int]`): Target resolution as `(width, height)`. + color (`tuple[int, int, int]` or `Color`): Padding color. If tuple, should + be in BGR format. Defaults to `Color.BLACK`. Returns: - (ImageType): The resized image. The type is determined by the input type and - may be either a `numpy.ndarray` or `PIL.Image.Image`. - - === "OpenCV" + (`numpy.ndarray` or `PIL.Image.Image`): Letterboxed image matching input + type. 
+ Examples: ```python import cv2 import supervision as sv - image = cv2.imread() + image = cv2.imread("source.png") image.shape # (1080, 1920, 3) - letterboxed_image = sv.letterbox_image(image=image, resolution_wh=(1000, 1000)) + letterboxed_image = sv.letterbox_image( + image=image, resolution_wh=(1000, 1000) + ) letterboxed_image.shape # (1000, 1000, 3) ``` - === "Pillow" - ```python from PIL import Image import supervision as sv - image = Image.open() + image = Image.open("source.png") image.size # (1920, 1080) - letterboxed_image = sv.letterbox_image(image=image, resolution_wh=(1000, 1000)) + letterboxed_image = sv.letterbox_image( + image=image, resolution_wh=(1000, 1000) + ) letterboxed_image.size # (1000, 1000) ``` - + ![letterbox_image](https://media.roboflow.com/supervision-docs/letterbox-image.png){ align=center width="800" } """ # noqa E501 // docs assert isinstance(image, np.ndarray) @@ -293,37 +271,59 @@ def letterbox_image( return image_with_borders +@deprecated( + "`overlay_image` function is deprecated and will be removed in " + "`supervision-0.32.0`. Use `draw_image` instead." +) def overlay_image( image: npt.NDArray[np.uint8], overlay: npt.NDArray[np.uint8], anchor: tuple[int, int], ) -> npt.NDArray[np.uint8]: """ - Places an image onto a scene at a given anchor point, handling cases where - the image's position is partially or completely outside the scene's bounds. + Overlay image onto scene at specified anchor point. Handles cases where + overlay position is partially or completely outside scene bounds. Args: - image (np.ndarray): The background scene onto which the image is placed. - overlay (np.ndarray): The image to be placed onto the scene. - anchor (Tuple[int, int]): The `(x, y)` coordinates in the scene where the - top-left corner of the image will be placed. + image (`numpy.array`): Background scene with shape `(height, width, 3)`. + overlay (`numpy.array`): Image to overlay with shape + `(height, width, 3)` or `(height, width, 4)`. 
+ anchor (`tuple[int, int]`): Coordinates `(x, y)` where top-left corner + of overlay will be placed. Returns: - (np.ndarray): The result image with overlay. + (`numpy.array`): Scene with overlay applied, shape `(height, width, 3)`. Examples: - ```python + ``` import cv2 import numpy as np import supervision as sv - image = cv2.imread() + image = cv2.imread("source.png") overlay = np.zeros((400, 400, 3), dtype=np.uint8) - result_image = sv.overlay_image(image=image, overlay=overlay, anchor=(200, 400)) + overlay[:] = (0, 255, 0) # Green overlay + + result_image = sv.overlay_image( + image=image, overlay=overlay, anchor=(200, 400) + ) + cv2.imwrite("target.png", result_image) ``` - ![overlay_image](https://media.roboflow.com/supervision-docs/overlay-image.png){ align=center width="800" } - """ # noqa E501 // docs + ``` + import cv2 + import numpy as np + import supervision as sv + + image = cv2.imread("source.png") + overlay = cv2.imread("overlay.png", cv2.IMREAD_UNCHANGED) + + result_image = sv.overlay_image( + image=image, overlay=overlay, anchor=(100, 100) + ) + cv2.imwrite("target.png", result_image) + ``` + """ scene_height, scene_width = image.shape[:2] image_height, image_width = overlay.shape[:2] anchor_x, anchor_y = anchor @@ -369,39 +369,44 @@ def tint_image( opacity: float = 0.5, ) -> ImageType: """ - Blend a solid-color overlay onto an image. Create a tinted effect by blending a - uniform color overlay with the input image at a specified opacity. + Tint image with solid color overlay at specified opacity. Args: - scene (ImageType): input image to be tinted (`numpy.ndarray` or `PIL.Image.Image`) - color (Color): overlay tint color - opacity (float): blend ratio between overlay and image (0.0–1.0, inclusive) + scene (`numpy.ndarray` or `PIL.Image.Image`): The image to tint. + color (`Color`): Overlay tint color. Defaults to `Color.BLACK`. + opacity (`float`): Blend ratio between overlay and image (0.0-1.0). + Defaults to `0.5`. 
Returns: - ImageType: tinted image in the same format as the input + (`numpy.ndarray` or `PIL.Image.Image`): Tinted image matching input + type. Raises: - ValueError: if opacity is outside the range [0.0, 1.0] + ValueError: If opacity is outside range [0.0, 1.0]. Examples: ```python import cv2 import supervision as sv - image = cv2.imread("source.jpg") - tinted = sv.tint_image(scene=image, color=sv.Color.BLACK, opacity=0.5) - cv2.imwrite("result.jpg", tinted) + image = cv2.imread("source.png") + tinted_image = sv.tint_image( + scene=image, color=sv.Color.BLACK, opacity=0.5 + ) + cv2.imwrite("target.png", tinted_image) ``` ```python from PIL import Image import supervision as sv - image = Image.open("source.jpg") - tinted = sv.tint_image(scene=image, color=Color.BLACK, opacity=0.5) - tinted.save("result.jpg") + image = Image.open("source.png") + tinted_image = sv.tint_image( + scene=image, color=sv.Color.BLACK, opacity=0.5 + ) + tinted_image.save("target.png") ``` - """ # noqa: E501 // docs + """ if not 0.0 <= opacity <= 1.0: raise ValueError("opacity must be between 0.0 and 1.0") @@ -420,35 +425,36 @@ def tint_image( @ensure_cv2_image_for_standalone_function def grayscale_image(scene: ImageType) -> ImageType: """ - Convert an RGB or BGR image to 3-channel grayscale. The luminance channel is - broadcast to all three channels, ensuring compatibility with color-based drawing - helpers that expect 3-channel input. + Convert image to 3-channel grayscale. Luminance channel is broadcast to + all three channels for compatibility with color-based drawing helpers. Args: - scene (ImageType): input image to be converted (`numpy.ndarray` or `PIL.Image.Image`) + scene (`numpy.ndarray` or `PIL.Image.Image`): The image to convert to + grayscale. Returns: - ImageType: 3-channel grayscale version in the same format as input + (`numpy.ndarray` or `PIL.Image.Image`): 3-channel grayscale image + matching input type. 
Examples: ```python import cv2 import supervision as sv - image = cv2.imread("source.jpg") - grayscaled = sv.grayscale_image(scene=image) - cv2.imwrite("result.jpg", grayscaled) + image = cv2.imread("source.png") + grayscale_image = sv.grayscale_image(scene=image) + cv2.imwrite("target.png", grayscale_image) ``` ```python from PIL import Image import supervision as sv - image = Image.open("source.jpg") - grayscaled = sv.grayscale_image(scene=image) - grayscaled.save("result.jpg") + image = Image.open("source.png") + grayscale_image = sv.grayscale_image(scene=image) + grayscale_image.save("target.png") ``` - """ # noqa: E501 // docs + """ grayscaled = cv2.cvtColor(scene, cv2.COLOR_BGR2GRAY) return cv2.cvtColor(grayscaled, cv2.COLOR_GRAY2BGR) @@ -461,27 +467,64 @@ def __init__( image_name_pattern: str = "image_{:05d}.png", ): """ - Initialize a context manager for saving images. + Initialize context manager for saving images to directory. Args: - target_dir_path (str): The target directory where images will be saved. - overwrite (bool): Whether to overwrite the existing directory. - Defaults to False. - image_name_pattern (str): The image file name pattern. - Defaults to "image_{:05d}.png". + target_dir_path (`str`): Target directory path where images will be + saved. + overwrite (`bool`): Whether to overwrite existing directory. + Defaults to `False`. + image_name_pattern (`str`): File name pattern for saved images. + Defaults to `"image_{:05d}.png"`. 
Examples: ```python import supervision as sv - frames_generator = sv.get_video_frames_generator(, stride=2) + frames_generator = sv.get_video_frames_generator( + "source.mp4", stride=2 + ) - with sv.ImageSink(target_dir_path=) as sink: + with sv.ImageSink(target_dir_path="output_frames") as sink: for image in frames_generator: sink.save_image(image=image) + + # Directory structure: + # output_frames/ + # ├── image_00000.png + # ├── image_00001.png + # ├── image_00002.png + # └── image_00003.png ``` - """ # noqa E501 // docs + ```python + import cv2 + import supervision as sv + + image = cv2.imread("source.png") + crop_boxes = [ + ( 0, 0, 400, 400), + (400, 0, 800, 400), + ( 0, 400, 400, 800), + (400, 400, 800, 800) + ] + + with sv.ImageSink( + target_dir_path="image_crops", + overwrite=True + ) as sink: + for i, xyxy in enumerate(crop_boxes): + crop = sv.crop_image(image=image, xyxy=xyxy) + sink.save_image(image=crop, image_name=f"crop_{i}.png") + + # Directory structure: + # image_crops/ + # ├── crop_0.png + # ├── crop_1.png + # ├── crop_2.png + # └── crop_3.png + ``` + """ self.target_dir_path = target_dir_path self.overwrite = overwrite self.image_name_pattern = image_name_pattern @@ -499,14 +542,14 @@ def __enter__(self): def save_image(self, image: np.ndarray, image_name: str | None = None): """ - Save a given image in the target directory. + Save image to target directory with optional custom filename. Args: - image (np.ndarray): The image to be saved. The image must be in BGR color - format. - image_name (Optional[str]): The name to use for the saved image. - If not provided, a name will be - generated using the `image_name_pattern`. + image (`numpy.array`): Image to save with shape `(height, width, 3)` + in BGR format. + image_name (`str` or `None`): Custom filename for saved image. If + `None`, generates name using `image_name_pattern`. Defaults to + `None`. 
""" if image_name is None: image_name = self.image_name_pattern.format(self.image_count) From 4ad36db3bb0f0ae3884b339e65ca7bd85fc4ece2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 7 Aug 2025 09:53:16 +0000 Subject: [PATCH 051/124] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mkdocs.yml | 2 +- supervision/__init__.py | 6 +++--- supervision/annotators/core.py | 2 +- supervision/key_points/annotators.py | 2 +- supervision/utils/image.py | 13 ++++--------- 5 files changed, 10 insertions(+), 15 deletions(-) diff --git a/mkdocs.yml b/mkdocs.yml index 131b7aadae..daf3d09845 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -190,4 +190,4 @@ validation: nav: absolute_links: ignore links: - absolute_links: ignore \ No newline at end of file + absolute_links: ignore diff --git a/supervision/__init__.py b/supervision/__init__.py index 9f16f0bba8..04d3fb2543 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -121,12 +121,12 @@ from supervision.utils.image import ( ImageSink, crop_image, + grayscale_image, letterbox_image, overlay_image, resize_image, scale_image, tint_image, - grayscale_image ) from supervision.utils.notebook import plot_image, plot_images_grid from supervision.utils.video import ( @@ -222,6 +222,7 @@ "get_coco_class_index_mapping", "get_polygon_center", "get_video_frames_generator", + "grayscale_image", "letterbox_image", "list_files_with_extensions", "mask_iou_batch", @@ -245,11 +246,10 @@ "rle_to_mask", "scale_boxes", "scale_image", + "tint_image", "xcycwh_to_xyxy", "xywh_to_xyxy", "xyxy_to_polygons", "xyxy_to_xcycarh", "xyxy_to_xywh", - "tint_image", - "grayscale_image" ] diff --git a/supervision/annotators/core.py b/supervision/annotators/core.py index 555363077a..900d823b2c 100644 --- a/supervision/annotators/core.py +++ 
b/supervision/annotators/core.py @@ -9,7 +9,6 @@ from PIL import Image, ImageDraw, ImageFont from scipy.interpolate import splev, splprep -from supervision.draw.base import ImageType from supervision.annotators.base import BaseAnnotator from supervision.annotators.utils import ( PENDING_TRACK_ID, @@ -30,6 +29,7 @@ polygon_to_mask, xyxy_to_polygons, ) +from supervision.draw.base import ImageType from supervision.draw.color import Color, ColorPalette from supervision.draw.utils import draw_polygon, draw_rounded_rectangle, draw_text from supervision.geometry.core import Point, Position, Rect diff --git a/supervision/key_points/annotators.py b/supervision/key_points/annotators.py index 5b649ab427..c3f9e984b1 100644 --- a/supervision/key_points/annotators.py +++ b/supervision/key_points/annotators.py @@ -6,8 +6,8 @@ import cv2 import numpy as np -from supervision.draw.base import ImageType from supervision.detection.utils.boxes import pad_boxes, spread_out_boxes +from supervision.draw.base import ImageType from supervision.draw.color import Color from supervision.draw.utils import draw_rounded_rectangle from supervision.geometry.core import Rect diff --git a/supervision/utils/image.py b/supervision/utils/image.py index 96a13f2035..70b3d0e4ec 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -60,7 +60,7 @@ def crop_image( cropped_image.size # (400, 400) ``` - + ![crop_image](https://media.roboflow.com/supervision-docs/crop-image.png){ align=center width="800" } """ # noqa E501 // docs if isinstance(xyxy, (list, tuple)): @@ -171,7 +171,7 @@ def resize_image( resized_image.size # (1000, 562) ``` - + ![resize_image](https://media.roboflow.com/supervision-docs/resize-image.png){ align=center width="800" } """ # noqa E501 // docs if keep_aspect_ratio: @@ -239,7 +239,7 @@ def letterbox_image( letterboxed_image.size # (1000, 1000) ``` - + ![letterbox_image](https://media.roboflow.com/supervision-docs/letterbox-image.png){ align=center width="800" } """ 
# noqa E501 // docs assert isinstance(image, np.ndarray) @@ -412,12 +412,7 @@ def tint_image( overlay = np.full_like(scene, fill_value=color.as_bgr(), dtype=scene.dtype) cv2.addWeighted( - src1=overlay, - alpha=opacity, - src2=scene, - beta=1 - opacity, - gamma=0, - dst=scene + src1=overlay, alpha=opacity, src2=scene, beta=1 - opacity, gamma=0, dst=scene ) return scene From 8c49465d4909393af15c4ad96a44e37204521f40 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Thu, 7 Aug 2025 12:29:05 +0200 Subject: [PATCH 052/124] align new utils with naming conventions --- supervision/utils/image.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/supervision/utils/image.py b/supervision/utils/image.py index 96a13f2035..03511153db 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -364,7 +364,7 @@ def overlay_image( @ensure_cv2_image_for_standalone_function def tint_image( - scene: ImageType, + image: ImageType, color: Color = Color.BLACK, opacity: float = 0.5, ) -> ImageType: @@ -372,7 +372,7 @@ def tint_image( Tint image with solid color overlay at specified opacity. Args: - scene (`numpy.ndarray` or `PIL.Image.Image`): The image to tint. + image (`numpy.ndarray` or `PIL.Image.Image`): The image to tint. color (`Color`): Overlay tint color. Defaults to `Color.BLACK`. opacity (`float`): Blend ratio between overlay and image (0.0-1.0). Defaults to `0.5`. 
@@ -391,7 +391,7 @@ def tint_image( image = cv2.imread("source.png") tinted_image = sv.tint_image( - scene=image, color=sv.Color.BLACK, opacity=0.5 + image=image, color=sv.Color.BLACK, opacity=0.5 ) cv2.imwrite("target.png", tinted_image) ``` @@ -402,7 +402,7 @@ def tint_image( image = Image.open("source.png") tinted_image = sv.tint_image( - scene=image, color=sv.Color.BLACK, opacity=0.5 + image=image, color=sv.Color.BLACK, opacity=0.5 ) tinted_image.save("target.png") ``` @@ -410,26 +410,26 @@ def tint_image( if not 0.0 <= opacity <= 1.0: raise ValueError("opacity must be between 0.0 and 1.0") - overlay = np.full_like(scene, fill_value=color.as_bgr(), dtype=scene.dtype) + overlay = np.full_like(image, fill_value=color.as_bgr(), dtype=image.dtype) cv2.addWeighted( src1=overlay, alpha=opacity, - src2=scene, + src2=image, beta=1 - opacity, gamma=0, - dst=scene + dst=image ) - return scene + return image @ensure_cv2_image_for_standalone_function -def grayscale_image(scene: ImageType) -> ImageType: +def grayscale_image(image: ImageType) -> ImageType: """ Convert image to 3-channel grayscale. Luminance channel is broadcast to all three channels for compatibility with color-based drawing helpers. Args: - scene (`numpy.ndarray` or `PIL.Image.Image`): The image to convert to + image (`numpy.ndarray` or `PIL.Image.Image`): The image to convert to grayscale. 
Returns: @@ -442,7 +442,7 @@ def grayscale_image(scene: ImageType) -> ImageType: import supervision as sv image = cv2.imread("source.png") - grayscale_image = sv.grayscale_image(scene=image) + grayscale_image = sv.grayscale_image(image=image) cv2.imwrite("target.png", grayscale_image) ``` @@ -455,7 +455,7 @@ def grayscale_image(scene: ImageType) -> ImageType: grayscale_image.save("target.png") ``` """ - grayscaled = cv2.cvtColor(scene, cv2.COLOR_BGR2GRAY) + grayscaled = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) return cv2.cvtColor(grayscaled, cv2.COLOR_GRAY2BGR) From 72621dce32392eb09f3ffddb1403f2f3dc1ce553 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 7 Aug 2025 10:30:16 +0000 Subject: [PATCH 053/124] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/utils/image.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/supervision/utils/image.py b/supervision/utils/image.py index 03511153db..9050a6eae1 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -60,7 +60,7 @@ def crop_image( cropped_image.size # (400, 400) ``` - + ![crop_image](https://media.roboflow.com/supervision-docs/crop-image.png){ align=center width="800" } """ # noqa E501 // docs if isinstance(xyxy, (list, tuple)): @@ -171,7 +171,7 @@ def resize_image( resized_image.size # (1000, 562) ``` - + ![resize_image](https://media.roboflow.com/supervision-docs/resize-image.png){ align=center width="800" } """ # noqa E501 // docs if keep_aspect_ratio: @@ -239,7 +239,7 @@ def letterbox_image( letterboxed_image.size # (1000, 1000) ``` - + ![letterbox_image](https://media.roboflow.com/supervision-docs/letterbox-image.png){ align=center width="800" } """ # noqa E501 // docs assert isinstance(image, np.ndarray) @@ -412,12 +412,7 @@ def 
tint_image( overlay = np.full_like(image, fill_value=color.as_bgr(), dtype=image.dtype) cv2.addWeighted( - src1=overlay, - alpha=opacity, - src2=image, - beta=1 - opacity, - gamma=0, - dst=image + src1=overlay, alpha=opacity, src2=image, beta=1 - opacity, gamma=0, dst=image ) return image From c8b966be1aa37709bf8319e91d5a9904abd210e8 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Thu, 7 Aug 2025 12:35:09 +0200 Subject: [PATCH 054/124] align new utils with naming conventions --- supervision/utils/image.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/supervision/utils/image.py b/supervision/utils/image.py index 03511153db..c5c84388aa 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -60,9 +60,7 @@ def crop_image( cropped_image.size # (400, 400) ``` - - ![crop_image](https://media.roboflow.com/supervision-docs/crop-image.png){ align=center width="800" } - """ # noqa E501 // docs + """ if isinstance(xyxy, (list, tuple)): xyxy = np.array(xyxy) xyxy = np.round(xyxy).astype(int) @@ -451,7 +449,7 @@ def grayscale_image(image: ImageType) -> ImageType: import supervision as sv image = Image.open("source.png") - grayscale_image = sv.grayscale_image(scene=image) + grayscale_image = sv.grayscale_image(image=image) grayscale_image.save("target.png") ``` """ From dce96d462ef5c601ef4606f103f2686f82aaabbb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 7 Aug 2025 10:35:57 +0000 Subject: [PATCH 055/124] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/utils/image.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/supervision/utils/image.py b/supervision/utils/image.py index c5c84388aa..23981c90cd 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -169,7 
+169,7 @@ def resize_image( resized_image.size # (1000, 562) ``` - + ![resize_image](https://media.roboflow.com/supervision-docs/resize-image.png){ align=center width="800" } """ # noqa E501 // docs if keep_aspect_ratio: @@ -237,7 +237,7 @@ def letterbox_image( letterboxed_image.size # (1000, 1000) ``` - + ![letterbox_image](https://media.roboflow.com/supervision-docs/letterbox-image.png){ align=center width="800" } """ # noqa E501 // docs assert isinstance(image, np.ndarray) @@ -410,12 +410,7 @@ def tint_image( overlay = np.full_like(image, fill_value=color.as_bgr(), dtype=image.dtype) cv2.addWeighted( - src1=overlay, - alpha=opacity, - src2=image, - beta=1 - opacity, - gamma=0, - dst=image + src1=overlay, alpha=opacity, src2=image, beta=1 - opacity, gamma=0, dst=image ) return image From 2ef029ef4bc64e361e5b0779d12e1231616ab23c Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Thu, 7 Aug 2025 13:32:44 +0200 Subject: [PATCH 056/124] visualize examples --- docs/utils/image.md | 1 + supervision/utils/image.py | 28 ++++++++++++++++++---------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/docs/utils/image.md b/docs/utils/image.md index a24d19dd29..17d94eac36 100644 --- a/docs/utils/image.md +++ b/docs/utils/image.md @@ -1,5 +1,6 @@ --- comments: true +status: new --- # Image Utils diff --git a/supervision/utils/image.py b/supervision/utils/image.py index c5c84388aa..7b53937ab5 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -41,7 +41,7 @@ def crop_image( image.shape # (1080, 1920, 3) - xyxy = (200, 400, 600, 800) + xyxy = (400, 400, 800, 800) cropped_image = sv.crop_image(image=image, xyxy=xyxy) cropped_image.shape # (400, 400, 3) @@ -55,12 +55,14 @@ def crop_image( image.size # (1920, 1080) - xyxy = (200, 400, 600, 800) + xyxy = (400, 400, 800, 800) cropped_image = sv.crop_image(image=image, xyxy=xyxy) cropped_image.size # (400, 400) ``` - """ + + 
![crop-image](https://media.roboflow.com/supervision-docs/supervision-docs-crop-image-2.png){ align=center width="1000" } + """ # noqa E501 // docs if isinstance(xyxy, (list, tuple)): xyxy = np.array(xyxy) xyxy = np.round(xyxy).astype(int) @@ -110,7 +112,9 @@ def scale_image(image: ImageType, scale_factor: float) -> ImageType: scaled_image.size # (960, 540) ``` - """ + + ![scale-image](https://media.roboflow.com/supervision-docs/supervision-docs-scale-image-2.png){ align=center width="1000" } + """ # noqa E501 // docs if scale_factor <= 0: raise ValueError("Scale factor must be positive.") @@ -170,7 +174,7 @@ def resize_image( # (1000, 562) ``` - ![resize_image](https://media.roboflow.com/supervision-docs/resize-image.png){ align=center width="800" } + ![resize-image](https://media.roboflow.com/supervision-docs/supervision-docs-resize-image-2.png){ align=center width="1000" } """ # noqa E501 // docs if keep_aspect_ratio: image_ratio = image.shape[1] / image.shape[0] @@ -238,7 +242,7 @@ def letterbox_image( # (1000, 1000) ``` - ![letterbox_image](https://media.roboflow.com/supervision-docs/letterbox-image.png){ align=center width="800" } + ![letterbox-image](https://media.roboflow.com/supervision-docs/supervision-docs-letterbox-image-2.png){ align=center width="1000" } """ # noqa E501 // docs assert isinstance(image, np.ndarray) color = unify_to_bgr(color=color) @@ -389,7 +393,7 @@ def tint_image( image = cv2.imread("source.png") tinted_image = sv.tint_image( - image=image, color=sv.Color.BLACK, opacity=0.5 + image=image, color=sv.Color.ROBOFLOW, opacity=0.5 ) cv2.imwrite("target.png", tinted_image) ``` @@ -400,11 +404,13 @@ def tint_image( image = Image.open("source.png") tinted_image = sv.tint_image( - image=image, color=sv.Color.BLACK, opacity=0.5 + image=image, color=sv.Color.ROBOFLOW, opacity=0.5 ) tinted_image.save("target.png") ``` - """ + + ![tint-image](https://media.roboflow.com/supervision-docs/supervision-docs-tint-image-2.png){ align=center width="1000" 
} + """ # noqa E501 // docs if not 0.0 <= opacity <= 1.0: raise ValueError("opacity must be between 0.0 and 1.0") @@ -452,7 +458,9 @@ def grayscale_image(image: ImageType) -> ImageType: grayscale_image = sv.grayscale_image(image=image) grayscale_image.save("target.png") ``` - """ + + ![grayscale-image](https://media.roboflow.com/supervision-docs/supervision-docs-grayscale-image-2.png){ align=center width="1000" } + """ # noqa E501 // docs grayscaled = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) return cv2.cvtColor(grayscaled, cv2.COLOR_GRAY2BGR) From 6679b1f5a9353776d78b7fa9ccafc099e4f7628b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 7 Aug 2025 14:45:31 +0000 Subject: [PATCH 057/124] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/utils/image.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/supervision/utils/image.py b/supervision/utils/image.py index 7b53937ab5..e8931f2194 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -173,7 +173,7 @@ def resize_image( resized_image.size # (1000, 562) ``` - + ![resize-image](https://media.roboflow.com/supervision-docs/supervision-docs-resize-image-2.png){ align=center width="1000" } """ # noqa E501 // docs if keep_aspect_ratio: @@ -241,7 +241,7 @@ def letterbox_image( letterboxed_image.size # (1000, 1000) ``` - + ![letterbox-image](https://media.roboflow.com/supervision-docs/supervision-docs-letterbox-image-2.png){ align=center width="1000" } """ # noqa E501 // docs assert isinstance(image, np.ndarray) @@ -416,12 +416,7 @@ def tint_image( overlay = np.full_like(image, fill_value=color.as_bgr(), dtype=image.dtype) cv2.addWeighted( - src1=overlay, - alpha=opacity, - src2=image, - beta=1 - opacity, - gamma=0, - dst=image + src1=overlay, alpha=opacity, 
src2=image, beta=1 - opacity, gamma=0, dst=image ) return image @@ -458,7 +453,7 @@ def grayscale_image(image: ImageType) -> ImageType: grayscale_image = sv.grayscale_image(image=image) grayscale_image.save("target.png") ``` - + ![grayscale-image](https://media.roboflow.com/supervision-docs/supervision-docs-grayscale-image-2.png){ align=center width="1000" } """ # noqa E501 // docs grayscaled = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) From 86e10db19e885d96c07ce72fe0a321cb005976b9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 8 Sep 2025 00:07:58 +0000 Subject: [PATCH 058/124] :arrow_up: Bump pypa/gh-action-pypi-publish from 1.12.4 to 1.13.0 Bumps [pypa/gh-action-pypi-publish](https://github.com/pypa/gh-action-pypi-publish) from 1.12.4 to 1.13.0. - [Release notes](https://github.com/pypa/gh-action-pypi-publish/releases) - [Commits](https://github.com/pypa/gh-action-pypi-publish/compare/76f52bc884231f62b9a034ebfe128415bbaabdfc...ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e) --- updated-dependencies: - dependency-name: pypa/gh-action-pypi-publish dependency-version: 1.13.0 dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- .github/workflows/publish-pre-release.yml | 2 +- .github/workflows/publish-release.yml | 2 +- .github/workflows/publish-testpypi.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/publish-pre-release.yml b/.github/workflows/publish-pre-release.yml index 7253538603..ebb4d8de27 100644 --- a/.github/workflows/publish-pre-release.yml +++ b/.github/workflows/publish-pre-release.yml @@ -42,6 +42,6 @@ jobs: uv run twine check --strict dist/* - name: 🚀 Publish to PyPi - uses: pypa/gh-action-pypi-publish@76f52bc884231f62b9a034ebfe128415bbaabdfc # v1.12.4 + uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 with: attestations: true diff --git a/.github/workflows/publish-release.yml b/.github/workflows/publish-release.yml index 2b90a3e9ee..931f9c4a18 100644 --- a/.github/workflows/publish-release.yml +++ b/.github/workflows/publish-release.yml @@ -40,6 +40,6 @@ jobs: uv run twine check --strict dist/* - name: 🚀 Publish to PyPi - uses: pypa/gh-action-pypi-publish@76f52bc884231f62b9a034ebfe128415bbaabdfc # v1.12.4 + uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 with: attestations: true diff --git a/.github/workflows/publish-testpypi.yml b/.github/workflows/publish-testpypi.yml index c15a71f9b8..bb5c66881b 100644 --- a/.github/workflows/publish-testpypi.yml +++ b/.github/workflows/publish-testpypi.yml @@ -37,7 +37,7 @@ jobs: uv run twine check --strict dist/* - name: 🚀 Publish to Test-PyPi - uses: pypa/gh-action-pypi-publish@76f52bc884231f62b9a034ebfe128415bbaabdfc # v1.12.4 + uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 with: repository-url: https://test.pypi.org/legacy/ attestations: true From 50358e96b6e3efc6626d9efc5c705338a38673f4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Sep 2025 00:11:46 +0000 Subject: [PATCH 
059/124] :arrow_up: Bump actions/github-script from 7.0.1 to 8.0.0 Bumps [actions/github-script](https://github.com/actions/github-script) from 7.0.1 to 8.0.0. - [Release notes](https://github.com/actions/github-script/releases) - [Commits](https://github.com/actions/github-script/compare/60a0d83039c74a4aee543508d2ffcb1c3799cdea...ed597411d8f924073f98dfc5c65a23a2325f34cd) --- updated-dependencies: - dependency-name: actions/github-script dependency-version: 8.0.0 dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/clear-cache.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/clear-cache.yml b/.github/workflows/clear-cache.yml index a9cf4544fe..4696895ecd 100644 --- a/.github/workflows/clear-cache.yml +++ b/.github/workflows/clear-cache.yml @@ -16,7 +16,7 @@ jobs: timeout-minutes: 10 steps: - name: Clear cache - uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 with: script: | console.log("Starting cache cleanup...") From 421ad21c98ea26be7738c451d2f10e63a98df057 Mon Sep 17 00:00:00 2001 From: Kshitij Aucharmal Date: Wed, 24 Sep 2025 23:25:12 +0530 Subject: [PATCH 060/124] Fixed: random print("hm") in merge_metadata --- supervision/detection/utils/internal.py | 1 - 1 file changed, 1 deletion(-) diff --git a/supervision/detection/utils/internal.py b/supervision/detection/utils/internal.py index bc6579a8b9..f5d6dc9fbf 100644 --- a/supervision/detection/utils/internal.py +++ b/supervision/detection/utils/internal.py @@ -271,7 +271,6 @@ def merge_metadata(metadata_list: list[dict[str, Any]]) -> dict[str, Any]: "{type(value)}, {type(other_value)}." 
) else: - print("hm") if merged_metadata[key] != value: raise ValueError(f"Conflicting metadata for key: '{key}'.") From 371f8ecc5b977858de5f37ad9dfb242e0331d65e Mon Sep 17 00:00:00 2001 From: Kshitij Aucharmal Date: Fri, 3 Oct 2025 19:46:46 +0530 Subject: [PATCH 061/124] fix: added precommit under dev group --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9bf3b24aa4..a65799adf8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,7 +67,8 @@ dev = [ "ipywidgets>=8.1.1", "jupytext>=1.16.1", "nbconvert>=7.14.2", - "docutils!=0.21" + "docutils!=0.21", + "pre-commit>=3.8.0" ] docs = [ "mkdocs-material[imaging]>=9.5.5", From 272d9ed5bfaf653be28b7e1e080cbe69923c23ff Mon Sep 17 00:00:00 2001 From: Ernest Lim Date: Thu, 16 Oct 2025 16:12:28 +0800 Subject: [PATCH 062/124] fix: Overlapmetric should check string using uppercase conversion --- supervision/detection/utils/iou_and_nms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/supervision/detection/utils/iou_and_nms.py b/supervision/detection/utils/iou_and_nms.py index 1a6f80bc58..1eccd34b18 100644 --- a/supervision/detection/utils/iou_and_nms.py +++ b/supervision/detection/utils/iou_and_nms.py @@ -72,7 +72,7 @@ def from_value(cls, value: OverlapMetric | str) -> OverlapMetric: if isinstance(value, cls): return value if isinstance(value, str): - value = value.lower() + value = value.upper() try: return cls(value) except ValueError: From 03eed3fd64cbf40112f377c094b0789f3e66fc8b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 Oct 2025 00:05:24 +0000 Subject: [PATCH 063/124] :arrow_up: Bump astral-sh/setup-uv from 6.4.3 to 7.1.2 Bumps [astral-sh/setup-uv](https://github.com/astral-sh/setup-uv) from 6.4.3 to 7.1.2. 
- [Release notes](https://github.com/astral-sh/setup-uv/releases) - [Commits](https://github.com/astral-sh/setup-uv/compare/e92bafb6253dcd438e0484186d7669ea7a8ca1cc...85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41) --- updated-dependencies: - dependency-name: astral-sh/setup-uv dependency-version: 7.1.2 dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/publish-docs.yml | 2 +- .github/workflows/publish-pre-release.yml | 2 +- .github/workflows/publish-release.yml | 2 +- .github/workflows/publish-testpypi.yml | 2 +- .github/workflows/test-doc.yml | 2 +- .github/workflows/uv-test.yml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/publish-docs.yml b/.github/workflows/publish-docs.yml index af0d6b1aba..e3b3f038a0 100644 --- a/.github/workflows/publish-docs.yml +++ b/.github/workflows/publish-docs.yml @@ -34,7 +34,7 @@ jobs: fetch-depth: 0 - name: 🐍 Install uv and set Python ${{ matrix.python-version }} - uses: astral-sh/setup-uv@e92bafb6253dcd438e0484186d7669ea7a8ca1cc # v6.4.3 + uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2 with: python-version: ${{ matrix.python-version }} activate-environment: true diff --git a/.github/workflows/publish-pre-release.yml b/.github/workflows/publish-pre-release.yml index 7253538603..8feb282913 100644 --- a/.github/workflows/publish-pre-release.yml +++ b/.github/workflows/publish-pre-release.yml @@ -29,7 +29,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: 🐍 Install uv and set Python version ${{ matrix.python-version }} - uses: astral-sh/setup-uv@e92bafb6253dcd438e0484186d7669ea7a8ca1cc # v6.4.3 + uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2 with: python-version: ${{ matrix.python-version }} activate-environment: true diff --git a/.github/workflows/publish-release.yml b/.github/workflows/publish-release.yml index 
2b90a3e9ee..d24c31d88d 100644 --- a/.github/workflows/publish-release.yml +++ b/.github/workflows/publish-release.yml @@ -27,7 +27,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: 🐍 Install uv and set Python version ${{ matrix.python-version }} - uses: astral-sh/setup-uv@e92bafb6253dcd438e0484186d7669ea7a8ca1cc # v6.4.3 + uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2 with: python-version: ${{ matrix.python-version }} activate-environment: true diff --git a/.github/workflows/publish-testpypi.yml b/.github/workflows/publish-testpypi.yml index c15a71f9b8..612ee0caff 100644 --- a/.github/workflows/publish-testpypi.yml +++ b/.github/workflows/publish-testpypi.yml @@ -24,7 +24,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: 🐍 Install uv and set Python version ${{ matrix.python-version }} - uses: astral-sh/setup-uv@e92bafb6253dcd438e0484186d7669ea7a8ca1cc # v6.4.3 + uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2 with: python-version: ${{ matrix.python-version }} activate-environment: true diff --git a/.github/workflows/test-doc.yml b/.github/workflows/test-doc.yml index 54d9339713..cd41018d0b 100644 --- a/.github/workflows/test-doc.yml +++ b/.github/workflows/test-doc.yml @@ -24,7 +24,7 @@ jobs: fetch-depth: 0 - name: 🐍 Install uv and set Python ${{ matrix.python-version }} - uses: astral-sh/setup-uv@e92bafb6253dcd438e0484186d7669ea7a8ca1cc # v6.4.3 + uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2 with: python-version: ${{ matrix.python-version }} activate-environment: true diff --git a/.github/workflows/uv-test.yml b/.github/workflows/uv-test.yml index 8e6bb65968..65be9c17e0 100644 --- a/.github/workflows/uv-test.yml +++ b/.github/workflows/uv-test.yml @@ -19,7 +19,7 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: 🐍 Install uv and set Python version ${{ 
matrix.python-version }} - uses: astral-sh/setup-uv@e92bafb6253dcd438e0484186d7669ea7a8ca1cc # v6.4.3 + uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2 with: python-version: ${{ matrix.python-version }} activate-environment: true From 35ff87b08d88a18a855c5325b97fe8fa3072adb0 Mon Sep 17 00:00:00 2001 From: AnonymDevOSS Date: Tue, 28 Oct 2025 13:51:28 +0100 Subject: [PATCH 064/124] feat: speed up box iou batch using 2d in-place ops instead of 3d without creating intermediary (N, M, 2) arrays. --- supervision/detection/utils/iou_and_nms.py | 90 ++++++++++++++++++++++ test/detection/utils/functions.py | 38 +++++++++ test/detection/utils/test_iou_and_nms.py | 14 ++++ 3 files changed, 142 insertions(+) create mode 100644 test/detection/utils/functions.py diff --git a/supervision/detection/utils/iou_and_nms.py b/supervision/detection/utils/iou_and_nms.py index 1a6f80bc58..b55eea876f 100644 --- a/supervision/detection/utils/iou_and_nms.py +++ b/supervision/detection/utils/iou_and_nms.py @@ -231,6 +231,96 @@ def box_area(box): return ious +def box_iou_batch_alt( + boxes_true: np.ndarray, + boxes_detection: np.ndarray, + overlap_metric: OverlapMetric = OverlapMetric.IOU, +) -> np.ndarray: + """ + Compute Intersection over Union (IoU) of two sets of bounding boxes - + `boxes_true` and `boxes_detection`. Both sets + of boxes are expected to be in `(x_min, y_min, x_max, y_max)` format. + + Note: + Use `box_iou` when computing IoU between two individual boxes. + For comparing multiple boxes (arrays of boxes), use `box_iou_batch` for better + performance. + + Args: + boxes_true (np.ndarray): 2D `np.ndarray` representing ground-truth boxes. + `shape = (N, 4)` where `N` is number of true objects. + boxes_detection (np.ndarray): 2D `np.ndarray` representing detection boxes. + `shape = (M, 4)` where `M` is number of detected objects. + overlap_metric (OverlapMetric): Metric used to compute the degree of overlap + between pairs of boxes (e.g., IoU, IoS). 
+ + Returns: + np.ndarray: Pairwise IoU of boxes from `boxes_true` and `boxes_detection`. + `shape = (N, M)` where `N` is number of true objects and + `M` is number of detected objects. + + Examples: + ```python + import numpy as np + import supervision as sv + + boxes_true = np.array([ + [100, 100, 200, 200], + [300, 300, 400, 400] + ]) + boxes_detection = np.array([ + [150, 150, 250, 250], + [320, 320, 420, 420] + ]) + + sv.box_iou_batch_alt(boxes_true=boxes_true, boxes_detection=boxes_detection) + # array([ + # [0.14285714, 0. ], + # [0. , 0.47058824] + # ]) + ``` + + """ + + tx1, ty1, tx2, ty2 = boxes_true.T + dx1, dy1, dx2, dy2 = boxes_detection.T + N, M = boxes_true.shape[0], boxes_detection.shape[0] + + top_left_x = np.empty((N, M), dtype=np.float32) + bottom_right_x = np.empty_like(top_left_x) + top_left_y = np.empty_like(top_left_x) + bottom_right_y = np.empty_like(top_left_x) + + np.maximum(tx1[:, None], dx1[None, :], out=top_left_x) + np.minimum(tx2[:, None], dx2[None, :], out=bottom_right_x) + np.maximum(ty1[:, None], dy1[None, :], out=top_left_y) + np.minimum(ty2[:, None], dy2[None, :], out=bottom_right_y) + + np.subtract(bottom_right_x, top_left_x, out=bottom_right_x) # W + np.subtract(bottom_right_y, top_left_y, out=bottom_right_y) # H + np.clip(bottom_right_x, 0.0, None, out=bottom_right_x) + np.clip(bottom_right_y, 0.0, None, out=bottom_right_y) + + area_inter = bottom_right_x * bottom_right_y + + area_true = (tx2 - tx1) * (ty2 - ty1) + area_detection = (dx2 - dx1) * (dy2 - dy1) + + if overlap_metric == OverlapMetric.IOU: + denom = area_true[:, None] + area_detection[None, :] - area_inter + elif overlap_metric == OverlapMetric.IOS: + denom = np.minimum(area_true[:, None], area_detection[None, :]) + else: + raise ValueError( + f"overlap_metric {overlap_metric} is not supported, " + "only 'IOU' and 'IOS' are supported" + ) + + out = np.zeros_like(area_inter, dtype=np.float32) + np.divide(area_inter, denom, out=out, where=denom > 0) + return out + + def 
_jaccard(box_a: list[float], box_b: list[float], is_crowd: bool) -> float: """ Calculate the Jaccard index (intersection over union) between two bounding boxes. diff --git a/test/detection/utils/functions.py b/test/detection/utils/functions.py new file mode 100644 index 0000000000..6b10dfa2fe --- /dev/null +++ b/test/detection/utils/functions.py @@ -0,0 +1,38 @@ +import random + +import numpy as np + + +def generate_boxes( + n: int, + W: int = 1920, + H: int = 1080, + min_size: int = 20, + max_size: int = 200, + seed: int | None = 1, +): + """ + Generate N valid bounding boxes of format [x_min, y_min, x_max, y_max]. + + Args: + n (int): Number of boxes to generate + W (int): Image width + H (int): Image height + min_size (int): Minimum box size (width/height) + max_size (int): Maximum box size (width/height) + seed (int | None): Random seed for reproducibility + + Returns: + list[list[float]] | np.ndarray: List of boxes + """ + random.seed(seed) + boxes = [] + for _ in range(n): + w = random.uniform(min_size, max_size) + h = random.uniform(min_size, max_size) + x1 = random.uniform(0, W - w) + y1 = random.uniform(0, H - h) + x2 = x1 + w + y2 = y1 + h + boxes.append([x1, y1, x2, y2]) + return np.array(boxes, dtype=np.float32) diff --git a/test/detection/utils/test_iou_and_nms.py b/test/detection/utils/test_iou_and_nms.py index 8039bf2425..87fd958ad8 100644 --- a/test/detection/utils/test_iou_and_nms.py +++ b/test/detection/utils/test_iou_and_nms.py @@ -7,10 +7,13 @@ from supervision.detection.utils.iou_and_nms import ( _group_overlapping_boxes, + box_iou_batch, + box_iou_batch_alt, box_non_max_suppression, mask_non_max_merge, mask_non_max_suppression, ) +from test.detection.utils.functions import generate_boxes @pytest.mark.parametrize( @@ -631,3 +634,14 @@ def test_mask_non_max_merge( sorted_result = sorted([sorted(group) for group in result]) sorted_expected_result = sorted([sorted(group) for group in expected_result]) assert sorted_result == sorted_expected_result 
+ + +def test_box_iou_batch_and_alt_equivalence(): + boxes_true = generate_boxes(20, seed=1) + boxes_detection = generate_boxes(30, seed=2) + + iou_a = box_iou_batch(boxes_true, boxes_detection) + iou_b = box_iou_batch_alt(boxes_true, boxes_detection) + + assert iou_a.shape == iou_b.shape + assert np.allclose(iou_a, iou_b, rtol=1e-6, atol=1e-6) From 98476dfacffe5b64889ecef2ff0ca18753b4fccc Mon Sep 17 00:00:00 2001 From: AnonymDevOSS Date: Tue, 28 Oct 2025 14:05:37 +0100 Subject: [PATCH 065/124] feat: add threaded I/O pipeline for video processing Implements pipeline with bounded queues to overlap decode, compute and encode. Reduces I/O stalls. --- supervision/utils/video.py | 126 +++++++++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index 3b281b4e22..5fb2c4bf97 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -1,9 +1,11 @@ from __future__ import annotations +import threading import time from collections import deque from collections.abc import Callable, Generator from dataclasses import dataclass +from queue import Queue import cv2 import numpy as np @@ -255,6 +257,130 @@ def callback(scene: np.ndarray, index: int) -> np.ndarray: sink.write_frame(frame=result_frame) +def process_video_threads( + source_path: str, + target_path: str, + callback: Callable[[np.ndarray, int], np.ndarray], + *, + max_frames: int | None = None, + prefetch: int = 32, + writer_buffer: int = 32, + show_progress: bool = False, + progress_message: str = "Processing video (with threads)", +) -> None: + """ + Process a video using a threaded pipeline that asynchronously + reads frames, applies a callback to each, and writes the results + to an output file. + + Overview: + This function implements a three-stage pipeline designed to maximize + frame throughput. 
+ + │ Reader │ >> │ Processor │ >> │ Writer │ + (thread) (main) (thread) + + - Reader thread: reads frames from disk into a bounded queue ('read_q') + until full, then blocks. This ensures we never load more than 'prefetch' + frames into memory at once. + + - Main thread: dequeues frames, applies the 'callback(frame, idx)', + and enqueues the processed result into 'write_q'. + This is the compute stage. It's important to note that it's not threaded, + so you can safely use any detectors, trackers, or other stateful objects + without synchronization issues. + + - Writer thread: dequeues frames and writes them to disk. + + Both queues are bounded to enforce back-pressure: + - The reader cannot outpace processing (avoids unbounded RAM usage). + - The processor cannot outpace writing (avoids output buffer bloat). + + Summary: + - It's thread-safe: because the callback runs only in the main thread, + using a single stateful detector/tracker inside callback does not require + synchronization with the reader/writer threads. + + - While the main thread processes frame N, the reader is already decoding frame N+1, + and the writer is encoding frame N-1. They operate concurrently without blocking + each other. + + - When is it fastest? + - When there's heavy computation in the callback function that releases + the Python GIL (for example, OpenCV filters, resizes, color conversions, ...) + - When using CUDA or GPU-accelerated inference. + + - When is it better not to use it? + - When the callback function is Python-heavy and GIL-bound. In that case, + using a process-based approach is more effective. + + Args: + source_path (str): The path to the source video file. + target_path (str): The path to the target video file. + callback (Callable[[np.ndarray, int], np.ndarray]): A function that takes in + a numpy ndarray representation of a video frame and an + int index of the frame and returns a processed numpy ndarray + representation of the frame. 
+ max_frames (Optional[int]): The maximum number of frames to process. + prefetch (int): The maximum number of frames buffered by the reader thread. + writer_buffer (int): The maximum number of frames buffered before writing. + show_progress (bool): Whether to show a progress bar. + progress_message (str): The message to display in the progress bar. + """ + + source_video_info = VideoInfo.from_video_path(video_path=source_path) + total_frames = ( + min(source_video_info.total_frames, max_frames) + if max_frames is not None + else source_video_info.total_frames + ) + + # Each queue includes frames + sentinel + read_q: Queue[tuple[int, np.ndarray] | None] = Queue(maxsize=prefetch) + write_q: Queue[np.ndarray | None] = Queue(maxsize=writer_buffer) + + def reader_thread(): + gen = get_video_frames_generator(source_path=source_path, end=max_frames) + for idx, frame in enumerate(gen): + read_q.put((idx, frame)) + read_q.put(None) # sentinel + + def writer_thread(video_sink: VideoSink): + while True: + frame = write_q.get() + if frame is None: + break + video_sink.write_frame(frame=frame) + + # Heads up! We set 'daemon=True' so this thread won't block program exit + # if the main thread finishes first. + t_reader = threading.Thread(target=reader_thread, daemon=True) + with VideoSink(target_path=target_path, video_info=source_video_info) as sink: + t_writer = threading.Thread(target=writer_thread, args=(sink,), daemon=True) + t_reader.start() + t_writer.start() + + process_bar = tqdm( + total=total_frames, disable=not show_progress, desc=progress_message + ) + + # Main thread: we take a frame, apply function and update process bar. 
+ while True: + item = read_q.get() + if item is None: + break + idx, frame = item + out = callback(frame, idx) + write_q.put(out) + if total_frames is not None: + process_bar.update(1) + + write_q.put(None) + t_reader.join() + t_writer.join() + process_bar.close() + + class FPSMonitor: """ A class for monitoring frames per second (FPS) to benchmark latency. From 03f623920a045fb4a352b341ce84174e344bae3a Mon Sep 17 00:00:00 2001 From: AnonymDevOSS Date: Tue, 28 Oct 2025 14:05:52 +0100 Subject: [PATCH 066/124] feat: tests for add threaded I/O pipeline for video processing --- test/utils/test_process_video.py | 95 ++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 test/utils/test_process_video.py diff --git a/test/utils/test_process_video.py b/test/utils/test_process_video.py new file mode 100644 index 0000000000..2c3c68a9bb --- /dev/null +++ b/test/utils/test_process_video.py @@ -0,0 +1,95 @@ +from pathlib import Path + +import cv2 +import numpy as np +import pytest + +import supervision as sv + + +def make_video( + path: Path, w: int = 160, h: int = 96, fps: int = 20, frames: int = 24 +) -> None: + """Create a small synthetic test video with predictable frame-colors.""" + fourcc = cv2.VideoWriter_fourcc(*"mp4v") + writer = cv2.VideoWriter(str(path), fourcc, fps, (w, h)) + assert writer.isOpened(), "Failed to open VideoWriter" + for i in range(frames): + v = (i * 11) % 250 + frame = np.full((h, w, 3), (v, 255 - v, (2 * v) % 255), np.uint8) + writer.write(frame) + writer.release() + + +def read_frames(path: Path) -> list[np.ndarray]: + """Read all frames from a video into memory.""" + cap = cv2.VideoCapture(str(path)) + assert cap.isOpened(), f"Cannot open video: {path}" + out = [] + while True: + ok, frame = cap.read() + if not ok: + break + out.append(frame) + cap.release() + return out + + +def frames_equal(a: np.ndarray, b: np.ndarray, max_abs_tol: int = 0) -> bool: + """Return True if frames are the same within acertain 
tolerance.""" + if a.shape != b.shape: + return False + diff = np.abs(a.astype(np.int16) - b.astype(np.int16)) + return diff.max() <= max_abs_tol + + +def callback_noop(frame: np.ndarray, idx: int) -> np.ndarray: + """No-op callback: validates pure pipeline correctness.""" + return frame + + +def callbackb_opencv(frame: np.ndarray, idx: int) -> np.ndarray: + """ + Simulates some cv2 task... + """ + g = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + return cv2.cvtColor(g, cv2.COLOR_GRAY2BGR) + + +@pytest.mark.parametrize( + "callback", [callback_noop, callbackb_opencv], ids=["identity", "opencv"] +) +def test_process_video_vs_threads_same_output(callback, tmp_path: Path): + """ + Ensure that process_video() and process_video_threads() produce identical + results for the same synthetic source video and callback. + """ + name = callback.__name__ + src = tmp_path / f"src_{name}.mp4" + dst_single = tmp_path / f"out_single_{name}.mp4" + dst_threads = tmp_path / f"out_threads_{name}.mp4" + + make_video(src, frames=24) + + sv.utils.video.process_video( + source_path=str(src), + target_path=str(dst_single), + callback=callback, + show_progress=False, + ) + sv.utils.video.process_video_threads( + source_path=str(src), + target_path=str(dst_threads), + callback=callback, + prefetch=4, + writer_buffer=4, + show_progress=False, + ) + + frames_single = read_frames(dst_single) + frames_threads = read_frames(dst_threads) + + assert len(frames_single) == len(frames_threads) != 0, "Frame count mismatch." + + for i, (fs, ft) in enumerate(zip(frames_single, frames_threads)): + assert frames_equal(fs, ft), f"Frame {i} is different." 
From 32dca290ef2f3f11aac832e061f8cf1c64cc48aa Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Wed, 5 Nov 2025 13:00:11 +0100 Subject: [PATCH 067/124] `xyxy_to_mask` implementation + tests --- supervision/__init__.py | 2 + supervision/detection/utils/converters.py | 30 ++++ test/detection/utils/test_converters.py | 181 ++++++++++++++++++++++ 3 files changed, 213 insertions(+) diff --git a/supervision/__init__.py b/supervision/__init__.py index 04d3fb2543..17ed8c26d3 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -61,6 +61,7 @@ from supervision.detection.utils.converters import ( mask_to_polygons, mask_to_xyxy, + xyxy_to_mask, polygon_to_mask, polygon_to_xyxy, xcycwh_to_xyxy, @@ -252,4 +253,5 @@ "xyxy_to_polygons", "xyxy_to_xcycarh", "xyxy_to_xywh", + "xyxy_to_mask" ] diff --git a/supervision/detection/utils/converters.py b/supervision/detection/utils/converters.py index 9e02783a04..b167828d40 100644 --- a/supervision/detection/utils/converters.py +++ b/supervision/detection/utils/converters.py @@ -229,6 +229,36 @@ def mask_to_xyxy(masks: np.ndarray) -> np.ndarray: return xyxy +def xyxy_to_mask(boxes: np.ndarray, resolution_wh: tuple[int, int]) -> np.ndarray: + """ + Converts a 2D `np.ndarray` of bounding boxes into a 3D `np.ndarray` of bool masks. 
+ + Parameters: + boxes (np.ndarray): A 2D `np.ndarray` of shape `(N, 4)` + containing bounding boxes `(x_min, y_min, x_max, y_max)` + resolution_wh (Tuple[int, int]): A tuple `(width, height)` specifying + the resolution of the output masks + + Returns: + np.ndarray: A 3D `np.ndarray` of shape `(N, height, width)` + containing 2D bool masks for each bounding box + """ + width, height = resolution_wh + n = boxes.shape[0] + masks = np.zeros((n, height, width), dtype=bool) + + for i, (x_min, y_min, x_max, y_max) in enumerate(boxes): + x_min = max(0, int(x_min)) + y_min = max(0, int(y_min)) + x_max = min(width - 1, int(x_max)) + y_max = min(height - 1, int(y_max)) + + if x_max >= x_min and y_max >= y_min: + masks[i, y_min:y_max + 1, x_min:x_max + 1] = True + + return masks + + def mask_to_polygons(mask: np.ndarray) -> list[np.ndarray]: """ Converts a binary mask to a list of polygons. diff --git a/test/detection/utils/test_converters.py b/test/detection/utils/test_converters.py index e13b150042..4dae899350 100644 --- a/test/detection/utils/test_converters.py +++ b/test/detection/utils/test_converters.py @@ -8,6 +8,7 @@ xywh_to_xyxy, xyxy_to_xcycarh, xyxy_to_xywh, + xyxy_to_mask ) @@ -129,3 +130,183 @@ def test_xyxy_to_xcycarh(xyxy: np.ndarray, expected_result: np.ndarray) -> None: def test_xcycwh_to_xyxy(xcycwh: np.ndarray, expected_result: np.ndarray) -> None: result = xcycwh_to_xyxy(xcycwh) np.testing.assert_array_equal(result, expected_result) + + +@pytest.mark.parametrize( + "boxes,resolution_wh,expected", + [ + # 0) Empty input + ( + np.array([], dtype=float).reshape(0, 4), + (5, 4), + np.array([], dtype=bool).reshape(0, 4, 5), + ), + + # 1) Single pixel box + ( + np.array([[2, 1, 2, 1]], dtype=float), + (5, 4), + np.array( + [ + [ + [False, False, False, False, False], + [False, False, True, False, False], + [False, False, False, False, False], + [False, False, False, False, False], + ] + ], + dtype=bool, + ), + ), + + # 2) Horizontal line, inclusive bounds + ( 
+ np.array([[1, 2, 3, 2]], dtype=float), + (5, 4), + np.array( + [ + [ + [False, False, False, False, False], + [False, False, False, False, False], + [False, True, True, True, False], + [False, False, False, False, False], + ] + ], + dtype=bool, + ), + ), + + # 3) Vertical line, inclusive bounds + ( + np.array([[3, 0, 3, 2]], dtype=float), + (5, 4), + np.array( + [ + [ + [False, False, False, True, False], + [False, False, False, True, False], + [False, False, False, True, False], + [False, False, False, False, False], + ] + ], + dtype=bool, + ), + ), + + # 4) Proper rectangle fill + ( + np.array([[1, 1, 3, 2]], dtype=float), + (5, 4), + np.array( + [ + [ + [False, False, False, False, False], + [False, True, True, True, False], + [False, True, True, True, False], + [False, False, False, False, False], + ] + ], + dtype=bool, + ), + ), + + # 5) Negative coordinates clipped to [0, 0] + ( + np.array([[-2, -1, 1, 1]], dtype=float), + (5, 4), + np.array( + [ + [ + [ True, True, False, False, False], + [ True, True, False, False, False], + [False, False, False, False, False], + [False, False, False, False, False], + ] + ], + dtype=bool, + ), + ), + + # 6) Overflow coordinates clipped to width-1 and height-1 + ( + np.array([[3, 2, 10, 10]], dtype=float), + (5, 4), + np.array( + [ + [ + [False, False, False, False, False], + [False, False, False, False, False], + [False, False, False, True, True], + [False, False, False, True, True], + ] + ], + dtype=bool, + ), + ), + + # 7) Invalid box where max < min after ints, mask stays empty + ( + np.array([[3, 2, 1, 4]], dtype=float), + (5, 4), + np.array( + [ + [ + [False, False, False, False, False], + [False, False, False, False, False], + [False, False, False, False, False], + [False, False, False, False, False], + ] + ], + dtype=bool, + ), + ), + + # 8) Fractional coordinates are floored by int conversion + # (0.2,0.2)-(2.8,1.9) -> (0,0)-(2,1) + ( + np.array([[0.2, 0.2, 2.8, 1.9]], dtype=float), + (5, 4), + np.array( + [ + [ + 
[ True, True, True, False, False], + [ True, True, True, False, False], + [False, False, False, False, False], + [False, False, False, False, False], + ] + ], + dtype=bool, + ), + ), + + # 9) Multiple boxes, separate masks + ( + np.array([[0, 0, 1, 0], [2, 1, 4, 3]], dtype=float), + (5, 4), + np.array( + [ + # Box 0: row 0, cols 0..1 + [ + [ True, True, False, False, False], + [False, False, False, False, False], + [False, False, False, False, False], + [False, False, False, False, False], + ], + # Box 1: rows 1..3, cols 2..4 + [ + [False, False, False, False, False], + [False, False, True, True, True], + [False, False, True, True, True], + [False, False, True, True, True], + ], + ], + dtype=bool, + ), + ), + ], +) +def test_xyxy_to_mask(boxes: np.ndarray, resolution_wh, expected: np.ndarray) -> None: + result = xyxy_to_mask(boxes, resolution_wh) + assert result.dtype == np.bool_ + assert result.shape == expected.shape + np.testing.assert_array_equal(result, expected) \ No newline at end of file From a3ea8cdb1f787bb5423e3827dfeeed50a5fdae20 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Wed, 5 Nov 2025 13:01:30 +0100 Subject: [PATCH 068/124] bump version from `0.27.0rc1` to `0.27.0rc2` --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9bf3b24aa4..a7910db36a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "supervision" description = "A set of easy-to-use utils that will come in handy in any Computer Vision project" license = { text = "MIT" } -version = "0.27.0rc1" +version = "0.27.0rc2" readme = "README.md" requires-python = ">=3.9" authors = [ From 3e7e4caf3f645a2cf3b064ef970f5251ca8b5db5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 5 Nov 2025 12:03:50 +0000 Subject: [PATCH 069/124] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/__init__.py | 4 +- supervision/detection/utils/converters.py | 2 +- test/detection/utils/test_converters.py | 47 +++++++++-------------- 3 files changed, 22 insertions(+), 31 deletions(-) diff --git a/supervision/__init__.py b/supervision/__init__.py index 17ed8c26d3..a70dd20feb 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -61,11 +61,11 @@ from supervision.detection.utils.converters import ( mask_to_polygons, mask_to_xyxy, - xyxy_to_mask, polygon_to_mask, polygon_to_xyxy, xcycwh_to_xyxy, xywh_to_xyxy, + xyxy_to_mask, xyxy_to_polygons, xyxy_to_xcycarh, xyxy_to_xywh, @@ -250,8 +250,8 @@ "tint_image", "xcycwh_to_xyxy", "xywh_to_xyxy", + "xyxy_to_mask", "xyxy_to_polygons", "xyxy_to_xcycarh", "xyxy_to_xywh", - "xyxy_to_mask" ] diff --git a/supervision/detection/utils/converters.py b/supervision/detection/utils/converters.py index b167828d40..7c39ed1efc 100644 --- a/supervision/detection/utils/converters.py +++ b/supervision/detection/utils/converters.py @@ -254,7 +254,7 @@ def xyxy_to_mask(boxes: np.ndarray, resolution_wh: tuple[int, int]) -> np.ndarra y_max = min(height - 1, int(y_max)) if x_max >= x_min and y_max >= y_min: - masks[i, y_min:y_max + 1, x_min:x_max + 1] = True + masks[i, y_min : y_max + 1, x_min : x_max + 1] = True return masks diff --git a/test/detection/utils/test_converters.py b/test/detection/utils/test_converters.py index 4dae899350..52a3b52004 100644 --- a/test/detection/utils/test_converters.py +++ b/test/detection/utils/test_converters.py @@ -6,9 +6,9 @@ from supervision.detection.utils.converters import ( xcycwh_to_xyxy, xywh_to_xyxy, + xyxy_to_mask, xyxy_to_xcycarh, xyxy_to_xywh, - xyxy_to_mask ) @@ -141,7 +141,6 @@ def test_xcycwh_to_xyxy(xcycwh: np.ndarray, expected_result: np.ndarray) -> None (5, 4), np.array([], dtype=bool).reshape(0, 4, 5), ), - # 1) Single pixel box ( np.array([[2, 1, 2, 1]], dtype=float), @@ -150,7 +149,7 @@ 
def test_xcycwh_to_xyxy(xcycwh: np.ndarray, expected_result: np.ndarray) -> None [ [ [False, False, False, False, False], - [False, False, True, False, False], + [False, False, True, False, False], [False, False, False, False, False], [False, False, False, False, False], ] @@ -158,7 +157,6 @@ def test_xcycwh_to_xyxy(xcycwh: np.ndarray, expected_result: np.ndarray) -> None dtype=bool, ), ), - # 2) Horizontal line, inclusive bounds ( np.array([[1, 2, 3, 2]], dtype=float), @@ -168,14 +166,13 @@ def test_xcycwh_to_xyxy(xcycwh: np.ndarray, expected_result: np.ndarray) -> None [ [False, False, False, False, False], [False, False, False, False, False], - [False, True, True, True, False], + [False, True, True, True, False], [False, False, False, False, False], ] ], dtype=bool, ), ), - # 3) Vertical line, inclusive bounds ( np.array([[3, 0, 3, 2]], dtype=float), @@ -183,16 +180,15 @@ def test_xcycwh_to_xyxy(xcycwh: np.ndarray, expected_result: np.ndarray) -> None np.array( [ [ - [False, False, False, True, False], - [False, False, False, True, False], - [False, False, False, True, False], + [False, False, False, True, False], + [False, False, False, True, False], + [False, False, False, True, False], [False, False, False, False, False], ] ], dtype=bool, ), ), - # 4) Proper rectangle fill ( np.array([[1, 1, 3, 2]], dtype=float), @@ -201,15 +197,14 @@ def test_xcycwh_to_xyxy(xcycwh: np.ndarray, expected_result: np.ndarray) -> None [ [ [False, False, False, False, False], - [False, True, True, True, False], - [False, True, True, True, False], + [False, True, True, True, False], + [False, True, True, True, False], [False, False, False, False, False], ] ], dtype=bool, ), ), - # 5) Negative coordinates clipped to [0, 0] ( np.array([[-2, -1, 1, 1]], dtype=float), @@ -217,8 +212,8 @@ def test_xcycwh_to_xyxy(xcycwh: np.ndarray, expected_result: np.ndarray) -> None np.array( [ [ - [ True, True, False, False, False], - [ True, True, False, False, False], + [True, True, False, False, 
False], + [True, True, False, False, False], [False, False, False, False, False], [False, False, False, False, False], ] @@ -226,7 +221,6 @@ def test_xcycwh_to_xyxy(xcycwh: np.ndarray, expected_result: np.ndarray) -> None dtype=bool, ), ), - # 6) Overflow coordinates clipped to width-1 and height-1 ( np.array([[3, 2, 10, 10]], dtype=float), @@ -236,14 +230,13 @@ def test_xcycwh_to_xyxy(xcycwh: np.ndarray, expected_result: np.ndarray) -> None [ [False, False, False, False, False], [False, False, False, False, False], - [False, False, False, True, True], - [False, False, False, True, True], + [False, False, False, True, True], + [False, False, False, True, True], ] ], dtype=bool, ), ), - # 7) Invalid box where max < min after ints, mask stays empty ( np.array([[3, 2, 1, 4]], dtype=float), @@ -260,7 +253,6 @@ def test_xcycwh_to_xyxy(xcycwh: np.ndarray, expected_result: np.ndarray) -> None dtype=bool, ), ), - # 8) Fractional coordinates are floored by int conversion # (0.2,0.2)-(2.8,1.9) -> (0,0)-(2,1) ( @@ -269,8 +261,8 @@ def test_xcycwh_to_xyxy(xcycwh: np.ndarray, expected_result: np.ndarray) -> None np.array( [ [ - [ True, True, True, False, False], - [ True, True, True, False, False], + [True, True, True, False, False], + [True, True, True, False, False], [False, False, False, False, False], [False, False, False, False, False], ] @@ -278,7 +270,6 @@ def test_xcycwh_to_xyxy(xcycwh: np.ndarray, expected_result: np.ndarray) -> None dtype=bool, ), ), - # 9) Multiple boxes, separate masks ( np.array([[0, 0, 1, 0], [2, 1, 4, 3]], dtype=float), @@ -287,7 +278,7 @@ def test_xcycwh_to_xyxy(xcycwh: np.ndarray, expected_result: np.ndarray) -> None [ # Box 0: row 0, cols 0..1 [ - [ True, True, False, False, False], + [True, True, False, False, False], [False, False, False, False, False], [False, False, False, False, False], [False, False, False, False, False], @@ -295,9 +286,9 @@ def test_xcycwh_to_xyxy(xcycwh: np.ndarray, expected_result: np.ndarray) -> None # Box 1: rows 
1..3, cols 2..4 [ [False, False, False, False, False], - [False, False, True, True, True], - [False, False, True, True, True], - [False, False, True, True, True], + [False, False, True, True, True], + [False, False, True, True, True], + [False, False, True, True, True], ], ], dtype=bool, @@ -309,4 +300,4 @@ def test_xyxy_to_mask(boxes: np.ndarray, resolution_wh, expected: np.ndarray) -> result = xyxy_to_mask(boxes, resolution_wh) assert result.dtype == np.bool_ assert result.shape == expected.shape - np.testing.assert_array_equal(result, expected) \ No newline at end of file + np.testing.assert_array_equal(result, expected) From 1c8398942a31ed34cc8ad6022d69f4525d615b9e Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Wed, 5 Nov 2025 13:12:49 +0100 Subject: [PATCH 070/124] docs and examples --- docs/detection/utils/converters.md | 6 ++++ supervision/detection/utils/converters.py | 34 +++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/docs/detection/utils/converters.md b/docs/detection/utils/converters.md index 48bec65fe4..3978f108d2 100644 --- a/docs/detection/utils/converters.md +++ b/docs/detection/utils/converters.md @@ -58,3 +58,9 @@ status: new :::supervision.detection.utils.converters.polygon_to_xyxy + + + +:::supervision.detection.utils.converters.xyxy_to_mask \ No newline at end of file diff --git a/supervision/detection/utils/converters.py b/supervision/detection/utils/converters.py index 7c39ed1efc..4aef2dc87c 100644 --- a/supervision/detection/utils/converters.py +++ b/supervision/detection/utils/converters.py @@ -242,6 +242,40 @@ def xyxy_to_mask(boxes: np.ndarray, resolution_wh: tuple[int, int]) -> np.ndarra Returns: np.ndarray: A 3D `np.ndarray` of shape `(N, height, width)` containing 2D bool masks for each bounding box + + Examples: + ```python + import numpy as np + import supervision as sv + + boxes = np.array([[0, 0, 2, 2]]) + + sv.xyxy_to_mask(boxes, (5, 5)) + # array([ + # [[ True, True, True, False, False], + # [ True, True, 
True, False, False], + # [ True, True, True, False, False], + # [False, False, False, False, False], + # [False, False, False, False, False]] + # ]) + + boxes = np.array([[0, 0, 1, 1], [3, 3, 4, 4]]) + + sv.xyxy_to_mask(boxes, (5, 5)) + # array([ + # [[ True, True, False, False, False], + # [ True, True, False, False, False], + # [False, False, False, False, False], + # [False, False, False, False, False], + # [False, False, False, False, False]], + # + # [[False, False, False, False, False], + # [False, False, False, False, False], + # [False, False, False, False, False], + # [False, False, False, True, True], + # [False, False, False, True, True]] + # ]) + ``` """ width, height = resolution_wh n = boxes.shape[0] From 89ee57f5199da87980944eb43173a4f80b0d3cb0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 5 Nov 2025 12:14:39 +0000 Subject: [PATCH 071/124] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/detection/utils/converters.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/detection/utils/converters.md b/docs/detection/utils/converters.md index 3978f108d2..b6b1e2af6c 100644 --- a/docs/detection/utils/converters.md +++ b/docs/detection/utils/converters.md @@ -63,4 +63,4 @@ status: new

xyxy_to_mask

-:::supervision.detection.utils.converters.xyxy_to_mask \ No newline at end of file +:::supervision.detection.utils.converters.xyxy_to_mask From 0bd60874caa3e00e667d27b21988c181b894b4c1 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Wed, 5 Nov 2025 13:39:13 +0100 Subject: [PATCH 072/124] bump version from `0.27.0rc2` to `0.27.0rc3` --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a7910db36a..799e71d24d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "supervision" description = "A set of easy-to-use utils that will come in handy in any Computer Vision project" license = { text = "MIT" } -version = "0.27.0rc2" +version = "0.27.0rc3" readme = "README.md" requires-python = ">=3.9" authors = [ From 6578b4ea521823fb8689bd1d8aa0ffac0a453b58 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Wed, 5 Nov 2025 20:18:10 +0100 Subject: [PATCH 073/124] initial version of `filter_segments_by_distance` --- docs/detection/utils/masks.md | 6 + supervision/__init__.py | 2 + supervision/detection/utils/masks.py | 137 +++++++++++++++++++ test/detection/utils/test_masks.py | 192 ++++++++++++++++++++++++++- 4 files changed, 336 insertions(+), 1 deletion(-) diff --git a/docs/detection/utils/masks.md b/docs/detection/utils/masks.md index 9e53a6baa1..99097bef6f 100644 --- a/docs/detection/utils/masks.md +++ b/docs/detection/utils/masks.md @@ -22,3 +22,9 @@ status: new :::supervision.detection.utils.masks.contains_multiple_segments + + + +:::supervision.detection.utils.masks.filter_segments_by_distance diff --git a/supervision/__init__.py b/supervision/__init__.py index a70dd20feb..15ebf2c796 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -88,6 +88,7 @@ contains_holes, contains_multiple_segments, move_masks, + filter_segments_by_distance, ) from supervision.detection.utils.polygons import ( approximate_polygon, @@ -219,6 +220,7 @@ "draw_text", "edit_distance", 
"filter_polygons_by_area", + "filter_segments_by_distance", "fuzzy_match_index", "get_coco_class_index_mapping", "get_polygon_center", diff --git a/supervision/detection/utils/masks.py b/supervision/detection/utils/masks.py index c5cfee0172..119439508b 100644 --- a/supervision/detection/utils/masks.py +++ b/supervision/detection/utils/masks.py @@ -1,5 +1,7 @@ from __future__ import annotations +from typing import Literal + import cv2 import numpy as np import numpy.typing as npt @@ -260,3 +262,138 @@ def resize_masks(masks: np.ndarray, max_dimension: int = 640) -> np.ndarray: resized_masks = masks[:, yv, xv] return resized_masks.reshape(masks.shape[0], new_height, new_width) + + +def filter_segments_by_distance( + mask: npt.NDArray[np.bool_], + absolute_distance: float | None = 100.0, + relative_distance: float | None = None, + connectivity: int = 8, + mode: Literal["edge", "centroid"] = "edge", +) -> npt.NDArray[np.bool_]: + """ + Keep the largest connected component and any other components within a distance threshold. + + Distance can be absolute in pixels or relative to the image diagonal. + + Args: + mask: Boolean mask HxW. + absolute_distance: Max allowed distance in pixels to the main component. + Ignored if `relative_distance` is provided. + relative_distance: Fraction of the diagonal. If set, threshold = fraction * sqrt(H^2 + W^2). + connectivity: Defines which neighboring pixels are considered connected. + - 4-connectedness: Only orthogonal neighbors. + ``` + [ ][X][ ] + [X][O][X] + [ ][X][ ] + ``` + - 8-connectedness: Includes diagonal neighbors. + ``` + [X][X][X] + [X][O][X] + [X][X][X] + ``` + Default is 8. + mode: Defines how distance between components is measured. + - "edge": Uses distance between nearest edges (via distance transform). + - "centroid": Uses distance between component centroids. + + Returns: + Boolean mask after filtering. 
+ + Examples: + ```python + import numpy as np + import supervision as sv + + mask = np.array([ + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + ], dtype=bool) + + sv.filter_segments_by_distance( + mask, + absolute_distance=2, + mode="edge", + connectivity=8 + ).astype(int) + + # np.array([ + # [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + # [0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0], + # [0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0], + # [0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0], + # [0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0], + # [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + # [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + # [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + # [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + # [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + # [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + # [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + # ], dtype=bool) + + # The nearby 2×2 block at columns 6–7 is kept because its edge distance + # is within 2 pixels. The distant block at columns 9–10 is removed. 
+ ``` + """ + if mask.dtype != bool: + raise TypeError("mask must be boolean") + + height, width = mask.shape + if not np.any(mask): + return mask.copy() + + image = mask.astype(np.uint8) + num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats( + image, connectivity=connectivity + ) + + if num_labels <= 1: + return mask.copy() + + areas = stats[1:, cv2.CC_STAT_AREA] + main_label = 1 + int(np.argmax(areas)) + + if relative_distance is not None: + diagonal = float(np.hypot(height, width)) + threshold = float(relative_distance) * diagonal + else: + threshold = float(absolute_distance) + + keep_labels = np.zeros(num_labels, dtype=bool) + keep_labels[main_label] = True + + if mode == "centroid": + differences = centroids[1:] - centroids[main_label] + distances = np.sqrt(np.sum(differences**2, axis=1)) + nearby = 1 + np.where(distances <= threshold)[0] + keep_labels[nearby] = True + elif mode == "edge": + main_mask = (labels == main_label).astype(np.uint8) + inverse = 1 - main_mask + distance_transform = cv2.distanceTransform(inverse, cv2.DIST_L2, 3) + for label in range(1, num_labels): + if label == main_label: + continue + component = labels == label + if not np.any(component): + continue + min_distance = float(distance_transform[component].min()) + if min_distance <= threshold: + keep_labels[label] = True + else: + raise ValueError("mode must be 'edge' or 'centroid'") + + return keep_labels[labels] \ No newline at end of file diff --git a/test/detection/utils/test_masks.py b/test/detection/utils/test_masks.py index 2097f6082c..287dade77d 100644 --- a/test/detection/utils/test_masks.py +++ b/test/detection/utils/test_masks.py @@ -10,7 +10,7 @@ calculate_masks_centroids, contains_holes, contains_multiple_segments, - move_masks, + move_masks, filter_segments_by_distance, ) @@ -500,3 +500,193 @@ def test_contains_multiple_segments( with exception: result = contains_multiple_segments(mask=mask, connectivity=connectivity) assert result == expected_result + 
+ +@pytest.mark.parametrize( + "mask, connectivity, mode, absolute_distance, relative_distance, expected_result, exception", + [ + # single component, unchanged + ( + np.array([ + [0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + ], dtype=bool), + 8, + "edge", + 2.0, + None, + np.array([ + [0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + ], dtype=bool), + DoesNotRaise(), + ), + # two components, edge distance 2, kept with abs=1 + ( + np.array([ + [0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 0, 1], + [0, 1, 1, 1, 0, 1], + [0, 1, 1, 1, 0, 1], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + ], dtype=bool), + 8, + "edge", + 2.0, + None, + np.array([ + [0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 0, 1], + [0, 1, 1, 1, 0, 1], + [0, 1, 1, 1, 0, 1], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + ], dtype=bool), + DoesNotRaise(), + ), + # centroid mode, far centroids, dropped with small relative threshold + ( + np.array([ + [1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 1, 1], + [0, 0, 0, 1, 1, 1], + ], dtype=bool), + 8, + "centroid", + None, + 0.3, # diagonal ~8.49, threshold ~2.55, centroid gap ~4.24 + np.array([ + [1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + ], dtype=bool), + DoesNotRaise(), + ), + # centroid mode, larger relative threshold, kept + ( + np.array([ + [1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 1, 1], + [0, 0, 0, 1, 1, 1], + ], dtype=bool), + 8, + "centroid", + None, + 0.6, # diagonal ~8.49, threshold ~5.09, centroid gap ~4.24 + np.array([ + [1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 1, 1], + [0, 0, 0, 1, 1, 1], + ], dtype=bool), + DoesNotRaise(), + ), + # empty mask 
+ ( + np.zeros((4, 4), dtype=bool), + 4, + "edge", + 2.0, + None, + np.zeros((4, 4), dtype=bool), + DoesNotRaise(), + ), + # full mask + ( + np.ones((4, 4), dtype=bool), + 8, + "centroid", + None, + 0.2, + np.ones((4, 4), dtype=bool), + DoesNotRaise(), + ), + # two components, pixel distance = 2, kept with abs=2 + ( + np.array([ + [0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 0, 1, 1, 1], + [0, 1, 1, 1, 0, 1, 1, 1], + [0, 1, 1, 1, 0, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0], + ], dtype=bool), + 8, + "edge", + 2.0, # was 1.0 + None, + np.array([ + [0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 0, 1, 1, 1], + [0, 1, 1, 1, 0, 1, 1, 1], + [0, 1, 1, 1, 0, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0], + ], dtype=bool), + DoesNotRaise(), + ), + + # two components, pixel distance = 3, dropped with abs=2 + ( + np.array([ + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 0, 0, 0, 1, 1], + [0, 1, 1, 1, 0, 0, 0, 1, 1], + [0, 1, 1, 1, 0, 0, 0, 1, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + ], dtype=bool), + 8, + "edge", + 2.0, # keep threshold below 3 so the right blob is removed + None, + np.array([ + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + ], dtype=bool), + DoesNotRaise(), + ), + ] +) +def test_filter_segments_by_distance_sweep( + mask: npt.NDArray, + connectivity: int, + mode: str, + absolute_distance: float | None, + relative_distance: float | None, + expected_result: npt.NDArray | None, + exception: Exception, +) -> None: + with exception: + result = filter_segments_by_distance( + mask=mask, + connectivity=connectivity, + mode=mode, # type: ignore[arg-type] + absolute_distance=absolute_distance, + relative_distance=relative_distance, + ) + assert np.array_equal(result, expected_result) From 9340cb962b5f2735a948ee7a07c72bb2cb4c9488 Mon Sep 17 00:00:00 2001 From: 
"pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 5 Nov 2025 19:22:56 +0000 Subject: [PATCH 074/124] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/__init__.py | 2 +- supervision/detection/utils/masks.py | 2 +- test/detection/utils/test_masks.py | 234 +++++++++++++++------------ 3 files changed, 137 insertions(+), 101 deletions(-) diff --git a/supervision/__init__.py b/supervision/__init__.py index 15ebf2c796..ccd2729308 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -87,8 +87,8 @@ calculate_masks_centroids, contains_holes, contains_multiple_segments, - move_masks, filter_segments_by_distance, + move_masks, ) from supervision.detection.utils.polygons import ( approximate_polygon, diff --git a/supervision/detection/utils/masks.py b/supervision/detection/utils/masks.py index 119439508b..49b403116f 100644 --- a/supervision/detection/utils/masks.py +++ b/supervision/detection/utils/masks.py @@ -396,4 +396,4 @@ def filter_segments_by_distance( else: raise ValueError("mode must be 'edge' or 'centroid'") - return keep_labels[labels] \ No newline at end of file + return keep_labels[labels] diff --git a/test/detection/utils/test_masks.py b/test/detection/utils/test_masks.py index 287dade77d..3c61e40907 100644 --- a/test/detection/utils/test_masks.py +++ b/test/detection/utils/test_masks.py @@ -10,7 +10,8 @@ calculate_masks_centroids, contains_holes, contains_multiple_segments, - move_masks, filter_segments_by_distance, + filter_segments_by_distance, + move_masks, ) @@ -507,98 +508,122 @@ def test_contains_multiple_segments( [ # single component, unchanged ( - np.array([ - [0, 0, 0, 0, 0, 0], - [0, 1, 1, 1, 0, 0], - [0, 1, 1, 1, 0, 0], - [0, 1, 1, 1, 0, 0], - [0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0], - ], dtype=bool), + np.array( + [ + [0, 0, 0, 0, 0, 0], + [0, 1, 
1, 1, 0, 0], + [0, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + ], + dtype=bool, + ), 8, "edge", 2.0, None, - np.array([ - [0, 0, 0, 0, 0, 0], - [0, 1, 1, 1, 0, 0], - [0, 1, 1, 1, 0, 0], - [0, 1, 1, 1, 0, 0], - [0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0], - ], dtype=bool), + np.array( + [ + [0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + ], + dtype=bool, + ), DoesNotRaise(), ), # two components, edge distance 2, kept with abs=1 ( - np.array([ - [0, 0, 0, 0, 0, 0], - [0, 1, 1, 1, 0, 1], - [0, 1, 1, 1, 0, 1], - [0, 1, 1, 1, 0, 1], - [0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0], - ], dtype=bool), + np.array( + [ + [0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 0, 1], + [0, 1, 1, 1, 0, 1], + [0, 1, 1, 1, 0, 1], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + ], + dtype=bool, + ), 8, "edge", 2.0, None, - np.array([ - [0, 0, 0, 0, 0, 0], - [0, 1, 1, 1, 0, 1], - [0, 1, 1, 1, 0, 1], - [0, 1, 1, 1, 0, 1], - [0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0], - ], dtype=bool), + np.array( + [ + [0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 0, 1], + [0, 1, 1, 1, 0, 1], + [0, 1, 1, 1, 0, 1], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + ], + dtype=bool, + ), DoesNotRaise(), ), # centroid mode, far centroids, dropped with small relative threshold ( - np.array([ - [1, 1, 1, 0, 0, 0], - [1, 1, 1, 0, 0, 0], - [1, 1, 1, 0, 0, 0], - [0, 0, 0, 0, 0, 0], - [0, 0, 0, 1, 1, 1], - [0, 0, 0, 1, 1, 1], - ], dtype=bool), + np.array( + [ + [1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 1, 1], + [0, 0, 0, 1, 1, 1], + ], + dtype=bool, + ), 8, "centroid", None, 0.3, # diagonal ~8.49, threshold ~2.55, centroid gap ~4.24 - np.array([ - [1, 1, 1, 0, 0, 0], - [1, 1, 1, 0, 0, 0], - [1, 1, 1, 0, 0, 0], - [0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0], - ], dtype=bool), + np.array( + [ + [1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0], + 
[0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + ], + dtype=bool, + ), DoesNotRaise(), ), # centroid mode, larger relative threshold, kept ( - np.array([ - [1, 1, 1, 0, 0, 0], - [1, 1, 1, 0, 0, 0], - [1, 1, 1, 0, 0, 0], - [0, 0, 0, 0, 0, 0], - [0, 0, 0, 1, 1, 1], - [0, 0, 0, 1, 1, 1], - ], dtype=bool), + np.array( + [ + [1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 1, 1], + [0, 0, 0, 1, 1, 1], + ], + dtype=bool, + ), 8, "centroid", None, 0.6, # diagonal ~8.49, threshold ~5.09, centroid gap ~4.24 - np.array([ - [1, 1, 1, 0, 0, 0], - [1, 1, 1, 0, 0, 0], - [1, 1, 1, 0, 0, 0], - [0, 0, 0, 0, 0, 0], - [0, 0, 0, 1, 1, 1], - [0, 0, 0, 1, 1, 1], - ], dtype=bool), + np.array( + [ + [1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0], + [1, 1, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 1, 1], + [0, 0, 0, 1, 1, 1], + ], + dtype=bool, + ), DoesNotRaise(), ), # empty mask @@ -623,54 +648,65 @@ def test_contains_multiple_segments( ), # two components, pixel distance = 2, kept with abs=2 ( - np.array([ - [0, 0, 0, 0, 0, 0, 0, 0], - [0, 1, 1, 1, 0, 1, 1, 1], - [0, 1, 1, 1, 0, 1, 1, 1], - [0, 1, 1, 1, 0, 1, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0], - ], dtype=bool), + np.array( + [ + [0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 0, 1, 1, 1], + [0, 1, 1, 1, 0, 1, 1, 1], + [0, 1, 1, 1, 0, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0], + ], + dtype=bool, + ), 8, "edge", 2.0, # was 1.0 None, - np.array([ - [0, 0, 0, 0, 0, 0, 0, 0], - [0, 1, 1, 1, 0, 1, 1, 1], - [0, 1, 1, 1, 0, 1, 1, 1], - [0, 1, 1, 1, 0, 1, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0], - ], dtype=bool), + np.array( + [ + [0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 0, 1, 1, 1], + [0, 1, 1, 1, 0, 1, 1, 1], + [0, 1, 1, 1, 0, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0], + ], + dtype=bool, + ), DoesNotRaise(), ), - # two components, pixel distance = 3, dropped with abs=2 ( - np.array([ - 
[0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 1, 1, 1, 0, 0, 0, 1, 1], - [0, 1, 1, 1, 0, 0, 0, 1, 1], - [0, 1, 1, 1, 0, 0, 0, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0], - ], dtype=bool), + np.array( + [ + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 0, 0, 0, 1, 1], + [0, 1, 1, 1, 0, 0, 0, 1, 1], + [0, 1, 1, 1, 0, 0, 0, 1, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + ], + dtype=bool, + ), 8, "edge", 2.0, # keep threshold below 3 so the right blob is removed None, - np.array([ - [0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 1, 1, 1, 0, 0, 0, 0, 0], - [0, 1, 1, 1, 0, 0, 0, 0, 0], - [0, 1, 1, 1, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0], - ], dtype=bool), + np.array( + [ + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0], + ], + dtype=bool, + ), DoesNotRaise(), ), - ] + ], ) def test_filter_segments_by_distance_sweep( mask: npt.NDArray, From 4c3d51f186ad3c10f7b469169e3c61a6b540bc9b Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Wed, 5 Nov 2025 20:28:55 +0100 Subject: [PATCH 075/124] making flake8 happy --- supervision/detection/utils/masks.py | 7 ++++--- test/detection/utils/test_masks.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/supervision/detection/utils/masks.py b/supervision/detection/utils/masks.py index 49b403116f..35896f20c6 100644 --- a/supervision/detection/utils/masks.py +++ b/supervision/detection/utils/masks.py @@ -272,7 +272,8 @@ def filter_segments_by_distance( mode: Literal["edge", "centroid"] = "edge", ) -> npt.NDArray[np.bool_]: """ - Keep the largest connected component and any other components within a distance threshold. + Keep the largest connected component and any other components within a distance + threshold. Distance can be absolute in pixels or relative to the image diagonal. 
@@ -345,9 +346,9 @@ def filter_segments_by_distance( # ], dtype=bool) # The nearby 2×2 block at columns 6–7 is kept because its edge distance - # is within 2 pixels. The distant block at columns 9–10 is removed. + # is within 2 pixels. The distant block at columns 9-10 is removed. ``` - """ + """ # noqa E501 // docs if mask.dtype != bool: raise TypeError("mask must be boolean") diff --git a/test/detection/utils/test_masks.py b/test/detection/utils/test_masks.py index 3c61e40907..c23893ada9 100644 --- a/test/detection/utils/test_masks.py +++ b/test/detection/utils/test_masks.py @@ -503,7 +503,7 @@ def test_contains_multiple_segments( assert result == expected_result -@pytest.mark.parametrize( +@pytest.mark.parametrize( # noqa: E501 "mask, connectivity, mode, absolute_distance, relative_distance, expected_result, exception", [ # single component, unchanged From 1fef946a19a385061abd45326eaf5188ee15150f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 5 Nov 2025 19:29:17 +0000 Subject: [PATCH 076/124] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/detection/utils/test_masks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/detection/utils/test_masks.py b/test/detection/utils/test_masks.py index c23893ada9..3c61e40907 100644 --- a/test/detection/utils/test_masks.py +++ b/test/detection/utils/test_masks.py @@ -503,7 +503,7 @@ def test_contains_multiple_segments( assert result == expected_result -@pytest.mark.parametrize( # noqa: E501 +@pytest.mark.parametrize( "mask, connectivity, mode, absolute_distance, relative_distance, expected_result, exception", [ # single component, unchanged From 65f6e418a1a136e9e01d87fca4d9da9e8596b5b1 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Wed, 5 Nov 2025 20:34:22 +0100 Subject: [PATCH 077/124] 
making flake8 happy --- test/detection/utils/test_masks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/detection/utils/test_masks.py b/test/detection/utils/test_masks.py index 3c61e40907..b41f208edb 100644 --- a/test/detection/utils/test_masks.py +++ b/test/detection/utils/test_masks.py @@ -504,7 +504,7 @@ def test_contains_multiple_segments( @pytest.mark.parametrize( - "mask, connectivity, mode, absolute_distance, relative_distance, expected_result, exception", + "mask, connectivity, mode, absolute_distance, relative_distance, expected_result, exception", # noqa: E501 [ # single component, unchanged ( From 52be9dc81808f09023ed7579f03704a26b4a6913 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Wed, 5 Nov 2025 21:19:33 +0100 Subject: [PATCH 078/124] fix `filter_segments_by_distance` docstring --- supervision/detection/utils/masks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/supervision/detection/utils/masks.py b/supervision/detection/utils/masks.py index 35896f20c6..04502dba05 100644 --- a/supervision/detection/utils/masks.py +++ b/supervision/detection/utils/masks.py @@ -303,7 +303,7 @@ def filter_segments_by_distance( Returns: Boolean mask after filtering. 
- Examples: + Examples: ```python import numpy as np import supervision as sv From b1b69ad97224d449bd1d485af473231da0ff6f28 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Wed, 5 Nov 2025 23:53:36 +0100 Subject: [PATCH 079/124] bump version from `0.27.0rc3` to `0.27.0rc4` --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 799e71d24d..5ba402aab8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "supervision" description = "A set of easy-to-use utils that will come in handy in any Computer Vision project" license = { text = "MIT" } -version = "0.27.0rc3" +version = "0.27.0rc4" readme = "README.md" requires-python = ">=3.9" authors = [ From ceefe0bab2e96c84debd24a1b2a1a234e72817fb Mon Sep 17 00:00:00 2001 From: AnonymDevOSS Date: Thu, 6 Nov 2025 22:52:25 +0100 Subject: [PATCH 080/124] feat/ speed up box iou - replaced original function; added tests --- supervision/detection/utils/iou_and_nms.py | 87 ---------------- test/detection/utils/functions.py | 38 ------- test/detection/utils/test_iou_and_nms.py | 112 +++++++++++++++++++-- test/test_utils.py | 36 +++++++ 4 files changed, 139 insertions(+), 134 deletions(-) delete mode 100644 test/detection/utils/functions.py diff --git a/supervision/detection/utils/iou_and_nms.py b/supervision/detection/utils/iou_and_nms.py index b55eea876f..299a61609d 100644 --- a/supervision/detection/utils/iou_and_nms.py +++ b/supervision/detection/utils/iou_and_nms.py @@ -172,93 +172,6 @@ def box_iou_batch( `shape = (N, M)` where `N` is number of true objects and `M` is number of detected objects. - Examples: - ```python - import numpy as np - import supervision as sv - - boxes_true = np.array([ - [100, 100, 200, 200], - [300, 300, 400, 400] - ]) - boxes_detection = np.array([ - [150, 150, 250, 250], - [320, 320, 420, 420] - ]) - - sv.box_iou_batch(boxes_true=boxes_true, boxes_detection=boxes_detection) - # array([ - # [0.14285714, 0. ], - # [0. 
, 0.47058824] - # ]) - ``` - """ - - def box_area(box): - return (box[2] - box[0]) * (box[3] - box[1]) - - area_true = box_area(boxes_true.T) - area_detection = box_area(boxes_detection.T) - - top_left = np.maximum(boxes_true[:, None, :2], boxes_detection[:, :2]) - bottom_right = np.minimum(boxes_true[:, None, 2:], boxes_detection[:, 2:]) - - area_inter = np.prod(np.clip(bottom_right - top_left, a_min=0, a_max=None), 2) - - if overlap_metric == OverlapMetric.IOU: - union_area = area_true[:, None] + area_detection - area_inter - ious = np.divide( - area_inter, - union_area, - out=np.zeros_like(area_inter, dtype=float), - where=union_area != 0, - ) - elif overlap_metric == OverlapMetric.IOS: - small_area = np.minimum(area_true[:, None], area_detection) - ious = np.divide( - area_inter, - small_area, - out=np.zeros_like(area_inter, dtype=float), - where=small_area != 0, - ) - else: - raise ValueError( - f"overlap_metric {overlap_metric} is not supported, " - "only 'IOU' and 'IOS' are supported" - ) - - ious = np.nan_to_num(ious) - return ious - - -def box_iou_batch_alt( - boxes_true: np.ndarray, - boxes_detection: np.ndarray, - overlap_metric: OverlapMetric = OverlapMetric.IOU, -) -> np.ndarray: - """ - Compute Intersection over Union (IoU) of two sets of bounding boxes - - `boxes_true` and `boxes_detection`. Both sets - of boxes are expected to be in `(x_min, y_min, x_max, y_max)` format. - - Note: - Use `box_iou` when computing IoU between two individual boxes. - For comparing multiple boxes (arrays of boxes), use `box_iou_batch` for better - performance. - - Args: - boxes_true (np.ndarray): 2D `np.ndarray` representing ground-truth boxes. - `shape = (N, 4)` where `N` is number of true objects. - boxes_detection (np.ndarray): 2D `np.ndarray` representing detection boxes. - `shape = (M, 4)` where `M` is number of detected objects. - overlap_metric (OverlapMetric): Metric used to compute the degree of overlap - between pairs of boxes (e.g., IoU, IoS). 
- - Returns: - np.ndarray: Pairwise IoU of boxes from `boxes_true` and `boxes_detection`. - `shape = (N, M)` where `N` is number of true objects and - `M` is number of detected objects. - Examples: ```python import numpy as np diff --git a/test/detection/utils/functions.py b/test/detection/utils/functions.py deleted file mode 100644 index 6b10dfa2fe..0000000000 --- a/test/detection/utils/functions.py +++ /dev/null @@ -1,38 +0,0 @@ -import random - -import numpy as np - - -def generate_boxes( - n: int, - W: int = 1920, - H: int = 1080, - min_size: int = 20, - max_size: int = 200, - seed: int | None = 1, -): - """ - Generate N valid bounding boxes of format [x_min, y_min, x_max, y_max]. - - Args: - n (int): Number of boexs to generate - W (int): Image width - H (int): Image height - min_size (int): Minimum box size (width/height) - max_size (int): Maximum box size (width/height) - seed (int | None): Random seed for reproducibility - - Returns: - list[list[float]] | np.ndarray: List of boxes - """ - random.seed(seed) - boxes = [] - for _ in range(n): - w = random.uniform(min_size, max_size) - h = random.uniform(min_size, max_size) - x1 = random.uniform(0, W - w) - y1 = random.uniform(0, H - h) - x2 = x1 + w - y2 = y1 + h - boxes.append([x1, y1, x2, y2]) - return np.array(boxes, dtype=np.float32) diff --git a/test/detection/utils/test_iou_and_nms.py b/test/detection/utils/test_iou_and_nms.py index 87fd958ad8..6536d29c08 100644 --- a/test/detection/utils/test_iou_and_nms.py +++ b/test/detection/utils/test_iou_and_nms.py @@ -6,14 +6,15 @@ import pytest from supervision.detection.utils.iou_and_nms import ( + OverlapMetric, _group_overlapping_boxes, + box_iou, box_iou_batch, - box_iou_batch_alt, box_non_max_suppression, mask_non_max_merge, mask_non_max_suppression, ) -from test.detection.utils.functions import generate_boxes +from test.test_utils import mock_boxes @pytest.mark.parametrize( @@ -636,12 +637,105 @@ def test_mask_non_max_merge( assert sorted_result == 
sorted_expected_result -def test_box_iou_batch_and_alt_equivalence(): - boxes_true = generate_boxes(20, seed=1) - boxes_detection = generate_boxes(30, seed=2) +@pytest.mark.parametrize( + "boxes_true, boxes_detection, expected_iou, exception", + [ + ( + np.empty((0, 4), dtype=np.float32), + np.empty((0, 4), dtype=np.float32), + np.empty((0, 0), dtype=np.float32), + DoesNotRaise(), + ), # empty + ( + np.array([[0, 0, 10, 10]], dtype=np.float32), + np.empty((0, 4), dtype=np.float32), + np.empty((1, 0), dtype=np.float32), + DoesNotRaise(), + ), # one true box, no detections + ( + np.empty((0, 4), dtype=np.float32), + np.array([[0, 0, 10, 10]], dtype=np.float32), + np.empty((0, 1), dtype=np.float32), + DoesNotRaise(), + ), # no true boxes, one detection + ( + np.array([[0, 0, 10, 10]], dtype=np.float32), + np.array([[0, 0, 10, 10]], dtype=np.float32), + np.array([[1.0]]), + DoesNotRaise(), + ), # perfect overlap + ( + np.array([[0, 0, 10, 10]], dtype=np.float32), + np.array([[20, 20, 30, 30]], dtype=np.float32), + np.array([[0.0]]), + DoesNotRaise(), + ), # no overlap + ( + np.array([[0, 0, 10, 10]], dtype=np.float32), + np.array([[5, 5, 15, 15]], dtype=np.float32), + np.array([[25.0 / 175.0]]), # intersection: 5x5=25, union: 100+100-25=175 + DoesNotRaise(), + ), # partial overlap + ( + np.array([[0, 0, 10, 10]], dtype=np.float32), + np.array([[0, 0, 5, 5]], dtype=np.float32), + np.array([[25.0 / 100.0]]), # intersection: 5x5=25, union: 100 + DoesNotRaise(), + ), # detection inside true box + ( + np.array([[0, 0, 5, 5]], dtype=np.float32), + np.array([[0, 0, 10, 10]], dtype=np.float32), + np.array([[25.0 / 100.0]]), # true box inside detection + DoesNotRaise(), + ), + ( + np.array([[0, 0, 10, 10], [20, 20, 30, 30]], dtype=np.float32), + np.array([[0, 0, 10, 10], [20, 20, 30, 30]], dtype=np.float32), + np.array([[1.0, 0.0], [0.0, 1.0]]), + DoesNotRaise(), + ), # two boxes, perfect matches + ], +) +def test_box_iou_batch( + boxes_true: np.ndarray, + boxes_detection: 
np.ndarray, + expected_iou: np.ndarray, + exception: Exception, +) -> None: + with exception: + result = box_iou_batch(boxes_true, boxes_detection) + assert result.shape == expected_iou.shape + assert np.allclose(result, expected_iou, rtol=1e-5, atol=1e-5) + + +def test_box_iou_batch_consistency_with_box_iou(): + """Test that box_iou_batch gives same results as box_iou for single boxes.""" + boxes_true = np.array(mock_boxes(5, seed=1), dtype=np.float32) + boxes_detection = np.array(mock_boxes(5, seed=2), dtype=np.float32) + + batch_result = box_iou_batch(boxes_true, boxes_detection) + + for i, box_true in enumerate(boxes_true): + for j, box_detection in enumerate(boxes_detection): + single_result = box_iou(box_true, box_detection) + assert np.allclose( + batch_result[i, j], single_result, rtol=1e-5, atol=1e-5 + ) + + +def test_box_iou_batch_with_mock_detections(): + """ Test box_iou_batch with generated boxes and verify results are valid. """ + boxes_true = np.array(mock_boxes(10, seed=1), dtype=np.float32) + boxes_detection = np.array(mock_boxes(15, seed=2), dtype=np.float32) - iou_a = box_iou_batch(boxes_true, boxes_detection) - iou_b = box_iou_batch_alt(boxes_true, boxes_detection) + result = box_iou_batch(boxes_true, boxes_detection) - assert iou_a.shape == iou_b.shape - assert np.allclose(iou_a, iou_b, rtol=1e-6, atol=1e-6) + assert result.shape == (10, 15) + + assert np.all(result >= 0) + assert np.all(result <= 1.0) + + # and symetric + result_reversed = box_iou_batch(boxes_detection, boxes_true) + assert result_reversed.shape == (15, 10) + assert np.allclose(result.T, result_reversed, rtol=1e-5, atol=1e-5) diff --git a/test/test_utils.py b/test/test_utils.py index 0a97bf4bff..e512de6f62 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1,5 +1,6 @@ from __future__ import annotations +import random from typing import Any import numpy as np @@ -52,5 +53,40 @@ def convert_data(data: dict[str, list[Any]]): ) +def mock_boxes( + n: int, + resolution_wh: 
tuple[int, int] = (1920, 1080), + min_size: int = 20, + max_size: int = 200, + seed: int | None = None, +) -> list[list[float]]: + """ + Generate N valid bounding boxes of format [x_min, y_min, x_max, y_max]. + + Args: + n: Number of boxes to generate. + resolution_wh: Image resolution as (width, height). Defaults to (1920, 1080). + min_size: Minimum box size (width/height). Defaults to 20. + max_size: Maximum box size (width/height). Defaults to 200. + seed: Random seed for reproducibility. Defaults to None. + + Returns: + List of boxes, each as [x_min, y_min, x_max, y_max]. + """ + if seed is not None: + random.seed(seed) + width, height = resolution_wh + boxes = [] + for _ in range(n): + w = random.uniform(min_size, max_size) + h = random.uniform(min_size, max_size) + x1 = random.uniform(0, width - w) + y1 = random.uniform(0, height - h) + x2 = x1 + w + y2 = y1 + h + boxes.append([x1, y1, x2, y2]) + return boxes + + def assert_almost_equal(actual, expected, tolerance=1e-5): assert abs(actual - expected) < tolerance, f"Expected {expected}, but got {actual}." 
From da7c5cd65ab58915793f6c50081d53a3cbeaf956 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 6 Nov 2025 21:57:23 +0000 Subject: [PATCH 081/124] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/detection/utils/test_iou_and_nms.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/test/detection/utils/test_iou_and_nms.py b/test/detection/utils/test_iou_and_nms.py index 6536d29c08..765bc5a28c 100644 --- a/test/detection/utils/test_iou_and_nms.py +++ b/test/detection/utils/test_iou_and_nms.py @@ -6,7 +6,6 @@ import pytest from supervision.detection.utils.iou_and_nms import ( - OverlapMetric, _group_overlapping_boxes, box_iou, box_iou_batch, @@ -718,23 +717,21 @@ def test_box_iou_batch_consistency_with_box_iou(): for i, box_true in enumerate(boxes_true): for j, box_detection in enumerate(boxes_detection): single_result = box_iou(box_true, box_detection) - assert np.allclose( - batch_result[i, j], single_result, rtol=1e-5, atol=1e-5 - ) + assert np.allclose(batch_result[i, j], single_result, rtol=1e-5, atol=1e-5) def test_box_iou_batch_with_mock_detections(): - """ Test box_iou_batch with generated boxes and verify results are valid. 
""" + """Test box_iou_batch with generated boxes and verify results are valid.""" boxes_true = np.array(mock_boxes(10, seed=1), dtype=np.float32) boxes_detection = np.array(mock_boxes(15, seed=2), dtype=np.float32) result = box_iou_batch(boxes_true, boxes_detection) assert result.shape == (10, 15) - + assert np.all(result >= 0) assert np.all(result <= 1.0) - + # and symetric result_reversed = box_iou_batch(boxes_detection, boxes_true) assert result_reversed.shape == (15, 10) From bb87ab0a6ed6457f73ccc3fd4f5aeb1bb635563c Mon Sep 17 00:00:00 2001 From: AnonymDevOSS Date: Thu, 6 Nov 2025 23:00:33 +0100 Subject: [PATCH 082/124] fixed autoformat errors --- test/detection/utils/test_iou_and_nms.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/test/detection/utils/test_iou_and_nms.py b/test/detection/utils/test_iou_and_nms.py index 6536d29c08..29c11ddedf 100644 --- a/test/detection/utils/test_iou_and_nms.py +++ b/test/detection/utils/test_iou_and_nms.py @@ -6,7 +6,6 @@ import pytest from supervision.detection.utils.iou_and_nms import ( - OverlapMetric, _group_overlapping_boxes, box_iou, box_iou_batch, @@ -718,24 +717,22 @@ def test_box_iou_batch_consistency_with_box_iou(): for i, box_true in enumerate(boxes_true): for j, box_detection in enumerate(boxes_detection): single_result = box_iou(box_true, box_detection) - assert np.allclose( - batch_result[i, j], single_result, rtol=1e-5, atol=1e-5 - ) + assert np.allclose(batch_result[i, j], single_result, rtol=1e-5, atol=1e-5) def test_box_iou_batch_with_mock_detections(): - """ Test box_iou_batch with generated boxes and verify results are valid. 
""" + """Test box_iou_batch with generated boxes and verify results are valid.""" boxes_true = np.array(mock_boxes(10, seed=1), dtype=np.float32) boxes_detection = np.array(mock_boxes(15, seed=2), dtype=np.float32) result = box_iou_batch(boxes_true, boxes_detection) assert result.shape == (10, 15) - + assert np.all(result >= 0) assert np.all(result <= 1.0) - - # and symetric + + # and symmetric result_reversed = box_iou_batch(boxes_detection, boxes_true) assert result_reversed.shape == (15, 10) assert np.allclose(result.T, result_reversed, rtol=1e-5, atol=1e-5) From 9ad850af46142f9b71e9cc8990df71bf712a5755 Mon Sep 17 00:00:00 2001 From: jackiehimel Date: Wed, 12 Nov 2025 20:58:25 -0500 Subject: [PATCH 083/124] fix: correct numpy indexing in denormalize_boxes and add ultralytics validation - Fix denormalize_boxes numpy indexing bug that caused IndexError with 3+ boxes - Add validation for missing boxes attribute in from_ultralytics - Add comprehensive test coverage (11 new tests) Fixes #1959 Fixes #2000 --- supervision/detection/core.py | 27 ++++---- supervision/detection/utils/boxes.py | 4 +- test/detection/test_core.py | 80 ++++++++++++++++++++++++ test/detection/utils/test_boxes.py | 92 +++++++++++++++++++++++++++- 4 files changed, 188 insertions(+), 15 deletions(-) diff --git a/supervision/detection/core.py b/supervision/detection/core.py index ffe5ed3fca..ae6205dfbf 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -296,18 +296,21 @@ def from_ultralytics(cls, ultralytics_results) -> Detections: class_id=np.arange(len(ultralytics_results)), ) - class_id = ultralytics_results.boxes.cls.cpu().numpy().astype(int) - class_names = np.array([ultralytics_results.names[i] for i in class_id]) - return cls( - xyxy=ultralytics_results.boxes.xyxy.cpu().numpy(), - confidence=ultralytics_results.boxes.conf.cpu().numpy(), - class_id=class_id, - mask=extract_ultralytics_masks(ultralytics_results), - 
tracker_id=ultralytics_results.boxes.id.int().cpu().numpy() - if ultralytics_results.boxes.id is not None - else None, - data={CLASS_NAME_DATA_FIELD: class_names}, - ) + if hasattr(ultralytics_results, "boxes") and ultralytics_results.boxes is not None: + class_id = ultralytics_results.boxes.cls.cpu().numpy().astype(int) + class_names = np.array([ultralytics_results.names[i] for i in class_id]) + return cls( + xyxy=ultralytics_results.boxes.xyxy.cpu().numpy(), + confidence=ultralytics_results.boxes.conf.cpu().numpy(), + class_id=class_id, + mask=extract_ultralytics_masks(ultralytics_results), + tracker_id=ultralytics_results.boxes.id.int().cpu().numpy() + if ultralytics_results.boxes.id is not None + else None, + data={CLASS_NAME_DATA_FIELD: class_names}, + ) + + return cls.empty() @classmethod def from_yolo_nas(cls, yolo_nas_results) -> Detections: diff --git a/supervision/detection/utils/boxes.py b/supervision/detection/utils/boxes.py index 3b01fcb68b..60542a43a9 100644 --- a/supervision/detection/utils/boxes.py +++ b/supervision/detection/utils/boxes.py @@ -147,8 +147,8 @@ def denormalize_boxes( width, height = resolution_wh result = normalized_xyxy.copy() - result[[0, 2]] = (result[[0, 2]] * width) / normalization_factor - result[[1, 3]] = (result[[1, 3]] * height) / normalization_factor + result[:, [0, 2]] = (result[:, [0, 2]] * width) / normalization_factor + result[:, [1, 3]] = (result[:, [1, 3]] * height) / normalization_factor return result diff --git a/test/detection/test_core.py b/test/detection/test_core.py index c57bea4098..1542f0a35c 100644 --- a/test/detection/test_core.py +++ b/test/detection/test_core.py @@ -815,3 +815,83 @@ def test_merge_inner_detection_object_pair( with exception: result = merge_inner_detection_object_pair(detection_1, detection_2) assert result == expected_result + +class TestFromUltralytics: + """Test suite for Detections.from_ultralytics method.""" + + def test_from_ultralytics_with_missing_boxes_attribute(self): + """Test 
that from_ultralytics handles missing boxes attribute gracefully. + + Regression test for issue #2000. + """ + # Create a mock ultralytics result without boxes attribute + class MockUltralyticsResult: + def __init__(self): + self.names = {0: "class1", 1: "class2"} + # Intentionally not setting 'boxes' or 'obb' attribute + + mock_result = MockUltralyticsResult() + detections = Detections.from_ultralytics(mock_result) + + # Should return empty detections instead of crashing + assert len(detections) == 0 + assert detections.xyxy.shape == (0, 4) + + def test_from_ultralytics_with_boxes_none(self): + """Test that from_ultralytics handles boxes=None (segmentation-only models).""" + # Create a mock ultralytics result with boxes=None + class MockUltralyticsResult: + def __init__(self): + self.boxes = None + self.names = {0: "class1"} + # Mock masks attribute for segmentation + self.masks = None + + mock_result = MockUltralyticsResult() + # This should handle the segmentation-only case + # Note: Will fail if masks are not properly set, but that's expected behavior + try: + _ = Detections.from_ultralytics(mock_result) + # If masks are properly implemented, this should work + except (AttributeError, TypeError): + # Expected if masks aren't properly mocked + pass + + def test_from_ultralytics_with_valid_boxes(self): + """Test that from_ultralytics works correctly with valid boxes.""" + # Create a mock ultralytics result with valid boxes + class MockBoxes: + def __init__(self): + self.cls = self._MockTensor([0, 1]) + self.xyxy = self._MockTensor([[10, 20, 30, 40], [50, 60, 70, 80]]) + self.conf = self._MockTensor([0.9, 0.8]) + self.id = None + + class _MockTensor: + def __init__(self, data): + self.data = np.array(data) + + def cpu(self): + return self + + def numpy(self): + return self.data + + def astype(self, dtype): + return self.data.astype(dtype) + + class MockUltralyticsResult: + def __init__(self): + self.boxes = MockBoxes() + self.names = {0: "person", 1: "car"} + 
self.masks = None + + mock_result = MockUltralyticsResult() + detections = Detections.from_ultralytics(mock_result) + + assert len(detections) == 2 + assert np.array_equal( + detections.xyxy, np.array([[10, 20, 30, 40], [50, 60, 70, 80]]) + ) + assert np.array_equal(detections.confidence, np.array([0.9, 0.8])) + assert np.array_equal(detections.class_id, np.array([0, 1])) diff --git a/test/detection/utils/test_boxes.py b/test/detection/utils/test_boxes.py index 919989287a..b27dd87d1e 100644 --- a/test/detection/utils/test_boxes.py +++ b/test/detection/utils/test_boxes.py @@ -5,7 +5,12 @@ import numpy as np import pytest -from supervision.detection.utils.boxes import clip_boxes, move_boxes, scale_boxes +from supervision.detection.utils.boxes import ( + clip_boxes, + denormalize_boxes, + move_boxes, + scale_boxes, +) @pytest.mark.parametrize( @@ -142,3 +147,88 @@ def test_scale_boxes( with exception: result = scale_boxes(xyxy=xyxy, factor=factor) assert np.array_equal(result, expected_result) + + +@pytest.mark.parametrize( + "normalized_xyxy, resolution_wh, normalization_factor, expected_result, exception", + [ + ( + np.empty(shape=(0, 4)), + (1280, 720), + 1.0, + np.empty(shape=(0, 4)), + DoesNotRaise(), + ), # empty array + ( + np.array([[0.1, 0.2, 0.5, 0.6]]), + (1280, 720), + 1.0, + np.array([[128.0, 144.0, 640.0, 432.0]]), + DoesNotRaise(), + ), # single box with default normalization + ( + np.array([[0.1, 0.2, 0.5, 0.6], [0.3, 0.4, 0.7, 0.8]]), + (1280, 720), + 1.0, + np.array([[128.0, 144.0, 640.0, 432.0], [384.0, 288.0, 896.0, 576.0]]), + DoesNotRaise(), + ), # two boxes with default normalization + ( + np.array( + [[0.1, 0.2, 0.5, 0.6], [0.3, 0.4, 0.7, 0.8], [0.2, 0.1, 0.6, 0.5]] + ), + (1280, 720), + 1.0, + np.array( + [ + [128.0, 144.0, 640.0, 432.0], + [384.0, 288.0, 896.0, 576.0], + [256.0, 72.0, 768.0, 360.0], + ] + ), + DoesNotRaise(), + ), # three boxes - regression test for issue #1959 + ( + np.array([[10.0, 20.0, 50.0, 60.0]]), + (100, 200), + 
100.0, + np.array([[10.0, 40.0, 50.0, 120.0]]), + DoesNotRaise(), + ), # single box with custom normalization factor + ( + np.array([[10.0, 20.0, 50.0, 60.0], [30.0, 40.0, 70.0, 80.0]]), + (100, 200), + 100.0, + np.array([[10.0, 40.0, 50.0, 120.0], [30.0, 80.0, 70.0, 160.0]]), + DoesNotRaise(), + ), # two boxes with custom normalization factor + ( + np.array([[0.0, 0.0, 1.0, 1.0]]), + (1920, 1080), + 1.0, + np.array([[0.0, 0.0, 1920.0, 1080.0]]), + DoesNotRaise(), + ), # full frame box + ( + np.array([[0.5, 0.5, 0.5, 0.5]]), + (640, 480), + 1.0, + np.array([[320.0, 240.0, 320.0, 240.0]]), + DoesNotRaise(), + ), # zero-area box (point) + ], +) +def test_denormalize_boxes( + normalized_xyxy: np.ndarray, + resolution_wh: tuple[int, int], + normalization_factor: float, + expected_result: np.ndarray, + exception: Exception, +) -> None: + with exception: + result = denormalize_boxes( + normalized_xyxy=normalized_xyxy, + resolution_wh=resolution_wh, + normalization_factor=normalization_factor, + ) + assert np.allclose(result, expected_result) From 61cca76acc479124bad10599878f44b6250aa896 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 13 Nov 2025 02:10:19 +0000 Subject: [PATCH 084/124] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/detection/core.py | 5 ++++- test/detection/test_core.py | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/supervision/detection/core.py b/supervision/detection/core.py index ae6205dfbf..bda2e7de33 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -296,7 +296,10 @@ def from_ultralytics(cls, ultralytics_results) -> Detections: class_id=np.arange(len(ultralytics_results)), ) - if hasattr(ultralytics_results, "boxes") and ultralytics_results.boxes is not None: + if ( + 
hasattr(ultralytics_results, "boxes") + and ultralytics_results.boxes is not None + ): class_id = ultralytics_results.boxes.cls.cpu().numpy().astype(int) class_names = np.array([ultralytics_results.names[i] for i in class_id]) return cls( diff --git a/test/detection/test_core.py b/test/detection/test_core.py index 1542f0a35c..bf27f209e9 100644 --- a/test/detection/test_core.py +++ b/test/detection/test_core.py @@ -816,6 +816,7 @@ def test_merge_inner_detection_object_pair( result = merge_inner_detection_object_pair(detection_1, detection_2) assert result == expected_result + class TestFromUltralytics: """Test suite for Detections.from_ultralytics method.""" @@ -824,6 +825,7 @@ def test_from_ultralytics_with_missing_boxes_attribute(self): Regression test for issue #2000. """ + # Create a mock ultralytics result without boxes attribute class MockUltralyticsResult: def __init__(self): @@ -839,6 +841,7 @@ def __init__(self): def test_from_ultralytics_with_boxes_none(self): """Test that from_ultralytics handles boxes=None (segmentation-only models).""" + # Create a mock ultralytics result with boxes=None class MockUltralyticsResult: def __init__(self): @@ -859,6 +862,7 @@ def __init__(self): def test_from_ultralytics_with_valid_boxes(self): """Test that from_ultralytics works correctly with valid boxes.""" + # Create a mock ultralytics result with valid boxes class MockBoxes: def __init__(self): From f6d52b53db721a1d3f5e21f8fc000a2533a46c39 Mon Sep 17 00:00:00 2001 From: Dominik Winecki Date: Thu, 13 Nov 2025 13:48:16 -0500 Subject: [PATCH 085/124] docs: Update JSONSink example path --- docs/how_to/save_detections.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/how_to/save_detections.md b/docs/how_to/save_detections.md index e9e1484942..c998dee7e3 100644 --- a/docs/how_to/save_detections.md +++ b/docs/how_to/save_detections.md @@ -234,7 +234,7 @@ with model = get_model(model_id="yolov8n-640") frames_generator = 
sv.get_video_frames_generator() - with sv.JSONSink() as sink: + with sv.JSONSink() as sink: for frame_index, frame in enumerate(frames_generator): results = model.infer(image)[0] @@ -250,7 +250,7 @@ with model = YOLO("yolov8n.pt") frames_generator = sv.get_video_frames_generator() - with sv.JSONSink() as sink: + with sv.JSONSink() as sink: for frame_index, frame in enumerate(frames_generator): results = model(frame)[0] @@ -268,7 +268,7 @@ with model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50") frames_generator = sv.get_video_frames_generator() - with sv.JSONSink() as sink: + with sv.JSONSink() as sink: for frame_index, frame in enumerate(frames_generator): frame = sv.cv2_to_pillow(frame) From f716b7fa3b6459de5ea0d63377faa29141d43dd6 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Fri, 14 Nov 2025 11:57:31 +0100 Subject: [PATCH 086/124] improve docstrings, examples, variable naming conventions and unit tests --- supervision/detection/utils/iou_and_nms.py | 189 ++++---- test/detection/utils/test_iou_and_nms.py | 521 ++++++++++++++++++--- test/test_utils.py | 57 ++- 3 files changed, 599 insertions(+), 168 deletions(-) diff --git a/supervision/detection/utils/iou_and_nms.py b/supervision/detection/utils/iou_and_nms.py index 299a61609d..f4c68a03d0 100644 --- a/supervision/detection/utils/iou_and_nms.py +++ b/supervision/detection/utils/iou_and_nms.py @@ -64,7 +64,7 @@ class OverlapMetric(Enum): IOS = "IOS" @classmethod - def list(cls): + def list(cls) -> list[str]: return list(map(lambda c: c.value, cls)) @classmethod @@ -72,7 +72,7 @@ def from_value(cls, value: OverlapMetric | str) -> OverlapMetric: if isinstance(value, cls): return value if isinstance(value, str): - value = value.lower() + value = value.upper() try: return cls(value) except ValueError: @@ -86,91 +86,107 @@ def from_value(cls, value: OverlapMetric | str) -> OverlapMetric: def box_iou( box_true: list[float] | np.ndarray, box_detection: list[float] | np.ndarray, + overlap_metric: 
OverlapMetric | str = OverlapMetric.IOU, ) -> float: - r""" - Compute the Intersection over Union (IoU) between two bounding boxes. - - \[ - \text{IoU} = \frac{|\text{box}_{\text{true}} \cap \text{box}_{\text{detection}}|}{|\text{box}_{\text{true}} \cup \text{box}_{\text{detection}}|} - \] + """ + Compute overlap metric between two bounding boxes. - Note: - Use `box_iou` when computing IoU between two individual boxes. - For comparing multiple boxes (arrays of boxes), use `box_iou_batch` for better - performance. + Supports standard IOU (intersection-over-union) and IOS + (intersection-over-smaller-area) metrics. Returns the overlap value in range + `[0, 1]`. Args: - box_true (Union[List[float], np.ndarray]): A single bounding box represented as - [x_min, y_min, x_max, y_max]. - box_detection (Union[List[float], np.ndarray]): - A single bounding box represented as [x_min, y_min, x_max, y_max]. + box_true (`list[float]` or `numpy.array`): Ground truth box in format + `(x_min, y_min, x_max, y_max)`. + box_detection (`list[float]` or `numpy.array`): Detected box in format + `(x_min, y_min, x_max, y_max)`. + overlap_metric (`OverlapMetric` or `str`): Overlap type. + Use `OverlapMetric.IOU` for IOU or + `OverlapMetric.IOS` for IOS. Defaults to `OverlapMetric.IOU`. Returns: - IoU (float): IoU score between the two boxes. Ranges from 0.0 (no overlap) - to 1.0 (perfect overlap). + (`float`): Overlap value between boxes in `[0, 1]`. + + Raises: + ValueError: If `overlap_metric` is not IOU or IOS. 
Examples: - ```python - import numpy as np + ``` import supervision as sv - box_true = np.array([100, 100, 200, 200]) - box_detection = np.array([150, 150, 250, 250]) + box_true = [100, 100, 200, 200] + box_detection = [150, 150, 250, 250] - sv.box_iou(box_true=box_true, box_detection=box_detection) - # 0.14285814285714285 + sv.box_iou(box_true, box_detection, overlap_metric=sv.OverlapMetric.IOU) + # 0.14285714285714285 + + sv.box_iou(box_true, box_detection, overlap_metric=sv.OverlapMetric.IOS) + # 0.25 ``` - """ # noqa: E501 - box_true = np.array(box_true) - box_detection = np.array(box_detection) + """ + overlap_metric = OverlapMetric.from_value(overlap_metric) + x_min_true, y_min_true, x_max_true, y_max_true = np.array(box_true) + x_min_det, y_min_det, x_max_det, y_max_det = np.array(box_detection) - inter_x1 = max(box_true[0], box_detection[0]) - inter_y1 = max(box_true[1], box_detection[1]) - inter_x2 = min(box_true[2], box_detection[2]) - inter_y2 = min(box_true[3], box_detection[3]) + x_min_inter = max(x_min_true, x_min_det) + y_min_inter = max(y_min_true, y_min_det) + x_max_inter = min(x_max_true, x_max_det) + y_max_inter = min(y_max_true, y_max_det) - inter_w = max(0, inter_x2 - inter_x1) - inter_h = max(0, inter_y2 - inter_y1) + inter_w = max(0.0, x_max_inter - x_min_inter) + inter_h = max(0.0, y_max_inter - y_min_inter) - inter_area = inter_w * inter_h + area_inter = inter_w * inter_h - area_true = (box_true[2] - box_true[0]) * (box_true[3] - box_true[1]) - area_detection = (box_detection[2] - box_detection[0]) * ( - box_detection[3] - box_detection[1] - ) + area_true = (x_max_true - x_min_true) * (y_max_true - y_min_true) + area_det = (x_max_det - x_min_det) * (y_max_det - y_min_det) - union_area = area_true + area_detection - inter_area + if overlap_metric == OverlapMetric.IOU: + area_norm = area_true + area_det - area_inter + elif overlap_metric == OverlapMetric.IOS: + area_norm = min(area_true, area_det) + else: + raise ValueError( + 
f"overlap_metric {overlap_metric} is not supported, " + "only 'IOU' and 'IOS' are supported" + ) + + if area_norm <= 0.0: + return 0.0 - return inter_area / union_area + 1e-6 + return float(area_inter / area_norm) def box_iou_batch( boxes_true: np.ndarray, boxes_detection: np.ndarray, - overlap_metric: OverlapMetric = OverlapMetric.IOU, + overlap_metric: OverlapMetric | str = OverlapMetric.IOU, ) -> np.ndarray: """ - Compute Intersection over Union (IoU) of two sets of bounding boxes - - `boxes_true` and `boxes_detection`. Both sets - of boxes are expected to be in `(x_min, y_min, x_max, y_max)` format. + Compute pairwise overlap scores between batches of bounding boxes. - Note: - Use `box_iou` when computing IoU between two individual boxes. - For comparing multiple boxes (arrays of boxes), use `box_iou_batch` for better - performance. + Supports standard IOU (intersection-over-union) and IOS + (intersection-over-smaller-area) metrics for all `boxes_true` and + `boxes_detection` pairs. Returns a matrix of overlap values in range + `[0, 1]`, matching each box from the first batch to each from the second. Args: - boxes_true (np.ndarray): 2D `np.ndarray` representing ground-truth boxes. - `shape = (N, 4)` where `N` is number of true objects. - boxes_detection (np.ndarray): 2D `np.ndarray` representing detection boxes. - `shape = (M, 4)` where `M` is number of detected objects. - overlap_metric (OverlapMetric): Metric used to compute the degree of overlap - between pairs of boxes (e.g., IoU, IoS). + boxes_true (`numpy.array`): Array of reference boxes in + shape `(N, 4)` as `(x_min, y_min, x_max, y_max)`. + boxes_detection (`numpy.array`): Array of detected boxes in + shape `(M, 4)` as `(x_min, y_min, x_max, y_max)`. + overlap_metric (`OverlapMetric` or `str`): Overlap type. + Use `OverlapMetric.IOU` for intersection-over-union, + `OverlapMetric.IOS` for intersection-over-smaller-area. + Defaults to `OverlapMetric.IOU`. 
Returns: - np.ndarray: Pairwise IoU of boxes from `boxes_true` and `boxes_detection`. - `shape = (N, M)` where `N` is number of true objects and - `M` is number of detected objects. + (`numpy.array`): Overlap matrix of shape `(N, M)`, where entry + `[i, j]` is the overlap between `boxes_true[i]` and + `boxes_detection[j]`. + + Raises: + ValueError: If `overlap_metric` is not IOU or IOS. Examples: ```python @@ -186,43 +202,48 @@ def box_iou_batch( [320, 320, 420, 420] ]) - sv.box_iou_batch(boxes_true=boxes_true, boxes_detection=boxes_detection) - # array([ - # [0.14285714, 0. ], - # [0. , 0.47058824] - # ]) - ``` + sv.box_iou_batch(boxes_true, boxes_detection, overlap_metric=OverlapMetric.IOU) + # array([[0.14285715, 0. ], + # [0. , 0.47058824]]) + sv.box_iou_batch(boxes_true, boxes_detection, overlap_metric=OverlapMetric.IOS) + # array([[0.25, 0. ], + # [0. , 0.64]]) + ``` """ + overlap_metric = OverlapMetric.from_value(overlap_metric) + x_min_true, y_min_true, x_max_true, y_max_true = boxes_true.T + x_min_det, y_min_det, x_max_det, y_max_det = boxes_detection.T + count_true, count_det = boxes_true.shape[0], boxes_detection.shape[0] - tx1, ty1, tx2, ty2 = boxes_true.T - dx1, dy1, dx2, dy2 = boxes_detection.T - N, M = boxes_true.shape[0], boxes_detection.shape[0] + if count_true == 0 or count_det == 0: + return np.empty((count_true, count_det), dtype=np.float32) - top_left_x = np.empty((N, M), dtype=np.float32) - bottom_right_x = np.empty_like(top_left_x) - top_left_y = np.empty_like(top_left_x) - bottom_right_y = np.empty_like(top_left_x) + x_min_inter = np.empty((count_true, count_det), dtype=np.float32) + x_max_inter = np.empty_like(x_min_inter) + y_min_inter = np.empty_like(x_min_inter) + y_max_inter = np.empty_like(x_min_inter) - np.maximum(tx1[:, None], dx1[None, :], out=top_left_x) - np.minimum(tx2[:, None], dx2[None, :], out=bottom_right_x) - np.maximum(ty1[:, None], dy1[None, :], out=top_left_y) - np.minimum(ty2[:, None], dy2[None, :], out=bottom_right_y) + 
np.maximum(x_min_true[:, None], x_min_det[None, :], out=x_min_inter) + np.minimum(x_max_true[:, None], x_max_det[None, :], out=x_max_inter) + np.maximum(y_min_true[:, None], y_min_det[None, :], out=y_min_inter) + np.minimum(y_max_true[:, None], y_max_det[None, :], out=y_max_inter) - np.subtract(bottom_right_x, top_left_x, out=bottom_right_x) # W - np.subtract(bottom_right_y, top_left_y, out=bottom_right_y) # H - np.clip(bottom_right_x, 0.0, None, out=bottom_right_x) - np.clip(bottom_right_y, 0.0, None, out=bottom_right_y) + # we reuse x_max_inter and y_max_inter to store inter_w and inter_h + np.subtract(x_max_inter, x_min_inter, out=x_max_inter) # inter_w + np.subtract(y_max_inter, y_min_inter, out=y_max_inter) # inter_h + np.clip(x_max_inter, 0.0, None, out=x_max_inter) + np.clip(y_max_inter, 0.0, None, out=y_max_inter) - area_inter = bottom_right_x * bottom_right_y + area_inter = x_max_inter * y_max_inter # inter_w * inter_h - area_true = (tx2 - tx1) * (ty2 - ty1) - area_detection = (dx2 - dx1) * (dy2 - dy1) + area_true = (x_max_true - x_min_true) * (y_max_true - y_min_true) + area_det = (x_max_det - x_min_det) * (y_max_det - y_min_det) if overlap_metric == OverlapMetric.IOU: - denom = area_true[:, None] + area_detection[None, :] - area_inter + area_norm = area_true[:, None] + area_det[None, :] - area_inter elif overlap_metric == OverlapMetric.IOS: - denom = np.minimum(area_true[:, None], area_detection[None, :]) + area_norm = np.minimum(area_true[:, None], area_det[None, :]) else: raise ValueError( f"overlap_metric {overlap_metric} is not supported, " @@ -230,7 +251,7 @@ def box_iou_batch( ) out = np.zeros_like(area_inter, dtype=np.float32) - np.divide(area_inter, denom, out=out, where=denom > 0) + np.divide(area_inter, area_norm, out=out, where=area_norm > 0) return out diff --git a/test/detection/utils/test_iou_and_nms.py b/test/detection/utils/test_iou_and_nms.py index 29c11ddedf..964e7345bd 100644 --- a/test/detection/utils/test_iou_and_nms.py +++ 
b/test/detection/utils/test_iou_and_nms.py @@ -12,8 +12,9 @@ box_non_max_suppression, mask_non_max_merge, mask_non_max_suppression, + OverlapMetric ) -from test.test_utils import mock_boxes +from test.test_utils import random_boxes @pytest.mark.parametrize( @@ -637,102 +638,502 @@ def test_mask_non_max_merge( @pytest.mark.parametrize( - "boxes_true, boxes_detection, expected_iou, exception", + "box_true, box_detection, overlap_metric, expected_overlap, exception", [ + ( + [100.0, 100.0, 200.0, 200.0], + [150.0, 150.0, 250.0, 250.0], + OverlapMetric.IOU, + 0.14285714285714285, + DoesNotRaise(), + ), # partial overlap, IOU + ( + [100.0, 100.0, 200.0, 200.0], + [150.0, 150.0, 250.0, 250.0], + OverlapMetric.IOS, + 0.25, + DoesNotRaise(), + ), # partial overlap, IOS + + ( + np.array([0.0, 0.0, 10.0, 10.0], dtype=np.float32), + np.array([0.0, 0.0, 10.0, 10.0], dtype=np.float32), + OverlapMetric.IOU, + 1.0, + DoesNotRaise(), + ), # identical boxes, both boxes are arrays, IOU + ( + np.array([0.0, 0.0, 10.0, 10.0], dtype=np.float32), + np.array([0.0, 0.0, 10.0, 10.0], dtype=np.float32), + OverlapMetric.IOS, + 1.0, + DoesNotRaise(), + ), # identical boxes, both boxes are arrays, IOS + ( + [0.0, 0.0, 10.0, 10.0], + [0.0, 0.0, 10.0, 10.0], + "iou", + 1.0, + DoesNotRaise(), + ), # identical boxes, both boxes are arrays, IOU as lowercase string + ( + [0.0, 0.0, 10.0, 10.0], + [0.0, 0.0, 10.0, 10.0], + "ios", + 1.0, + DoesNotRaise(), + ), # identical boxes, both boxes are arrays, IOS as lowercase string +( + [0.0, 0.0, 10.0, 10.0], + [0.0, 0.0, 10.0, 10.0], + "IOU", + 1.0, + DoesNotRaise(), + ), # identical boxes, both boxes are arrays, IOU as uppercase string + ( + [0.0, 0.0, 10.0, 10.0], + [0.0, 0.0, 10.0, 10.0], + "IOU", + 1.0, + DoesNotRaise(), + ), # identical boxes, both boxes are arrays, IOS as uppercase string + + ( + [0.0, 0.0, 10.0, 10.0], + [20.0, 20.0, 30.0, 30.0], + OverlapMetric.IOU, + 0.0, + DoesNotRaise(), + ), # no overlap, IOU + ( + [0.0, 0.0, 10.0, 10.0], + 
[20.0, 20.0, 30.0, 30.0], + OverlapMetric.IOS, + 0.0, + DoesNotRaise(), + ), # no overlap, IOS + + ( + [0.0, 0.0, 10.0, 10.0], + [10.0, 0.0, 20.0, 10.0], + OverlapMetric.IOU, + 0.0, + DoesNotRaise(), + ), # boxes touch at edge, zero intersection, IOU + ( + [0.0, 0.0, 10.0, 10.0], + [10.0, 0.0, 20.0, 10.0], + OverlapMetric.IOS, + 0.0, + DoesNotRaise(), + ), # boxes touch at edge, zero intersection, IOU + + ( + [0.0, 0.0, 10.0, 10.0], + [2.0, 2.0, 8.0, 8.0], + OverlapMetric.IOU, + 0.36, + DoesNotRaise(), + ), # one box inside another, IOU + ( + [0.0, 0.0, 10.0, 10.0], + [2.0, 2.0, 8.0, 8.0], + OverlapMetric.IOS, + 1.0, + DoesNotRaise(), + ), # one box inside another, IOS + + ( + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 10.0, 10.0], + OverlapMetric.IOU, + 0.0, + DoesNotRaise(), + ), # degenerate true box with zero area, IOU + ( + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 10.0, 10.0], + OverlapMetric.IOS, + 0.0, + DoesNotRaise(), + ), # degenerate true box with zero area, IOS + + ( + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + OverlapMetric.IOU, + 0.0, + DoesNotRaise(), + ), # both boxes fully degenerate, IOU + ( + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + OverlapMetric.IOS, + 0.0, + DoesNotRaise(), + ), # both boxes fully degenerate, IOS + + ( + [-5.0, 0.0, 5.0, 10.0], + [0.0, 0.0, 10.0, 10.0], + OverlapMetric.IOU, + 1.0 / 3.0, + DoesNotRaise(), + ), # negative x_min, overlapping boxes, IOU is 1/3 + ( + [-5.0, 0.0, 5.0, 10.0], + [0.0, 0.0, 10.0, 10.0], + OverlapMetric.IOS, + 0.5, + DoesNotRaise(), + ), # negative x_min, overlapping boxes, IOS is 0.5 + + ( + [0.0, 0.0, 1.0, 1.0], + [0.5, 0.5, 1.5, 1.5], + OverlapMetric.IOU, + 0.14285714285714285, + DoesNotRaise(), + ), # partial overlap with fractional coordinates, IOU + ( + [0.0, 0.0, 1.0, 1.0], + [0.5, 0.5, 1.5, 1.5], + OverlapMetric.IOS, + 0.25, + DoesNotRaise(), + ), # partial overlap with fractional coordinates, IOS + ], +) +def test_box_iou( + box_true: list[float] | np.ndarray, + box_detection: list[float] | 
np.ndarray, + overlap_metric: str | OverlapMetric, + expected_overlap: float, + exception: Exception, +) -> None: + with exception: + result = box_iou( + box_true=box_true, + box_detection=box_detection, + overlap_metric=overlap_metric, + ) + assert result == pytest.approx(expected_overlap, rel=1e-6, abs=1e-12) + + +@pytest.mark.parametrize( + "boxes_true, boxes_detection, overlap_metric, expected_overlap, exception", + [ + # both inputs empty ( np.empty((0, 4), dtype=np.float32), np.empty((0, 4), dtype=np.float32), + OverlapMetric.IOU, np.empty((0, 0), dtype=np.float32), DoesNotRaise(), - ), # empty + ), + # one true box, no detections ( - np.array([[0, 0, 10, 10]], dtype=np.float32), + np.array([[0.0, 0.0, 10.0, 10.0]], dtype=np.float32), np.empty((0, 4), dtype=np.float32), + OverlapMetric.IOU, np.empty((1, 0), dtype=np.float32), DoesNotRaise(), - ), # one true box, no detections + ), + # no true boxes, one detection ( np.empty((0, 4), dtype=np.float32), - np.array([[0, 0, 10, 10]], dtype=np.float32), + np.array([[0.0, 0.0, 10.0, 10.0]], dtype=np.float32), + OverlapMetric.IOU, np.empty((0, 1), dtype=np.float32), DoesNotRaise(), - ), # no true boxes, one detection + ), + # 1x1 partial overlap, IOU ( - np.array([[0, 0, 10, 10]], dtype=np.float32), - np.array([[0, 0, 10, 10]], dtype=np.float32), - np.array([[1.0]]), + np.array([[100.0, 100.0, 200.0, 200.0]], dtype=np.float32), + np.array([[150.0, 150.0, 250.0, 250.0]], dtype=np.float32), + OverlapMetric.IOU, + np.array([[0.14285715]], dtype=np.float32), DoesNotRaise(), - ), # perfect overlap + ), + # 1x1 partial overlap, IOS ( - np.array([[0, 0, 10, 10]], dtype=np.float32), - np.array([[20, 20, 30, 30]], dtype=np.float32), - np.array([[0.0]]), + np.array([[100.0, 100.0, 200.0, 200.0]], dtype=np.float32), + np.array([[150.0, 150.0, 250.0, 250.0]], dtype=np.float32), + OverlapMetric.IOS, + np.array([[0.25]], dtype=np.float32), DoesNotRaise(), - ), # no overlap + ), + # 1x1 identical boxes, IOU as lowercase string ( - 
np.array([[0, 0, 10, 10]], dtype=np.float32), - np.array([[5, 5, 15, 15]], dtype=np.float32), - np.array([[25.0 / 175.0]]), # intersection: 5x5=25, union: 100+100-25=175 + np.array([[0.0, 0.0, 10.0, 10.0]], dtype=np.float32), + np.array([[0.0, 0.0, 10.0, 10.0]], dtype=np.float32), + "iou", + np.array([[1.0]], dtype=np.float32), DoesNotRaise(), - ), # partial overlap + ), + # 1x1 identical boxes, IOS as lowercase string ( - np.array([[0, 0, 10, 10]], dtype=np.float32), - np.array([[0, 0, 5, 5]], dtype=np.float32), - np.array([[25.0 / 100.0]]), # intersection: 5x5=25, union: 100 + np.array([[0.0, 0.0, 10.0, 10.0]], dtype=np.float32), + np.array([[0.0, 0.0, 10.0, 10.0]], dtype=np.float32), + "ios", + np.array([[1.0]], dtype=np.float32), DoesNotRaise(), - ), # detection inside true box + ), + # 1x1 identical boxes, IOU as uppercase string + ( + np.array([[0.0, 0.0, 10.0, 10.0]], dtype=np.float32), + np.array([[0.0, 0.0, 10.0, 10.0]], dtype=np.float32), + "IOU", + np.array([[1.0]], dtype=np.float32), + DoesNotRaise(), + ), + # 1x1 identical boxes, IOS as uppercase string + ( + np.array([[0.0, 0.0, 10.0, 10.0]], dtype=np.float32), + np.array([[0.0, 0.0, 10.0, 10.0]], dtype=np.float32), + "IOS", + np.array([[1.0]], dtype=np.float32), + DoesNotRaise(), + ), + # 1x1 no overlap, IOU + ( + np.array([[0.0, 0.0, 10.0, 10.0]], dtype=np.float32), + np.array([[20.0, 20.0, 30.0, 30.0]], dtype=np.float32), + OverlapMetric.IOU, + np.array([[0.0]], dtype=np.float32), + DoesNotRaise(), + ), + # 1x1 no overlap, IOS + ( + np.array([[0.0, 0.0, 10.0, 10.0]], dtype=np.float32), + np.array([[20.0, 20.0, 30.0, 30.0]], dtype=np.float32), + OverlapMetric.IOS, + np.array([[0.0]], dtype=np.float32), + DoesNotRaise(), + ), + # 1x1 touching at edge, zero intersection, IOU + ( + np.array([[0.0, 0.0, 10.0, 10.0]], dtype=np.float32), + np.array([[10.0, 0.0, 20.0, 10.0]], dtype=np.float32), + OverlapMetric.IOU, + np.array([[0.0]], dtype=np.float32), + DoesNotRaise(), + ), + # 1x1 touching at edge, zero 
intersection, IOS + ( + np.array([[0.0, 0.0, 10.0, 10.0]], dtype=np.float32), + np.array([[10.0, 0.0, 20.0, 10.0]], dtype=np.float32), + OverlapMetric.IOS, + np.array([[0.0]], dtype=np.float32), + DoesNotRaise(), + ), + # 1x1 box inside another, IOU + ( + np.array([[0.0, 0.0, 10.0, 10.0]], dtype=np.float32), + np.array([[2.0, 2.0, 8.0, 8.0]], dtype=np.float32), + OverlapMetric.IOU, + np.array([[0.36]], dtype=np.float32), + DoesNotRaise(), + ), + # 1x1 box inside another, IOS + ( + np.array([[0.0, 0.0, 10.0, 10.0]], dtype=np.float32), + np.array([[2.0, 2.0, 8.0, 8.0]], dtype=np.float32), + OverlapMetric.IOS, + np.array([[1.0]], dtype=np.float32), + DoesNotRaise(), + ), + # 1x1 degenerate true box, IOU + ( + np.array([[0.0, 0.0, 0.0, 0.0]], dtype=np.float32), + np.array([[0.0, 0.0, 10.0, 10.0]], dtype=np.float32), + OverlapMetric.IOU, + np.array([[0.0]], dtype=np.float32), + DoesNotRaise(), + ), + # 1x1 degenerate true box, IOS + ( + np.array([[0.0, 0.0, 0.0, 0.0]], dtype=np.float32), + np.array([[0.0, 0.0, 10.0, 10.0]], dtype=np.float32), + OverlapMetric.IOS, + np.array([[0.0]], dtype=np.float32), + DoesNotRaise(), + ), + # 1x1 both boxes degenerate, IOU + ( + np.array([[0.0, 0.0, 0.0, 0.0]], dtype=np.float32), + np.array([[0.0, 0.0, 0.0, 0.0]], dtype=np.float32), + OverlapMetric.IOU, + np.array([[0.0]], dtype=np.float32), + DoesNotRaise(), + ), + # 1x1 both boxes degenerate, IOS + ( + np.array([[0.0, 0.0, 0.0, 0.0]], dtype=np.float32), + np.array([[0.0, 0.0, 0.0, 0.0]], dtype=np.float32), + OverlapMetric.IOS, + np.array([[0.0]], dtype=np.float32), + DoesNotRaise(), + ), + # 1x1 negative coordinate, partial overlap, IOU + ( + np.array([[-5.0, 0.0, 5.0, 10.0]], dtype=np.float32), + np.array([[0.0, 0.0, 10.0, 10.0]], dtype=np.float32), + OverlapMetric.IOU, + np.array([[1.0 / 3.0]], dtype=np.float32), + DoesNotRaise(), + ), + # 1x1 negative coordinate, partial overlap, IOS + ( + np.array([[-5.0, 0.0, 5.0, 10.0]], dtype=np.float32), + np.array([[0.0, 0.0, 10.0, 10.0]], 
dtype=np.float32), + OverlapMetric.IOS, + np.array([[0.5]], dtype=np.float32), + DoesNotRaise(), + ), + # 1x1 fractional coordinates, partial overlap, IOU + ( + np.array([[0.0, 0.0, 1.0, 1.0]], dtype=np.float32), + np.array([[0.5, 0.5, 1.5, 1.5]], dtype=np.float32), + OverlapMetric.IOU, + np.array([[0.14285715]], dtype=np.float32), + DoesNotRaise(), + ), + # 1x1 fractional coordinates, partial overlap, IOS ( - np.array([[0, 0, 5, 5]], dtype=np.float32), - np.array([[0, 0, 10, 10]], dtype=np.float32), - np.array([[25.0 / 100.0]]), # true box inside detection + np.array([[0.0, 0.0, 1.0, 1.0]], dtype=np.float32), + np.array([[0.5, 0.5, 1.5, 1.5]], dtype=np.float32), + OverlapMetric.IOS, + np.array([[0.25]], dtype=np.float32), DoesNotRaise(), ), + # true batch case, 2x2, IOU ( - np.array([[0, 0, 10, 10], [20, 20, 30, 30]], dtype=np.float32), - np.array([[0, 0, 10, 10], [20, 20, 30, 30]], dtype=np.float32), - np.array([[1.0, 0.0], [0.0, 1.0]]), + np.array( + [ + [0.0, 0.0, 10.0, 10.0], + [10.0, 10.0, 20.0, 20.0], + ], + dtype=np.float32, + ), + np.array( + [ + [0.0, 0.0, 10.0, 10.0], + [5.0, 5.0, 15.0, 15.0], + ], + dtype=np.float32, + ), + OverlapMetric.IOU, + np.array( + [ + [1.0, 0.14285715], + [0.0, 0.14285715], + ], + dtype=np.float32, + ), + DoesNotRaise(), + ), + # true batch case, 2x2, IOS + ( + np.array( + [ + [0.0, 0.0, 10.0, 10.0], + [10.0, 10.0, 20.0, 20.0], + ], + dtype=np.float32, + ), + np.array( + [ + [0.0, 0.0, 10.0, 10.0], + [5.0, 5.0, 15.0, 15.0], + ], + dtype=np.float32, + ), + OverlapMetric.IOS, + np.array( + [ + [1.0, 0.25], + [0.0, 0.25], + ], + dtype=np.float32, + ), DoesNotRaise(), - ), # two boxes, perfect matches + ), + # invalid overlap_metric + ( + np.array([[0.0, 0.0, 10.0, 10.0]], dtype=np.float32), + np.array([[0.0, 0.0, 10.0, 10.0]], dtype=np.float32), + "invalid", + None, + pytest.raises(ValueError), + ), ], ) def test_box_iou_batch( boxes_true: np.ndarray, boxes_detection: np.ndarray, - expected_iou: np.ndarray, + overlap_metric: str | 
OverlapMetric, + expected_overlap: np.ndarray | None, exception: Exception, ) -> None: with exception: - result = box_iou_batch(boxes_true, boxes_detection) - assert result.shape == expected_iou.shape - assert np.allclose(result, expected_iou, rtol=1e-5, atol=1e-5) - - -def test_box_iou_batch_consistency_with_box_iou(): - """Test that box_iou_batch gives same results as box_iou for single boxes.""" - boxes_true = np.array(mock_boxes(5, seed=1), dtype=np.float32) - boxes_detection = np.array(mock_boxes(5, seed=2), dtype=np.float32) - - batch_result = box_iou_batch(boxes_true, boxes_detection) - - for i, box_true in enumerate(boxes_true): - for j, box_detection in enumerate(boxes_detection): - single_result = box_iou(box_true, box_detection) - assert np.allclose(batch_result[i, j], single_result, rtol=1e-5, atol=1e-5) - + result = box_iou_batch( + boxes_true=boxes_true, + boxes_detection=boxes_detection, + overlap_metric=overlap_metric, + ) -def test_box_iou_batch_with_mock_detections(): - """Test box_iou_batch with generated boxes and verify results are valid.""" - boxes_true = np.array(mock_boxes(10, seed=1), dtype=np.float32) - boxes_detection = np.array(mock_boxes(15, seed=2), dtype=np.float32) + assert isinstance(result, np.ndarray) + assert result.shape == expected_overlap.shape + assert np.allclose( + result, + expected_overlap, + rtol=1e-6, + atol=1e-12, + ) - result = box_iou_batch(boxes_true, boxes_detection) - assert result.shape == (10, 15) +@pytest.mark.parametrize( + "num_true, num_det", + [ + (5, 5), + (5, 10), + (10, 5), + (10, 10), + (20, 30), + (30, 20), + (50, 50), + (100, 100), + ], +) +@pytest.mark.parametrize( + "overlap_metric", + [OverlapMetric.IOU, OverlapMetric.IOS], +) +def test_box_iou_batch_symmetric_large( + num_true: int, + num_det: int, + overlap_metric: OverlapMetric, +) -> None: + boxes_true = random_boxes(num_true) + boxes_det = random_boxes(num_det) - assert np.all(result >= 0) - assert np.all(result <= 1.0) + result_ab = 
box_iou_batch( + boxes_true=boxes_true, + boxes_detection=boxes_det, + overlap_metric=overlap_metric, + ) + result_ba = box_iou_batch( + boxes_true=boxes_det, + boxes_detection=boxes_true, + overlap_metric=overlap_metric, + ) - # and symmetric - result_reversed = box_iou_batch(boxes_detection, boxes_true) - assert result_reversed.shape == (15, 10) - assert np.allclose(result.T, result_reversed, rtol=1e-5, atol=1e-5) + assert result_ab.shape == (num_true, num_det) + assert result_ba.shape == (num_det, num_true) + assert np.allclose( + result_ab, + result_ba.T, + rtol=1e-6, + atol=1e-12, + ) diff --git a/test/test_utils.py b/test/test_utils.py index e512de6f62..424d4a3c45 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -53,39 +53,48 @@ def convert_data(data: dict[str, list[Any]]): ) -def mock_boxes( - n: int, - resolution_wh: tuple[int, int] = (1920, 1080), - min_size: int = 20, - max_size: int = 200, +def random_boxes( + count: int, + image_size: tuple[int, int] = (1920, 1080), + min_box_size: int = 20, + max_box_size: int = 200, seed: int | None = None, -) -> list[list[float]]: +) -> np.ndarray: """ - Generate N valid bounding boxes of format [x_min, y_min, x_max, y_max]. + Generate random bounding boxes within given image dimensions and size constraints. + + Creates `count` bounding boxes randomly positioned and sized, ensuring each + stays within image bounds and has width and height in the specified range. Args: - n: Number of boxes to generate. - resolution_wh: Image resolution as (width, height). Defaults to (1920, 1080). - min_size: Minimum box size (width/height). Defaults to 20. - max_size: Maximum box size (width/height). Defaults to 200. - seed: Random seed for reproducibility. Defaults to None. + count (`int`): Number of random bounding boxes to generate. + image_size (`tuple[int, int]`): Image size as `(width, height)`. Defaults to `(1920, 1080)`. + min_box_size (`int`): Minimum side length (pixels) for generated boxes. Defaults to `20`. 
+ max_box_size (`int`): Maximum side length (pixels) for generated boxes. Defaults to `200`. + seed (`int` or `None`): Optional random seed for reproducibility. Defaults to `None`. Returns: - List of boxes, each as [x_min, y_min, x_max, y_max]. + (`numpy.ndarray`): Array of shape `(count, 4)` with bounding boxes as + `(x_min, y_min, x_max, y_max)`. """ if seed is not None: random.seed(seed) - width, height = resolution_wh - boxes = [] - for _ in range(n): - w = random.uniform(min_size, max_size) - h = random.uniform(min_size, max_size) - x1 = random.uniform(0, width - w) - y1 = random.uniform(0, height - h) - x2 = x1 + w - y2 = y1 + h - boxes.append([x1, y1, x2, y2]) - return boxes + + img_w, img_h = image_size + out = np.zeros((count, 4), dtype=np.float32) + + for i in range(count): + w = random.uniform(min_box_size, max_box_size) + h = random.uniform(min_box_size, max_box_size) + + x_min = random.uniform(0, img_w - w) + y_min = random.uniform(0, img_h - h) + x_max = x_min + w + y_max = y_min + h + + out[i] = (x_min, y_min, x_max, y_max) + + return out def assert_almost_equal(actual, expected, tolerance=1e-5): From 1e2dc8f88b6b22ce3da1232eb46d55bb0fd19353 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 14 Nov 2025 10:57:56 +0000 Subject: [PATCH 087/124] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/detection/utils/iou_and_nms.py | 6 +++--- test/detection/utils/test_iou_and_nms.py | 12 ++---------- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/supervision/detection/utils/iou_and_nms.py b/supervision/detection/utils/iou_and_nms.py index f4c68a03d0..8a444cf16e 100644 --- a/supervision/detection/utils/iou_and_nms.py +++ b/supervision/detection/utils/iou_and_nms.py @@ -230,12 +230,12 @@ def box_iou_batch( np.minimum(y_max_true[:, 
None], y_max_det[None, :], out=y_max_inter) # we reuse x_max_inter and y_max_inter to store inter_w and inter_h - np.subtract(x_max_inter, x_min_inter, out=x_max_inter) # inter_w - np.subtract(y_max_inter, y_min_inter, out=y_max_inter) # inter_h + np.subtract(x_max_inter, x_min_inter, out=x_max_inter) # inter_w + np.subtract(y_max_inter, y_min_inter, out=y_max_inter) # inter_h np.clip(x_max_inter, 0.0, None, out=x_max_inter) np.clip(y_max_inter, 0.0, None, out=y_max_inter) - area_inter = x_max_inter * y_max_inter # inter_w * inter_h + area_inter = x_max_inter * y_max_inter # inter_w * inter_h area_true = (x_max_true - x_min_true) * (y_max_true - y_min_true) area_det = (x_max_det - x_min_det) * (y_max_det - y_min_det) diff --git a/test/detection/utils/test_iou_and_nms.py b/test/detection/utils/test_iou_and_nms.py index 964e7345bd..ab7586483c 100644 --- a/test/detection/utils/test_iou_and_nms.py +++ b/test/detection/utils/test_iou_and_nms.py @@ -6,13 +6,13 @@ import pytest from supervision.detection.utils.iou_and_nms import ( + OverlapMetric, _group_overlapping_boxes, box_iou, box_iou_batch, box_non_max_suppression, mask_non_max_merge, mask_non_max_suppression, - OverlapMetric ) from test.test_utils import random_boxes @@ -654,7 +654,6 @@ def test_mask_non_max_merge( 0.25, DoesNotRaise(), ), # partial overlap, IOS - ( np.array([0.0, 0.0, 10.0, 10.0], dtype=np.float32), np.array([0.0, 0.0, 10.0, 10.0], dtype=np.float32), @@ -683,7 +682,7 @@ def test_mask_non_max_merge( 1.0, DoesNotRaise(), ), # identical boxes, both boxes are arrays, IOS as lowercase string -( + ( [0.0, 0.0, 10.0, 10.0], [0.0, 0.0, 10.0, 10.0], "IOU", @@ -697,7 +696,6 @@ def test_mask_non_max_merge( 1.0, DoesNotRaise(), ), # identical boxes, both boxes are arrays, IOS as uppercase string - ( [0.0, 0.0, 10.0, 10.0], [20.0, 20.0, 30.0, 30.0], @@ -712,7 +710,6 @@ def test_mask_non_max_merge( 0.0, DoesNotRaise(), ), # no overlap, IOS - ( [0.0, 0.0, 10.0, 10.0], [10.0, 0.0, 20.0, 10.0], @@ -727,7 +724,6 @@ 
def test_mask_non_max_merge( 0.0, DoesNotRaise(), ), # boxes touch at edge, zero intersection, IOU - ( [0.0, 0.0, 10.0, 10.0], [2.0, 2.0, 8.0, 8.0], @@ -742,7 +738,6 @@ def test_mask_non_max_merge( 1.0, DoesNotRaise(), ), # one box inside another, IOS - ( [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 10.0, 10.0], @@ -757,7 +752,6 @@ def test_mask_non_max_merge( 0.0, DoesNotRaise(), ), # degenerate true box with zero area, IOS - ( [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], @@ -772,7 +766,6 @@ def test_mask_non_max_merge( 0.0, DoesNotRaise(), ), # both boxes fully degenerate, IOS - ( [-5.0, 0.0, 5.0, 10.0], [0.0, 0.0, 10.0, 10.0], @@ -787,7 +780,6 @@ def test_mask_non_max_merge( 0.5, DoesNotRaise(), ), # negative x_min, overlapping boxes, IOS is 0.5 - ( [0.0, 0.0, 1.0, 1.0], [0.5, 0.5, 1.5, 1.5], From 42d92ba9137a83aa4a5157ed14dfa04cacddc5d5 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Fri, 14 Nov 2025 12:00:45 +0100 Subject: [PATCH 088/124] making `ruff` happy --- test/test_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 424d4a3c45..961f8cee1b 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -68,10 +68,10 @@ def random_boxes( Args: count (`int`): Number of random bounding boxes to generate. - image_size (`tuple[int, int]`): Image size as `(width, height)`. Defaults to `(1920, 1080)`. - min_box_size (`int`): Minimum side length (pixels) for generated boxes. Defaults to `20`. - max_box_size (`int`): Maximum side length (pixels) for generated boxes. Defaults to `200`. - seed (`int` or `None`): Optional random seed for reproducibility. Defaults to `None`. + image_size (`tuple[int, int]`): Image size as `(width, height)`. + min_box_size (`int`): Minimum side length (pixels) for generated boxes. + max_box_size (`int`): Maximum side length (pixels) for generated boxes. + seed (`int` or `None`): Optional random seed for reproducibility. 
Returns: (`numpy.ndarray`): Array of shape `(count, 4)` with bounding boxes as From 35272b40d30b05effb13f62b13e10784f845fa11 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Fri, 14 Nov 2025 12:38:55 +0100 Subject: [PATCH 089/124] remove ultralytics tests --- test/detection/test_core.py | 84 ------------------------------------- 1 file changed, 84 deletions(-) diff --git a/test/detection/test_core.py b/test/detection/test_core.py index bf27f209e9..c57bea4098 100644 --- a/test/detection/test_core.py +++ b/test/detection/test_core.py @@ -815,87 +815,3 @@ def test_merge_inner_detection_object_pair( with exception: result = merge_inner_detection_object_pair(detection_1, detection_2) assert result == expected_result - - -class TestFromUltralytics: - """Test suite for Detections.from_ultralytics method.""" - - def test_from_ultralytics_with_missing_boxes_attribute(self): - """Test that from_ultralytics handles missing boxes attribute gracefully. - - Regression test for issue #2000. - """ - - # Create a mock ultralytics result without boxes attribute - class MockUltralyticsResult: - def __init__(self): - self.names = {0: "class1", 1: "class2"} - # Intentionally not setting 'boxes' or 'obb' attribute - - mock_result = MockUltralyticsResult() - detections = Detections.from_ultralytics(mock_result) - - # Should return empty detections instead of crashing - assert len(detections) == 0 - assert detections.xyxy.shape == (0, 4) - - def test_from_ultralytics_with_boxes_none(self): - """Test that from_ultralytics handles boxes=None (segmentation-only models).""" - - # Create a mock ultralytics result with boxes=None - class MockUltralyticsResult: - def __init__(self): - self.boxes = None - self.names = {0: "class1"} - # Mock masks attribute for segmentation - self.masks = None - - mock_result = MockUltralyticsResult() - # This should handle the segmentation-only case - # Note: Will fail if masks are not properly set, but that's expected behavior - try: - _ = 
Detections.from_ultralytics(mock_result) - # If masks are properly implemented, this should work - except (AttributeError, TypeError): - # Expected if masks aren't properly mocked - pass - - def test_from_ultralytics_with_valid_boxes(self): - """Test that from_ultralytics works correctly with valid boxes.""" - - # Create a mock ultralytics result with valid boxes - class MockBoxes: - def __init__(self): - self.cls = self._MockTensor([0, 1]) - self.xyxy = self._MockTensor([[10, 20, 30, 40], [50, 60, 70, 80]]) - self.conf = self._MockTensor([0.9, 0.8]) - self.id = None - - class _MockTensor: - def __init__(self, data): - self.data = np.array(data) - - def cpu(self): - return self - - def numpy(self): - return self.data - - def astype(self, dtype): - return self.data.astype(dtype) - - class MockUltralyticsResult: - def __init__(self): - self.boxes = MockBoxes() - self.names = {0: "person", 1: "car"} - self.masks = None - - mock_result = MockUltralyticsResult() - detections = Detections.from_ultralytics(mock_result) - - assert len(detections) == 2 - assert np.array_equal( - detections.xyxy, np.array([[10, 20, 30, 40], [50, 60, 70, 80]]) - ) - assert np.array_equal(detections.confidence, np.array([0.9, 0.8])) - assert np.array_equal(detections.class_id, np.array([0, 1])) From f563c5a63472380ca261e64656a95e6a0ad4f33c Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Fri, 14 Nov 2025 12:53:58 +0100 Subject: [PATCH 090/124] update `denormalize_boxes` docstring and examples --- supervision/detection/utils/boxes.py | 57 ++++++++++++++++------------ test/detection/utils/test_boxes.py | 4 +- 2 files changed, 35 insertions(+), 26 deletions(-) diff --git a/supervision/detection/utils/boxes.py b/supervision/detection/utils/boxes.py index 60542a43a9..44b78cba85 100644 --- a/supervision/detection/utils/boxes.py +++ b/supervision/detection/utils/boxes.py @@ -95,24 +95,27 @@ def pad_boxes(xyxy: np.ndarray, px: int, py: int | None = None) -> np.ndarray: def denormalize_boxes( - 
normalized_xyxy: np.ndarray, + xyxy: np.ndarray, resolution_wh: tuple[int, int], normalization_factor: float = 1.0, ) -> np.ndarray: """ - Converts normalized bounding box coordinates to absolute pixel values. + Convert normalized bounding box coordinates to absolute pixel coordinates. + + Multiplies each bounding box coordinate by image size and divides by + `normalization_factor`, mapping values from normalized `[0, normalization_factor]` + to absolute pixel values for a given resolution. Args: - normalized_xyxy (np.ndarray): A numpy array of shape `(N, 4)` where each row - contains normalized coordinates in the format `(x_min, y_min, x_max, y_max)`, - with values between 0 and `normalization_factor`. - resolution_wh (Tuple[int, int]): A tuple `(width, height)` representing the - target image resolution. - normalization_factor (float, optional): The normalization range of the input - coordinates. Defaults to 1.0. + xyxy (`numpy.ndarray`): Normalized bounding boxes of shape `(N, 4)`, + where each row is `(x_min, y_min, x_max, y_max)`, values in + `[0, normalization_factor]`. + resolution_wh (`tuple[int, int]`): Target image resolution as `(width, height)`. + normalization_factor (`float`): Maximum value of input coordinate range. + Defaults to `1.0`. Returns: - np.ndarray: An array of shape `(N, 4)` with absolute coordinates in + (`numpy.ndarray`): Array of shape `(N, 4)` with absolute coordinates in `(x_min, y_min, x_max, y_max)` format. Examples: @@ -120,32 +123,38 @@ def denormalize_boxes( import numpy as np import supervision as sv - # Default normalization (0-1) normalized_xyxy = np.array([ [0.1, 0.2, 0.5, 0.6], - [0.3, 0.4, 0.7, 0.8] + [0.3, 0.4, 0.7, 0.8], + [0.2, 0.1, 0.6, 0.5] ]) - resolution_wh = (100, 200) - sv.denormalize_boxes(normalized_xyxy, resolution_wh) + sv.denormalize_boxes(normalized_xyxy, (1280, 720)) # array([ - # [ 10., 40., 50., 120.], - # [ 30., 80., 70., 160.] 
+ # [128., 144., 640., 432.], + # [384., 288., 896., 576.], + # [256., 72., 768., 360.] # ]) + ``` + + ``` + import numpy as np + import supervision as sv - # Custom normalization (0-100) normalized_xyxy = np.array([ - [10., 20., 50., 60.], - [30., 40., 70., 80.] + [256., 128., 768., 640.] ]) - sv.denormalize_boxes(normalized_xyxy, resolution_wh, normalization_factor=100.0) + result = sv.denormalize_boxes( + normalized_xyxy, + (1280, 720), + normalization_factor=1024.0 + ) # array([ - # [ 10., 40., 50., 120.], - # [ 30., 80., 70., 160.] + # [320., 90., 960., 450.] # ]) ``` - """ # noqa E501 // docs + """ width, height = resolution_wh - result = normalized_xyxy.copy() + result = xyxy.copy() result[:, [0, 2]] = (result[:, [0, 2]] * width) / normalization_factor result[:, [1, 3]] = (result[:, [1, 3]] * height) / normalization_factor diff --git a/test/detection/utils/test_boxes.py b/test/detection/utils/test_boxes.py index b27dd87d1e..787c970f4c 100644 --- a/test/detection/utils/test_boxes.py +++ b/test/detection/utils/test_boxes.py @@ -219,7 +219,7 @@ def test_scale_boxes( ], ) def test_denormalize_boxes( - normalized_xyxy: np.ndarray, + xyxy: np.ndarray, resolution_wh: tuple[int, int], normalization_factor: float, expected_result: np.ndarray, @@ -227,7 +227,7 @@ def test_denormalize_boxes( ) -> None: with exception: result = denormalize_boxes( - normalized_xyxy=normalized_xyxy, + xyxy=xyxy, resolution_wh=resolution_wh, normalization_factor=normalization_factor, ) From dfd2a60855f0e3a70622ec36bfe3406728c5362b Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Fri, 14 Nov 2025 13:12:39 +0100 Subject: [PATCH 091/124] update `denormalize_boxes` docstring and examples; update `from_google_gemini_2_0`, `from_google_gemini_2_5` and `from_moondream` to use latest version of `denormalize_boxes` --- supervision/detection/utils/boxes.py | 14 +++++----- supervision/detection/vlm.py | 40 +++++++++++++--------------- test/detection/utils/test_boxes.py | 2 +- 3 files changed, 25 
insertions(+), 31 deletions(-) diff --git a/supervision/detection/utils/boxes.py b/supervision/detection/utils/boxes.py index 44b78cba85..01fcdc7691 100644 --- a/supervision/detection/utils/boxes.py +++ b/supervision/detection/utils/boxes.py @@ -123,12 +123,13 @@ def denormalize_boxes( import numpy as np import supervision as sv - normalized_xyxy = np.array([ + xyxy = np.array([ [0.1, 0.2, 0.5, 0.6], [0.3, 0.4, 0.7, 0.8], [0.2, 0.1, 0.6, 0.5] ]) - sv.denormalize_boxes(normalized_xyxy, (1280, 720)) + + sv.denormalize_boxes(xyxy, (1280, 720)) # array([ # [128., 144., 640., 432.], # [384., 288., 896., 576.], @@ -140,14 +141,11 @@ def denormalize_boxes( import numpy as np import supervision as sv - normalized_xyxy = np.array([ + xyxy = np.array([ [256., 128., 768., 640.] ]) - result = sv.denormalize_boxes( - normalized_xyxy, - (1280, 720), - normalization_factor=1024.0 - ) + + sv.denormalize_boxes(xyxy, (1280, 720), normalization_factor=1024.0) # array([ # [320., 90., 960., 450.] # ]) diff --git a/supervision/detection/vlm.py b/supervision/detection/vlm.py index 71207554e4..2f9b60ddb5 100644 --- a/supervision/detection/vlm.py +++ b/supervision/detection/vlm.py @@ -538,7 +538,7 @@ def from_google_gemini_2_0( return np.empty((0, 4)), None, np.empty((0,), dtype=str) labels = [] - boxes_list = [] + xyxy = [] for item in data: if "box_2d" not in item or "label" not in item: @@ -546,18 +546,16 @@ def from_google_gemini_2_0( labels.append(item["label"]) box = item["box_2d"] # Gemini bbox order is [y_min, x_min, y_max, x_max] - boxes_list.append( - denormalize_boxes( - np.array([box[1], box[0], box[3], box[2]]).astype(np.float64), - resolution_wh=(w, h), - normalization_factor=1000, - ) - ) + xyxy.append([box[1], box[0], box[3], box[2]]) - if not boxes_list: + if len(xyxy) == 0: return np.empty((0, 4)), None, np.empty((0,), dtype=str) - xyxy = np.array(boxes_list) + xyxy = denormalize_boxes( + np.array(xyxy, dtype=np.float64), + resolution_wh=(w, h), + 
normalization_factor=1000, + ) class_name = np.array(labels) class_id = None @@ -649,10 +647,10 @@ def from_google_gemini_2_5( box = item["box_2d"] # Gemini bbox order is [y_min, x_min, y_max, x_max] absolute_bbox = denormalize_boxes( - np.array([box[1], box[0], box[3], box[2]]).astype(np.float64), + np.array([[box[1], box[0], box[3], box[2]]]).astype(np.float64), resolution_wh=(w, h), normalization_factor=1000, - ) + )[0] boxes_list.append(absolute_bbox) if "mask" in item: @@ -735,7 +733,7 @@ def from_google_gemini_2_5( def from_moondream( result: dict, resolution_wh: tuple[int, int], -) -> tuple[np.ndarray]: +) -> np.ndarray: """ Parse and scale bounding boxes from moondream JSON output. @@ -773,7 +771,7 @@ def from_moondream( if "objects" not in result or not isinstance(result["objects"], list): return np.empty((0, 4), dtype=float) - denormalize_xyxy = [] + xyxy = [] for item in result["objects"]: if not all(k in item for k in ["x_min", "y_min", "x_max", "y_max"]): @@ -784,14 +782,12 @@ def from_moondream( x_max = item["x_max"] y_max = item["y_max"] - denormalize_xyxy.append( - denormalize_boxes( - np.array([x_min, y_min, x_max, y_max]).astype(np.float64), - resolution_wh=(w, h), - ) - ) + xyxy.append([x_min, y_min, x_max, y_max]) - if not denormalize_xyxy: + if len(xyxy) == 0: return np.empty((0, 4)) - return np.array(denormalize_xyxy, dtype=float) + return denormalize_boxes( + np.array(xyxy).astype(np.float64), + resolution_wh=(w, h), + ) diff --git a/test/detection/utils/test_boxes.py b/test/detection/utils/test_boxes.py index 787c970f4c..66d0d999c8 100644 --- a/test/detection/utils/test_boxes.py +++ b/test/detection/utils/test_boxes.py @@ -150,7 +150,7 @@ def test_scale_boxes( @pytest.mark.parametrize( - "normalized_xyxy, resolution_wh, normalization_factor, expected_result, exception", + "xyxy, resolution_wh, normalization_factor, expected_result, exception", [ ( np.empty(shape=(0, 4)), From db0892f3e975c944a9a21e470b7b981fd2e15551 Mon Sep 17 00:00:00 2001 
From: SkalskiP Date: Fri, 14 Nov 2025 14:52:03 +0100 Subject: [PATCH 092/124] remove unnecessary package-level import --- supervision/detection/utils/__init__.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/supervision/detection/utils/__init__.py b/supervision/detection/utils/__init__.py index c4b0f38706..e69de29bb2 100644 --- a/supervision/detection/utils/__init__.py +++ b/supervision/detection/utils/__init__.py @@ -1,3 +0,0 @@ -from supervision.detection.utils.iou_and_nms import box_iou_batch - -__all__ = ["box_iou_batch"] From 4663fab4e40f6d6560d064e41dcf5dfd9aa66bee Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Fri, 14 Nov 2025 14:52:39 +0100 Subject: [PATCH 093/124] update `new` docs page marks --- docs/detection/annotators.md | 1 - docs/detection/core.md | 1 - docs/detection/utils/boxes.md | 1 - docs/detection/utils/iou_and_nms.md | 1 - docs/detection/utils/polygons.md | 1 - docs/how_to/benchmark_a_model.md | 1 - docs/how_to/track_objects.md | 1 - docs/keypoint/core.md | 1 - docs/metrics/mean_average_precision.md | 1 - 9 files changed, 9 deletions(-) diff --git a/docs/detection/annotators.md b/docs/detection/annotators.md index 938c49ff48..a0341eddf1 100644 --- a/docs/detection/annotators.md +++ b/docs/detection/annotators.md @@ -1,6 +1,5 @@ --- comments: true -status: new --- # Annotators diff --git a/docs/detection/core.md b/docs/detection/core.md index 475cdae1da..35225cec51 100644 --- a/docs/detection/core.md +++ b/docs/detection/core.md @@ -1,6 +1,5 @@ --- comments: true -status: new --- # Detections diff --git a/docs/detection/utils/boxes.md b/docs/detection/utils/boxes.md index 63a3231755..020cc8f99a 100644 --- a/docs/detection/utils/boxes.md +++ b/docs/detection/utils/boxes.md @@ -1,6 +1,5 @@ --- comments: true -status: new --- # Boxes Utils diff --git a/docs/detection/utils/iou_and_nms.md b/docs/detection/utils/iou_and_nms.md index 2b4e4fc334..7191656b7e 100644 --- a/docs/detection/utils/iou_and_nms.md +++ b/docs/detection/utils/iou_and_nms.md 
@@ -1,6 +1,5 @@ --- comments: true -status: new --- # IoU and NMS Utils diff --git a/docs/detection/utils/polygons.md b/docs/detection/utils/polygons.md index cd9525345a..8a7cf1e1ce 100644 --- a/docs/detection/utils/polygons.md +++ b/docs/detection/utils/polygons.md @@ -1,6 +1,5 @@ --- comments: true -status: new --- # Polygons Utils diff --git a/docs/how_to/benchmark_a_model.md b/docs/how_to/benchmark_a_model.md index bf23ee0890..aa707fa734 100644 --- a/docs/how_to/benchmark_a_model.md +++ b/docs/how_to/benchmark_a_model.md @@ -1,6 +1,5 @@ --- comments: true -status: new --- ![Corgi Example](https://media.roboflow.com/supervision/image-examples/how-to/benchmark-models/corgi-sorted-2.png) diff --git a/docs/how_to/track_objects.md b/docs/how_to/track_objects.md index 9bf17e8651..2acad7740d 100644 --- a/docs/how_to/track_objects.md +++ b/docs/how_to/track_objects.md @@ -1,6 +1,5 @@ --- comments: true -status: new --- # Track Objects diff --git a/docs/keypoint/core.md b/docs/keypoint/core.md index acb13e156c..e683ae873a 100644 --- a/docs/keypoint/core.md +++ b/docs/keypoint/core.md @@ -1,6 +1,5 @@ --- comments: true -status: new --- # Keypoint Detection diff --git a/docs/metrics/mean_average_precision.md b/docs/metrics/mean_average_precision.md index ce3e06a411..10f7a97771 100644 --- a/docs/metrics/mean_average_precision.md +++ b/docs/metrics/mean_average_precision.md @@ -1,6 +1,5 @@ --- comments: true -status: new --- # Mean Average Precision From fae1bc26616e7bc4fc7804ae7c382fd7f869b66c Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Fri, 14 Nov 2025 14:52:57 +0100 Subject: [PATCH 094/124] bump version from `0.27.0rc4` to `0.27.0rc5` --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 81c12fb62c..2996af1fb2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "supervision" description = "A set of easy-to-use utils that will come in handy in any Computer Vision project" license = 
{ text = "MIT" } -version = "0.27.0rc4" +version = "0.27.0rc5" readme = "README.md" requires-python = ">=3.9" authors = [ From ce28ab7fc7f76bd17fb1f3477233d4276709b3c4 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Fri, 14 Nov 2025 15:49:22 +0100 Subject: [PATCH 095/124] fix unit tests for `InferenceSlicer._generate_offset` --- .../detection/tools/inference_slicer.py | 94 ++++++------------- test/detection/tools/test_inference_slicer.py | 53 +---------- 2 files changed, 32 insertions(+), 115 deletions(-) diff --git a/supervision/detection/tools/inference_slicer.py b/supervision/detection/tools/inference_slicer.py index aaecccb3dc..3aa7c4ba78 100644 --- a/supervision/detection/tools/inference_slicer.py +++ b/supervision/detection/tools/inference_slicer.py @@ -13,8 +13,7 @@ from supervision.detection.utils.masks import move_masks from supervision.utils.image import crop_image from supervision.utils.internal import ( - SupervisionWarnings, - warn_deprecated, + SupervisionWarnings ) @@ -62,15 +61,9 @@ class InferenceSlicer: Args: slice_wh (Tuple[int, int]): Dimensions of each slice measured in pixels. The tuple should be in the format `(width, height)`. - overlap_ratio_wh (Optional[Tuple[float, float]]): [⚠️ Deprecated: please set - to `None` and use `overlap_wh`] A tuple representing the - desired overlap ratio for width and height between consecutive slices. - Each value should be in the range [0, 1), where 0 means no overlap and - a value close to 1 means high overlap. - overlap_wh (Optional[Tuple[int, int]]): A tuple representing the desired + overlap_wh (Tuple[int, int]): A tuple representing the desired overlap for width and height between consecutive slices measured in pixels. - Each value should be greater than or equal to 0. Takes precedence over - `overlap_ratio_wh`. + Each value must be greater than or equal to 0. overlap_filter (Union[OverlapFilter, str]): Strategy for filtering or merging overlapping detections in slices. 
iou_threshold (float): Intersection over Union (IoU) threshold @@ -91,26 +84,16 @@ class InferenceSlicer: def __init__( self, callback: Callable[[np.ndarray], Detections], - slice_wh: tuple[int, int] = (320, 320), - overlap_ratio_wh: tuple[float, float] | None = (0.2, 0.2), - overlap_wh: tuple[int, int] | None = None, + slice_wh: tuple[int, int] = (640, 640), + overlap_wh: tuple[int, int] = (100, 100), overlap_filter: OverlapFilter | str = OverlapFilter.NON_MAX_SUPPRESSION, iou_threshold: float = 0.5, overlap_metric: OverlapMetric | str = OverlapMetric.IOU, thread_workers: int = 1, ): - if overlap_ratio_wh is not None: - warn_deprecated( - "`overlap_ratio_wh` in `InferenceSlicer.__init__` is deprecated and " - "will be removed in `supervision-0.27.0`. Please manually set it to " - "`None` and use `overlap_wh` instead." - ) - - self._validate_overlap(overlap_ratio_wh, overlap_wh) - self.overlap_ratio_wh = overlap_ratio_wh self.overlap_wh = overlap_wh - self.slice_wh = slice_wh + self._validate_overlap(slice_wh=self.slice_wh, overlap_wh=overlap_wh) self.iou_threshold = iou_threshold self.overlap_metric = OverlapMetric.from_value(overlap_metric) self.overlap_filter = OverlapFilter.from_value(overlap_filter) @@ -146,7 +129,7 @@ def callback(image_slice: np.ndarray) -> sv.Detections: slicer = sv.InferenceSlicer( callback=callback, - overlap_filter_strategy=sv.OverlapFilter.NON_MAX_SUPPRESSION, + overlap_filter=sv.OverlapFilter.NON_MAX_SUPPRESSION, ) detections = slicer(image) @@ -157,7 +140,6 @@ def callback(image_slice: np.ndarray) -> sv.Detections: offsets = self._generate_offset( resolution_wh=resolution_wh, slice_wh=self.slice_wh, - overlap_ratio_wh=self.overlap_ratio_wh, overlap_wh=self.overlap_wh, ) @@ -211,25 +193,20 @@ def _run_callback(self, image, offset) -> Detections: def _generate_offset( resolution_wh: tuple[int, int], slice_wh: tuple[int, int], - overlap_ratio_wh: tuple[float, float] | None, - overlap_wh: tuple[int, int] | None, + overlap_wh: tuple[int, 
int], ) -> np.ndarray: """ Generate offset coordinates for slicing an image based on the given resolution, - slice dimensions, and overlap ratios. + slice dimensions, and pixel overlap. Args: resolution_wh (Tuple[int, int]): A tuple representing the width and height of the image to be sliced. slice_wh (Tuple[int, int]): Dimensions of each slice measured in pixels. The tuple should be in the format `(width, height)`. - overlap_ratio_wh (Optional[Tuple[float, float]]): A tuple representing the - desired overlap ratio for width and height between consecutive slices. - Each value should be in the range [0, 1), where 0 means no overlap and - a value close to 1 means high overlap. - overlap_wh (Optional[Tuple[int, int]]): A tuple representing the desired + overlap_wh (Tuple[int, int]): A tuple representing the desired overlap for width and height between consecutive slices measured in - pixels. Each value should be greater than or equal to 0. + pixels. Each value must be greater than or equal to 0. 
Returns: np.ndarray: An array of shape `(n, 4)` containing coordinates for each @@ -244,16 +221,7 @@ def _generate_offset( """ slice_width, slice_height = slice_wh image_width, image_height = resolution_wh - overlap_width = ( - overlap_wh[0] - if overlap_wh is not None - else int(overlap_ratio_wh[0] * slice_width) - ) - overlap_height = ( - overlap_wh[1] - if overlap_wh is not None - else int(overlap_ratio_wh[1] * slice_height) - ) + overlap_width, overlap_height = overlap_wh width_stride = slice_width - overlap_width height_stride = slice_height - overlap_height @@ -271,29 +239,27 @@ def _generate_offset( @staticmethod def _validate_overlap( - overlap_ratio_wh: tuple[float, float] | None, - overlap_wh: tuple[int, int] | None, + slice_wh: tuple[int, int], + overlap_wh: tuple[int, int], ) -> None: - if overlap_ratio_wh is not None and overlap_wh is not None: + if not isinstance(overlap_wh, tuple) or len(overlap_wh) != 2: raise ValueError( - "Both `overlap_ratio_wh` and `overlap_wh` cannot be provided. " - "Please provide only one of them." + "`overlap_wh` must be a tuple of two non-negative values " + "(overlap_w, overlap_h)." ) - if overlap_ratio_wh is None and overlap_wh is None: + + overlap_w, overlap_h = overlap_wh + slice_w, slice_h = slice_wh + + if overlap_w < 0 or overlap_h < 0: raise ValueError( - "Either `overlap_ratio_wh` or `overlap_wh` must be provided. " - "Please provide one of them." + "Overlap values must be greater than or equal to 0. " + f"Received: {overlap_wh}" ) - if overlap_ratio_wh is not None: - if not (0 <= overlap_ratio_wh[0] < 1 and 0 <= overlap_ratio_wh[1] < 1): - raise ValueError( - "Overlap ratios must be in the range [0, 1). " - f"Received: {overlap_ratio_wh}" - ) - if overlap_wh is not None: - if not (overlap_wh[0] >= 0 and overlap_wh[1] >= 0): - raise ValueError( - "Overlap values must be greater than or equal to 0. 
" - f"Received: {overlap_wh}" - ) + if overlap_w >= slice_w or overlap_h >= slice_h: + raise ValueError( + "`overlap_wh` must be smaller than `slice_wh` in both dimensions " + f"to keep a positive stride. Received overlap_wh={overlap_wh}, " + f"slice_wh={slice_wh}." + ) diff --git a/test/detection/tools/test_inference_slicer.py b/test/detection/tools/test_inference_slicer.py index 2185b77f20..b32d3b2091 100644 --- a/test/detection/tools/test_inference_slicer.py +++ b/test/detection/tools/test_inference_slicer.py @@ -19,51 +19,6 @@ def callback(_: np.ndarray) -> Detections: return callback - -@pytest.mark.parametrize( - "slice_wh, overlap_ratio_wh, overlap_wh, expected_overlap, exception", - [ - # Valid case: explicit overlap_wh in pixels - ((128, 128), None, (26, 26), (26, 26), DoesNotRaise()), - # Valid case: overlap_wh in pixels - ((128, 128), None, (20, 20), (20, 20), DoesNotRaise()), - # Invalid case: negative overlap_wh, should raise ValueError - ((128, 128), None, (-10, 20), None, pytest.raises(ValueError)), - # Invalid case: no overlaps defined - ((128, 128), None, None, None, pytest.raises(ValueError)), - # Valid case: overlap_wh = 50 pixels - ((256, 256), None, (50, 50), (50, 50), DoesNotRaise()), - # Valid case: overlap_wh = 60 pixels - ((200, 200), None, (60, 60), (60, 60), DoesNotRaise()), - # Valid case: small overlap_wh values - ((100, 100), None, (0.1, 0.1), (0.1, 0.1), DoesNotRaise()), - # Invalid case: negative overlap_wh values - ((128, 128), None, (-10, -10), None, pytest.raises(ValueError)), - # Invalid case: overlap_wh greater than slice size - ((128, 128), None, (150, 150), (150, 150), DoesNotRaise()), - # Valid case: zero overlap - ((128, 128), None, (0, 0), (0, 0), DoesNotRaise()), - ], -) -def test_inference_slicer_overlap( - mock_callback, - slice_wh: tuple[int, int], - overlap_ratio_wh: tuple[float, float] | None, - overlap_wh: tuple[int, int] | None, - expected_overlap: tuple[int, int] | None, - exception: Exception, -) -> None: - with 
exception: - slicer = InferenceSlicer( - callback=mock_callback, - slice_wh=slice_wh, - overlap_ratio_wh=overlap_ratio_wh, - overlap_wh=overlap_wh, - overlap_filter=OverlapFilter.NONE, - ) - assert slicer.overlap_wh == expected_overlap - - @pytest.mark.parametrize( "resolution_wh, slice_wh, overlap_wh, expected_offsets", [ @@ -163,24 +118,20 @@ def test_inference_slicer_overlap( ] ), ), - # Case 6: Overlap_wh is greater than the slice size - ((256, 256), (128, 128), (150, 150), np.array([]).reshape(0, 4)), ], ) def test_generate_offset( resolution_wh: tuple[int, int], slice_wh: tuple[int, int], - overlap_wh: tuple[int, int] | None, + overlap_wh: tuple[int, int], expected_offsets: np.ndarray, ) -> None: offsets = InferenceSlicer._generate_offset( resolution_wh=resolution_wh, slice_wh=slice_wh, - overlap_ratio_wh=None, overlap_wh=overlap_wh, ) - # Verify that the generated offsets match the expected offsets assert np.array_equal(offsets, expected_offsets), ( f"Expected {expected_offsets}, got {offsets}" - ) + ) \ No newline at end of file From 8d464aa1b527029efe57c48a407181b456bc2f60 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Fri, 14 Nov 2025 16:42:10 +0100 Subject: [PATCH 096/124] improve `InferenceSlicer` implementation lowering the count of generated boxes --- .../detection/tools/inference_slicer.py | 68 +++++--- test/detection/tools/test_inference_slicer.py | 153 ++++++++++++------ 2 files changed, 149 insertions(+), 72 deletions(-) diff --git a/supervision/detection/tools/inference_slicer.py b/supervision/detection/tools/inference_slicer.py index 3aa7c4ba78..dc8a11872c 100644 --- a/supervision/detection/tools/inference_slicer.py +++ b/supervision/detection/tools/inference_slicer.py @@ -200,40 +200,58 @@ def _generate_offset( slice dimensions, and pixel overlap. Args: - resolution_wh (Tuple[int, int]): A tuple representing the width and height - of the image to be sliced. - slice_wh (Tuple[int, int]): Dimensions of each slice measured in pixels. 
The - tuple should be in the format `(width, height)`. - overlap_wh (Tuple[int, int]): A tuple representing the desired - overlap for width and height between consecutive slices measured in - pixels. Each value must be greater than or equal to 0. + resolution_wh (Tuple[int, int]): Width and height of the image to be sliced. + slice_wh (Tuple[int, int]): Dimensions of each slice in pixels (width, height). + overlap_wh (Tuple[int, int]): Overlap in pixels (overlap_w, overlap_h). Returns: - np.ndarray: An array of shape `(n, 4)` containing coordinates for each - slice in the format `[xmin, ymin, xmax, ymax]`. - - Note: - The function ensures that slices do not exceed the boundaries of the - original image. As a result, the final slices in the row and column - dimensions might be smaller than the specified slice dimensions if the - image's width or height is not a multiple of the slice's width or - height minus the overlap. + np.ndarray: Array of shape (n, 4) with [x_min, y_min, x_max, y_max] slices. 
""" slice_width, slice_height = slice_wh image_width, image_height = resolution_wh overlap_width, overlap_height = overlap_wh - width_stride = slice_width - overlap_width - height_stride = slice_height - overlap_height - - ws = np.arange(0, image_width, width_stride) - hs = np.arange(0, image_height, height_stride) + stride_x = slice_width - overlap_width + stride_y = slice_height - overlap_height + + def _compute_axis_starts( + image_size: int, + slice_size: int, + stride: int, + ) -> list[int]: + if image_size <= slice_size: + return [0] + + # No overlap case, preserve original behavior, no overlapping tiles + if stride == slice_size: + return np.arange(0, image_size, stride).tolist() + + # Overlap case, ensure last tile touches the border without redundancy + last_start = image_size - slice_size + starts = np.arange(0, last_start, stride).tolist() + if not starts or starts[-1] != last_start: + starts.append(last_start) + return starts + + x_starts = _compute_axis_starts( + image_size=image_width, + slice_size=slice_width, + stride=stride_x, + ) + y_starts = _compute_axis_starts( + image_size=image_height, + slice_size=slice_height, + stride=stride_y, + ) - xmin, ymin = np.meshgrid(ws, hs) - xmax = np.clip(xmin + slice_width, 0, image_width) - ymax = np.clip(ymin + slice_height, 0, image_height) + x_min, y_min = np.meshgrid(x_starts, y_starts) + x_max = np.clip(x_min + slice_width, 0, image_width) + y_max = np.clip(y_min + slice_height, 0, image_height) - offsets = np.stack([xmin, ymin, xmax, ymax], axis=-1).reshape(-1, 4) + offsets = np.stack( + [x_min, y_min, x_max, y_max], + axis=-1, + ).reshape(-1, 4) return offsets diff --git a/test/detection/tools/test_inference_slicer.py b/test/detection/tools/test_inference_slicer.py index b32d3b2091..d24b92b245 100644 --- a/test/detection/tools/test_inference_slicer.py +++ b/test/detection/tools/test_inference_slicer.py @@ -1,13 +1,10 @@ from __future__ import annotations -from contextlib import ExitStack as DoesNotRaise 
- import numpy as np import pytest from supervision.detection.core import Detections from supervision.detection.tools.inference_slicer import InferenceSlicer -from supervision.detection.utils.iou_and_nms import OverlapFilter @pytest.fixture @@ -22,7 +19,7 @@ def callback(_: np.ndarray) -> Detections: @pytest.mark.parametrize( "resolution_wh, slice_wh, overlap_wh, expected_offsets", [ - # Case 1: No overlap, exact slices fit within image dimensions + # Case 1: Square image, square slices, no overlap ( (256, 256), (128, 128), @@ -36,7 +33,7 @@ def callback(_: np.ndarray) -> Detections: ] ), ), - # Case 2: Overlap of 64 pixels in both directions + # Case 2: Square image, square slices, non-zero overlap ( (256, 256), (128, 128), @@ -46,75 +43,137 @@ def callback(_: np.ndarray) -> Detections: [0, 0, 128, 128], [64, 0, 192, 128], [128, 0, 256, 128], - [192, 0, 256, 128], [0, 64, 128, 192], [64, 64, 192, 192], [128, 64, 256, 192], - [192, 64, 256, 192], [0, 128, 128, 256], [64, 128, 192, 256], [128, 128, 256, 256], - [192, 128, 256, 256], - [0, 192, 128, 256], - [64, 192, 192, 256], - [128, 192, 256, 256], - [192, 192, 256, 256], ] ), ), - # Case 3: Image not perfectly divisible by slice size (no overlap) + # Case 3: Rectangle image (horizontal), square slices, no overlap ( - (300, 300), - (128, 128), + (192, 128), + (64, 64), (0, 0), np.array( [ - [0, 0, 128, 128], - [128, 0, 256, 128], - [256, 0, 300, 128], - [0, 128, 128, 256], - [128, 128, 256, 256], - [256, 128, 300, 256], - [0, 256, 128, 300], - [128, 256, 256, 300], - [256, 256, 300, 300], + [0, 0, 64, 64], + [64, 0, 128, 64], + [128, 0, 192, 64], + [0, 64, 64, 128], + [64, 64, 128, 128], + [128, 64, 192, 128], ] ), ), - # Case 4: Overlap of 32 pixels, image not perfectly divisible by slice size + # Case 4: Rectangle image (horizontal), square slices, non-zero overlap ( - (300, 300), - (128, 128), + (192, 128), + (64, 64), (32, 32), np.array( [ - [0, 0, 128, 128], - [96, 0, 224, 128], - [192, 0, 300, 128], - [288, 
0, 300, 128], - [0, 96, 128, 224], - [96, 96, 224, 224], - [192, 96, 300, 224], - [288, 96, 300, 224], - [0, 192, 128, 300], - [96, 192, 224, 300], - [192, 192, 300, 300], - [288, 192, 300, 300], - [0, 288, 128, 300], - [96, 288, 224, 300], - [192, 288, 300, 300], - [288, 288, 300, 300], + [0, 0, 64, 64], + [32, 0, 96, 64], + [64, 0, 128, 64], + [96, 0, 160, 64], + [128, 0, 192, 64], + [0, 32, 64, 96], + [32, 32, 96, 96], + [64, 32, 128, 96], + [96, 32, 160, 96], + [128, 32, 192, 96], + [0, 64, 64, 128], + [32, 64, 96, 128], + [64, 64, 128, 128], + [96, 64, 160, 128], + [128, 64, 192, 128], ] ), ), - # Case 5: Image smaller than slice size (no overlap) + # Case 5: Rectangle image (vertical), square slices, no overlap ( - (100, 100), - (128, 128), + (128, 192), + (64, 64), (0, 0), np.array( [ - [0, 0, 100, 100], + [0, 0, 64, 64], + [64, 0, 128, 64], + [0, 64, 64, 128], + [64, 64, 128, 128], + [0, 128, 64, 192], + [64, 128, 128, 192], + ] + ), + ), + # Case 6: Rectangle image (vertical), square slices, non-zero overlap + ( + (128, 192), + (64, 64), + (32, 32), + np.array( + [ + [0, 0, 64, 64], + [32, 0, 96, 64], + [64, 0, 128, 64], + [0, 32, 64, 96], + [32, 32, 96, 96], + [64, 32, 128, 96], + [0, 64, 64, 128], + [32, 64, 96, 128], + [64, 64, 128, 128], + [0, 96, 64, 160], + [32, 96, 96, 160], + [64, 96, 128, 160], + [0, 128, 64, 192], + [32, 128, 96, 192], + [64, 128, 128, 192], + ] + ), + ), + # Case 7: Square image, rectangular slices (horizontal), no overlap + ( + (160, 160), + (80, 40), + (0, 0), + np.array( + [ + [0, 0, 80, 40], + [80, 0, 160, 40], + [0, 40, 80, 80], + [80, 40, 160, 80], + [0, 80, 80, 120], + [80, 80, 160, 120], + [0, 120, 80, 160], + [80, 120, 160, 160], + ] + ), + ), + # Case 8: Square image, rectangular slices (vertical), non-zero overlap + ( + (160, 160), + (40, 80), + (10, 20), + np.array( + [ + [0, 0, 40, 80], + [30, 0, 70, 80], + [60, 0, 100, 80], + [90, 0, 130, 80], + [120, 0, 160, 80], + [0, 60, 40, 140], + [30, 60, 70, 140], + [60, 60, 
100, 140], + [90, 60, 130, 140], + [120, 60, 160, 140], + [0, 80, 40, 160], + [30, 80, 70, 160], + [60, 80, 100, 160], + [90, 80, 130, 160], + [120, 80, 160, 160], ] ), ), From c8f5c741fd1da66eda796c4746aaa12f87876bb6 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Fri, 14 Nov 2025 17:55:07 +0100 Subject: [PATCH 097/124] reimplement `crop_image` to skip type casting when Pillow Image given `get_image_resolution_wh` added --- docs/utils/image.md | 6 ++++ supervision/__init__.py | 2 ++ supervision/utils/image.py | 70 ++++++++++++++++++++++++++++++++++++-- test/utils/test_image.py | 68 +++++++++++++++++++++++++++++++++++- 4 files changed, 143 insertions(+), 3 deletions(-) diff --git a/docs/utils/image.md b/docs/utils/image.md index 17d94eac36..9d1c1895ca 100644 --- a/docs/utils/image.md +++ b/docs/utils/image.md @@ -41,6 +41,12 @@ status: new :::supervision.utils.image.grayscale_image + + +:::supervision.utils.image.get_image_resolution_wh + diff --git a/supervision/__init__.py b/supervision/__init__.py index ccd2729308..1b6ac80bb8 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -129,6 +129,7 @@ resize_image, scale_image, tint_image, + get_image_resolution_wh, ) from supervision.utils.notebook import plot_image, plot_images_grid from supervision.utils.video import ( @@ -163,6 +164,7 @@ "DetectionsSmoother", "DotAnnotator", "EdgeAnnotator", + "get_image_resolution_wh", "EllipseAnnotator", "FPSMonitor", "HaloAnnotator", diff --git a/supervision/utils/image.py b/supervision/utils/image.py index e8931f2194..dad2027798 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -6,6 +6,7 @@ import cv2 import numpy as np import numpy.typing as npt +from PIL import Image from supervision.annotators.base import ImageType from supervision.draw.color import Color, unify_to_bgr @@ -15,7 +16,6 @@ from supervision.utils.internal import deprecated -@ensure_cv2_image_for_standalone_function def crop_image( image: ImageType, xyxy: npt.NDArray[int] 
| list[int] | tuple[int, int, int, int], @@ -65,9 +65,20 @@ def crop_image( """ # noqa E501 // docs if isinstance(xyxy, (list, tuple)): xyxy = np.array(xyxy) + xyxy = np.round(xyxy).astype(int) x_min, y_min, x_max, y_max = xyxy.flatten() - return image[y_min:y_max, x_min:x_max] + + if isinstance(image, np.ndarray): + return image[y_min:y_max, x_min:x_max] + + if isinstance(image, Image.Image): + return image.crop((x_min, y_min, x_max, y_max)) + + raise TypeError( + "`image` must be a numpy.ndarray or PIL.Image.Image. " + f"Received {type(image)}" + ) @ensure_cv2_image_for_standalone_function @@ -460,6 +471,61 @@ def grayscale_image(image: ImageType) -> ImageType: return cv2.cvtColor(grayscaled, cv2.COLOR_GRAY2BGR) +def get_image_resolution_wh(image: ImageType) -> tuple[int, int]: + """ + Get image width and height as a tuple `(width, height)` for various image formats. + + Supports both `numpy.ndarray` images (with shape `(H, W, ...)`) and + `PIL.Image.Image` inputs. + + Args: + image (`numpy.ndarray` or `PIL.Image.Image`): Input image. + + Returns: + (`tuple[int, int]`): Image resolution as `(width, height)`. + + Raises: + ValueError: If a `numpy.ndarray` image has fewer than 2 dimensions. + TypeError: If `image` is not a supported type (`numpy.ndarray` or + `PIL.Image.Image`). + + Examples: + ```python + import cv2 + import supervision as sv + + image = cv2.imread("example.png") + sv.get_image_resolution_wh(image) + # (1920, 1080) + ``` + + ```python + from PIL import Image + import supervision as sv + + image = Image.open("example.png") + sv.get_image_resolution_wh(image) + # (1920, 1080) + ``` + """ + if isinstance(image, np.ndarray): + if image.ndim < 2: + raise ValueError( + "NumPy image must have at least 2 dimensions (H, W, ...). 
" + f"Received shape: {image.shape}" + ) + height, width = image.shape[:2] + return int(width), int(height) + + if isinstance(image, Image.Image): + width, height = image.size + return int(width), int(height) + + raise TypeError( + "`image` must be a numpy.ndarray or PIL.Image.Image. " + f"Received type: {type(image)}" + ) + class ImageSink: def __init__( self, diff --git a/test/utils/test_image.py b/test/utils/test_image.py index 6ae9567b99..8dbd5b5988 100644 --- a/test/utils/test_image.py +++ b/test/utils/test_image.py @@ -1,7 +1,9 @@ import numpy as np +import pytest from PIL import Image, ImageChops -from supervision.utils.image import letterbox_image, resize_image +from supervision.utils.image import letterbox_image, resize_image, crop_image, \ + get_image_resolution_wh def test_resize_image_for_opencv_image() -> None: @@ -94,3 +96,67 @@ def test_letterbox_image_for_pillow_image() -> None: assert difference.getbbox() is None, ( "Expected padding to be added top and bottom with padding added top and bottom" ) + + +@pytest.mark.parametrize( + "image, xyxy, expected_size", + [ + # NumPy RGB + ( + np.zeros((4, 6, 3), dtype=np.uint8), + (2, 1, 5, 3), + (3, 2), # width = 5-2, height = 3-1 + ), + + # NumPy grayscale + ( + np.zeros((5, 5), dtype=np.uint8), + (1, 1, 4, 4), + (3, 3), + ), + + # Pillow RGB + ( + Image.new("RGB", (6, 4), color=0), + (2, 1, 5, 3), + (3, 2), + ), + + # Pillow grayscale + ( + Image.new("L", (5, 5), color=0), + (1, 1, 4, 4), + (3, 3), + ), + ], +) +def test_crop_image(image, xyxy, expected_size): + cropped = crop_image(image=image, xyxy=xyxy) + if isinstance(image, np.ndarray): + assert isinstance(cropped, np.ndarray) + assert cropped.shape[1] == expected_size[0] # width + assert cropped.shape[0] == expected_size[1] # height + else: + assert isinstance(cropped, Image.Image) + assert cropped.size == expected_size + + +@pytest.mark.parametrize( + "image, expected", + [ + # NumPy RGB + (np.zeros((4, 6, 3), dtype=np.uint8), (6, 4)), + + # NumPy 
grayscale + (np.zeros((10, 20), dtype=np.uint8), (20, 10)), + + # Pillow RGB + (Image.new("RGB", (6, 4), color=0), (6, 4)), + + # Pillow grayscale + (Image.new("L", (20, 10), color=0), (20, 10)), + ], +) +def test_get_image_resolution_wh(image, expected): + resolution = get_image_resolution_wh(image) + assert resolution == expected \ No newline at end of file From e82b5b9d74811acb00d075b0f1f6244d5b0a9d35 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 14 Nov 2025 16:58:19 +0000 Subject: [PATCH 098/124] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/__init__.py | 4 ++-- .../detection/tools/inference_slicer.py | 8 +++----- supervision/utils/image.py | 4 ++-- test/detection/tools/test_inference_slicer.py | 3 ++- test/utils/test_image.py | 18 ++++++++---------- 5 files changed, 17 insertions(+), 20 deletions(-) diff --git a/supervision/__init__.py b/supervision/__init__.py index 1b6ac80bb8..00820076fe 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -123,13 +123,13 @@ from supervision.utils.image import ( ImageSink, crop_image, + get_image_resolution_wh, grayscale_image, letterbox_image, overlay_image, resize_image, scale_image, tint_image, - get_image_resolution_wh, ) from supervision.utils.notebook import plot_image, plot_images_grid from supervision.utils.video import ( @@ -164,7 +164,6 @@ "DetectionsSmoother", "DotAnnotator", "EdgeAnnotator", - "get_image_resolution_wh", "EllipseAnnotator", "FPSMonitor", "HaloAnnotator", @@ -225,6 +224,7 @@ "filter_segments_by_distance", "fuzzy_match_index", "get_coco_class_index_mapping", + "get_image_resolution_wh", "get_polygon_center", "get_video_frames_generator", "grayscale_image", diff --git a/supervision/detection/tools/inference_slicer.py 
b/supervision/detection/tools/inference_slicer.py index dc8a11872c..a0a5cab7c7 100644 --- a/supervision/detection/tools/inference_slicer.py +++ b/supervision/detection/tools/inference_slicer.py @@ -12,9 +12,7 @@ from supervision.detection.utils.iou_and_nms import OverlapFilter, OverlapMetric from supervision.detection.utils.masks import move_masks from supervision.utils.image import crop_image -from supervision.utils.internal import ( - SupervisionWarnings -) +from supervision.utils.internal import SupervisionWarnings def move_detections( @@ -257,8 +255,8 @@ def _compute_axis_starts( @staticmethod def _validate_overlap( - slice_wh: tuple[int, int], - overlap_wh: tuple[int, int], + slice_wh: tuple[int, int], + overlap_wh: tuple[int, int], ) -> None: if not isinstance(overlap_wh, tuple) or len(overlap_wh) != 2: raise ValueError( diff --git a/supervision/utils/image.py b/supervision/utils/image.py index dad2027798..4d4348d69f 100644 --- a/supervision/utils/image.py +++ b/supervision/utils/image.py @@ -76,8 +76,7 @@ def crop_image( return image.crop((x_min, y_min, x_max, y_max)) raise TypeError( - "`image` must be a numpy.ndarray or PIL.Image.Image. " - f"Received {type(image)}" + f"`image` must be a numpy.ndarray or PIL.Image.Image. 
Received {type(image)}" ) @@ -526,6 +525,7 @@ def get_image_resolution_wh(image: ImageType) -> tuple[int, int]: f"Received type: {type(image)}" ) + class ImageSink: def __init__( self, diff --git a/test/detection/tools/test_inference_slicer.py b/test/detection/tools/test_inference_slicer.py index d24b92b245..7c313841f3 100644 --- a/test/detection/tools/test_inference_slicer.py +++ b/test/detection/tools/test_inference_slicer.py @@ -16,6 +16,7 @@ def callback(_: np.ndarray) -> Detections: return callback + @pytest.mark.parametrize( "resolution_wh, slice_wh, overlap_wh, expected_offsets", [ @@ -193,4 +194,4 @@ def test_generate_offset( assert np.array_equal(offsets, expected_offsets), ( f"Expected {expected_offsets}, got {offsets}" - ) \ No newline at end of file + ) diff --git a/test/utils/test_image.py b/test/utils/test_image.py index 8dbd5b5988..688f938b70 100644 --- a/test/utils/test_image.py +++ b/test/utils/test_image.py @@ -2,8 +2,12 @@ import pytest from PIL import Image, ImageChops -from supervision.utils.image import letterbox_image, resize_image, crop_image, \ - get_image_resolution_wh +from supervision.utils.image import ( + crop_image, + get_image_resolution_wh, + letterbox_image, + resize_image, +) def test_resize_image_for_opencv_image() -> None: @@ -105,23 +109,20 @@ def test_letterbox_image_for_pillow_image() -> None: ( np.zeros((4, 6, 3), dtype=np.uint8), (2, 1, 5, 3), - (3, 2), # width = 5-2, height = 3-1 + (3, 2), # width = 5-2, height = 3-1 ), - # NumPy grayscale ( np.zeros((5, 5), dtype=np.uint8), (1, 1, 4, 4), (3, 3), ), - # Pillow RGB ( Image.new("RGB", (6, 4), color=0), (2, 1, 5, 3), (3, 2), ), - # Pillow grayscale ( Image.new("L", (5, 5), color=0), @@ -146,17 +147,14 @@ def test_crop_image(image, xyxy, expected_size): [ # NumPy RGB (np.zeros((4, 6, 3), dtype=np.uint8), (6, 4)), - # NumPy grayscale (np.zeros((10, 20), dtype=np.uint8), (20, 10)), - # Pillow RGB (Image.new("RGB", (6, 4), color=0), (6, 4)), - # Pillow grayscale (Image.new("L", 
(20, 10), color=0), (20, 10)), ], ) def test_get_image_resolution_wh(image, expected): resolution = get_image_resolution_wh(image) - assert resolution == expected \ No newline at end of file + assert resolution == expected From 15364ac9a3958a529ec2ad0db60fa43043d2f500 Mon Sep 17 00:00:00 2001 From: AnonymDevOSS Date: Fri, 14 Nov 2025 21:45:06 +0100 Subject: [PATCH 099/124] Rename process_video_threads to process_video and remove legacy implementation and redundant tests --- supervision/utils/video.py | 65 +--------------------- test/utils/test_process_video.py | 95 -------------------------------- 2 files changed, 1 insertion(+), 159 deletions(-) delete mode 100644 test/utils/test_process_video.py diff --git a/supervision/utils/video.py b/supervision/utils/video.py index 5fb2c4bf97..dbf63a985d 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -195,69 +195,6 @@ def get_video_frames_generator( def process_video( - source_path: str, - target_path: str, - callback: Callable[[np.ndarray, int], np.ndarray], - max_frames: int | None = None, - show_progress: bool = False, - progress_message: str = "Processing video", -) -> None: - """ - Process a video file by applying a callback function on each frame - and saving the result to a target video file. - - Args: - source_path (str): The path to the source video file. - target_path (str): The path to the target video file. - callback (Callable[[np.ndarray, int], np.ndarray]): A function that takes in - a numpy ndarray representation of a video frame and an - int index of the frame and returns a processed numpy ndarray - representation of the frame. - max_frames (Optional[int]): The maximum number of frames to process. - show_progress (bool): Whether to show a progress bar. - progress_message (str): The message to display in the progress bar. - - Examples: - ```python - import supervision as sv - - def callback(scene: np.ndarray, index: int) -> np.ndarray: - ... 
- - process_video( - source_path=, - target_path=, - callback=callback - ) - ``` - """ - source_video_info = VideoInfo.from_video_path(video_path=source_path) - video_frames_generator = get_video_frames_generator( - source_path=source_path, end=max_frames - ) - with VideoSink(target_path=target_path, video_info=source_video_info) as sink: - total_frames = ( - min(source_video_info.total_frames, max_frames) - if max_frames is not None - else source_video_info.total_frames - ) - for index, frame in enumerate( - tqdm( - video_frames_generator, - total=total_frames, - disable=not show_progress, - desc=progress_message, - ) - ): - result_frame = callback(frame, index) - sink.write_frame(frame=result_frame) - else: - for index, frame in enumerate(video_frames_generator): - result_frame = callback(frame, index) - sink.write_frame(frame=result_frame) - - -def process_video_threads( source_path: str, target_path: str, callback: Callable[[np.ndarray, int], np.ndarray], @@ -266,7 +203,7 @@ def process_video_threads( prefetch: int = 32, writer_buffer: int = 32, show_progress: bool = False, - progress_message: str = "Processing video (with threads)", + progress_message: str = "Processing video", ) -> None: """ Process a video using a threaded pipeline that asynchronously diff --git a/test/utils/test_process_video.py b/test/utils/test_process_video.py deleted file mode 100644 index 2c3c68a9bb..0000000000 --- a/test/utils/test_process_video.py +++ /dev/null @@ -1,95 +0,0 @@ -from pathlib import Path - -import cv2 -import numpy as np -import pytest - -import supervision as sv - - -def make_video( - path: Path, w: int = 160, h: int = 96, fps: int = 20, frames: int = 24 -) -> None: - """Create a small synthetic test video with predictable frame-colors.""" - fourcc = cv2.VideoWriter_fourcc(*"mp4v") - writer = cv2.VideoWriter(str(path), fourcc, fps, (w, h)) - assert writer.isOpened(), "Failed to open VideoWriter" - for i in range(frames): - v = (i * 11) % 250 - frame = np.full((h, w, 
3), (v, 255 - v, (2 * v) % 255), np.uint8) - writer.write(frame) - writer.release() - - -def read_frames(path: Path) -> list[np.ndarray]: - """Read all frames from a video into memory.""" - cap = cv2.VideoCapture(str(path)) - assert cap.isOpened(), f"Cannot open video: {path}" - out = [] - while True: - ok, frame = cap.read() - if not ok: - break - out.append(frame) - cap.release() - return out - - -def frames_equal(a: np.ndarray, b: np.ndarray, max_abs_tol: int = 0) -> bool: - """Return True if frames are the same within acertain tolerance.""" - if a.shape != b.shape: - return False - diff = np.abs(a.astype(np.int16) - b.astype(np.int16)) - return diff.max() <= max_abs_tol - - -def callback_noop(frame: np.ndarray, idx: int) -> np.ndarray: - """No-op callback: validates pure pipeline correctness.""" - return frame - - -def callbackb_opencv(frame: np.ndarray, idx: int) -> np.ndarray: - """ - Simulations some cv2 task... - """ - g = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) - return cv2.cvtColor(g, cv2.COLOR_GRAY2BGR) - - -@pytest.mark.parametrize( - "callback", [callback_noop, callbackb_opencv], ids=["identity", "opencv"] -) -def test_process_video_vs_threads_same_output(callback, tmp_path: Path): - """ - Ensure that process_video() and process_video_threads() produce identical - results for the same synthetic source video and callback. 
- """ - name = callback.__name__ - src = tmp_path / f"src_{name}.mp4" - dst_single = tmp_path / f"out_single_{name}.mp4" - dst_threads = tmp_path / f"out_threads_{name}.mp4" - - make_video(src, frames=24) - - sv.utils.video.process_video( - source_path=str(src), - target_path=str(dst_single), - callback=callback, - show_progress=False, - ) - sv.utils.video.process_video_threads( - source_path=str(src), - target_path=str(dst_threads), - callback=callback, - prefetch=4, - writer_buffer=4, - show_progress=False, - ) - - frames_single = read_frames(dst_single) - frames_threads = read_frames(dst_threads) - - assert len(frames_single) == len(frames_threads) != 0, "Frame count mismatch." - - for i, (fs, ft) in enumerate(zip(frames_single, frames_threads)): - assert frames_equal(fs, ft), f"Frame {i} is different." From fc0e133ea9f1dce65ffe7a6cfd4798ba44d6b058 Mon Sep 17 00:00:00 2001 From: AnonymDevOSS Date: Fri, 14 Nov 2025 21:53:14 +0100 Subject: [PATCH 100/124] added example as it was delited by mistake --- supervision/utils/video.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/supervision/utils/video.py b/supervision/utils/video.py index dbf63a985d..7227b92b0e 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -251,6 +251,18 @@ def process_video( - When the callback function is Python-heavy and GIL-bound. In that case, using a process-based approach is more effective. + Examples: + ```python + import supervision as sv + def callback(scene: np.ndarray, index: int) -> np.ndarray: + ... + process_video( + source_path=, + target_path=, + callback=callback + ) + ``` + Args: source_path (str): The path to the source video file. target_path (str): The path to the target video file. 
From 2c522a9931fd5cba05e50a578c00d61425a6b03c Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Fri, 14 Nov 2025 23:14:34 +0100 Subject: [PATCH 101/124] improved `InferenceSlicer` docstring --- .../detection/tools/inference_slicer.py | 254 +++++++++++------- 1 file changed, 156 insertions(+), 98 deletions(-) diff --git a/supervision/detection/tools/inference_slicer.py b/supervision/detection/tools/inference_slicer.py index a0a5cab7c7..1946fcee75 100644 --- a/supervision/detection/tools/inference_slicer.py +++ b/supervision/detection/tools/inference_slicer.py @@ -11,7 +11,8 @@ from supervision.detection.utils.boxes import move_boxes, move_oriented_boxes from supervision.detection.utils.iou_and_nms import OverlapFilter, OverlapMetric from supervision.detection.utils.masks import move_masks -from supervision.utils.image import crop_image +from supervision.draw.base import ImageType +from supervision.utils.image import crop_image, get_image_resolution_wh from supervision.utils.internal import SupervisionWarnings @@ -50,91 +51,98 @@ def move_detections( class InferenceSlicer: """ - InferenceSlicer performs slicing-based inference for small target detection. This - method, often referred to as - [Slicing Adaptive Inference (SAHI)](https://ieeexplore.ieee.org/document/9897990), - involves dividing a larger image into smaller slices, performing inference on each - slice, and then merging the detections. + Perform tiled inference on large images by slicing them into overlapping patches. + + This class divides an input image into overlapping slices of configurable size + and overlap, runs inference on each slice through a user-provided callback, and + merges the resulting detections. The slicing process allows efficient processing + of large images with limited resources while preserving detection accuracy via + configurable overlap and post-processing of overlaps. Uses multi-threading for + parallel slice inference. 
Args: - slice_wh (Tuple[int, int]): Dimensions of each slice measured in pixels. The - tuple should be in the format `(width, height)`. - overlap_wh (Tuple[int, int]): A tuple representing the desired - overlap for width and height between consecutive slices measured in pixels. - Each value must be greater than or equal to 0. - overlap_filter (Union[OverlapFilter, str]): Strategy for - filtering or merging overlapping detections in slices. - iou_threshold (float): Intersection over Union (IoU) threshold - used when filtering by overlap. - overlap_metric (Union[OverlapMetric, str]): Metric used for matching detections - in slices. - callback (Callable): A function that performs inference on a given image - slice and returns detections. - thread_workers (int): Number of threads for parallel execution. - - Note: - The class ensures that slices do not exceed the boundaries of the original - image. As a result, the final slices in the row and column dimensions might be - smaller than the specified slice dimensions if the image's width or height is - not a multiple of the slice's width or height minus the overlap. + callback (Callable[[ImageType], Detections]): Inference function that takes + a sliced image and returns a `Detections` object. + slice_wh (int or tuple[int, int]): Size of each slice `(width, height)`. + If int, both width and height are set to this value. + overlap_wh (int or tuple[int, int]): Overlap size `(width, height)` between + slices. If int, both width and height are set to this value. + overlap_filter (OverlapFilter or str): Strategy to merge overlapping + detections (`NON_MAX_SUPPRESSION`, `NON_MAX_MERGE`, or `NONE`). + iou_threshold (float): IOU threshold used in merging overlap filtering. + overlap_metric (OverlapMetric or str): Metric to compute overlap + (`IOU` or `IOS`). + thread_workers (int): Number of threads for concurrent slice inference. + + Raises: + ValueError: If `slice_wh` or `overlap_wh` are invalid or inconsistent. 
+ + Example: + ```python + import cv2 + import supervision as sv + from rfdetr import RFDETRMedium + + def callback(tile): + return model.predict(tile) + + slicer = sv.InferenceSlicer(callback, slice_wh=640, overlap_wh=100) + + image = cv2.imread("example.png") + detections = slicer(image) + ``` + + ```python + import supervision as sv + from PIL import Image + from ultralytics import YOLO + + def callback(tile): + results = model(tile)[0] + return sv.Detections.from_ultralytics(results) + + slicer = sv.InferenceSlicer(callback, slice_wh=640, overlap_wh=100) + + image = Image.open("example.png") + detections = slicer(image) + ``` """ - def __init__( self, - callback: Callable[[np.ndarray], Detections], - slice_wh: tuple[int, int] = (640, 640), - overlap_wh: tuple[int, int] = (100, 100), + callback: Callable[[ImageType], Detections], + slice_wh: int | tuple[int, int] = 640, + overlap_wh: int | tuple[int, int] = 100, overlap_filter: OverlapFilter | str = OverlapFilter.NON_MAX_SUPPRESSION, iou_threshold: float = 0.5, overlap_metric: OverlapMetric | str = OverlapMetric.IOU, thread_workers: int = 1, ): - self.overlap_wh = overlap_wh - self.slice_wh = slice_wh - self._validate_overlap(slice_wh=self.slice_wh, overlap_wh=overlap_wh) + slice_wh_norm = self._normalize_slice_wh(slice_wh) + overlap_wh_norm = self._normalize_overlap_wh(overlap_wh) + + self._validate_overlap(slice_wh=slice_wh_norm, overlap_wh=overlap_wh_norm) + + self.slice_wh = slice_wh_norm + self.overlap_wh = overlap_wh_norm self.iou_threshold = iou_threshold self.overlap_metric = OverlapMetric.from_value(overlap_metric) self.overlap_filter = OverlapFilter.from_value(overlap_filter) self.callback = callback self.thread_workers = thread_workers - def __call__(self, image: np.ndarray) -> Detections: + def __call__(self, image: ImageType) -> Detections: """ - Performs slicing-based inference on the provided image using the specified - callback. 
+ Perform tiled inference on the full image and return merged detections. Args: - image (np.ndarray): The input image on which inference needs to be - performed. The image should be in the format - `(height, width, channels)`. + image (ImageType): The full image to run inference on. Returns: - Detections: A collection of detections for the entire image after merging - results from all slices and applying NMS. - - Example: - ```python - import cv2 - import supervision as sv - from ultralytics import YOLO - - image = cv2.imread(SOURCE_IMAGE_PATH) - model = YOLO(...) - - def callback(image_slice: np.ndarray) -> sv.Detections: - result = model(image_slice)[0] - return sv.Detections.from_ultralytics(result) - - slicer = sv.InferenceSlicer( - callback=callback, - overlap_filter=sv.OverlapFilter.NON_MAX_SUPPRESSION, - ) - - detections = slicer(image) - ``` + Detections: Merged detections across all slices. """ - detections_list = [] - resolution_wh = (image.shape[1], image.shape[0]) + detections_list: list[Detections] = [] + resolution_wh = get_image_resolution_wh(image) + offsets = self._generate_offset( resolution_wh=resolution_wh, slice_wh=self.slice_wh, @@ -151,42 +159,100 @@ def callback(image_slice: np.ndarray) -> sv.Detections: merged = Detections.merge(detections_list=detections_list) if self.overlap_filter == OverlapFilter.NONE: return merged - elif self.overlap_filter == OverlapFilter.NON_MAX_SUPPRESSION: + if self.overlap_filter == OverlapFilter.NON_MAX_SUPPRESSION: return merged.with_nms( - threshold=self.iou_threshold, overlap_metric=self.overlap_metric + threshold=self.iou_threshold, + overlap_metric=self.overlap_metric, ) - elif self.overlap_filter == OverlapFilter.NON_MAX_MERGE: + if self.overlap_filter == OverlapFilter.NON_MAX_MERGE: return merged.with_nmm( - threshold=self.iou_threshold, overlap_metric=self.overlap_metric + threshold=self.iou_threshold, + overlap_metric=self.overlap_metric, ) - else: - warnings.warn( - f"Invalid overlap filter strategy: 
{self.overlap_filter}", - category=SupervisionWarnings, - ) - return merged - def _run_callback(self, image, offset) -> Detections: + warnings.warn( + f"Invalid overlap filter strategy: {self.overlap_filter}", + category=SupervisionWarnings, + ) + return merged + + def _run_callback(self, image: ImageType, offset: np.ndarray) -> Detections: """ - Run the provided callback on a slice of an image. + Run detection callback on a sliced portion of the image and adjust coordinates. Args: - image (np.ndarray): The input image on which inference needs to run - offset (np.ndarray): An array of shape `(4,)` containing coordinates - for the slice. + image (ImageType): The full image. + offset (numpy.ndarray): Coordinates `(x_min, y_min, x_max, y_max)` defining + the slice region. Returns: - Detections: A collection of detections for the slice. + Detections: Detections adjusted to the full image coordinate system. """ - image_slice = crop_image(image=image, xyxy=offset) + image_slice: ImageType = crop_image(image=image, xyxy=offset) detections = self.callback(image_slice) - resolution_wh = (image.shape[1], image.shape[0]) + resolution_wh = get_image_resolution_wh(image) + detections = move_detections( - detections=detections, offset=offset[:2], resolution_wh=resolution_wh + detections=detections, + offset=offset[:2], + resolution_wh=resolution_wh, ) - return detections + @staticmethod + def _normalize_slice_wh( + slice_wh: int | tuple[int, int], + ) -> tuple[int, int]: + if isinstance(slice_wh, int): + if slice_wh <= 0: + raise ValueError( + "`slice_wh` must be a positive integer. " + f"Received: {slice_wh}" + ) + return slice_wh, slice_wh + + if isinstance(slice_wh, tuple) and len(slice_wh) == 2: + width, height = slice_wh + if width <= 0 or height <= 0: + raise ValueError( + "`slice_wh` values must be positive. " + f"Received: {slice_wh}" + ) + return width, height + + raise ValueError( + "`slice_wh` must be an int or a tuple of two positive integers " + "(slice_w, slice_h). 
" + f"Received: {slice_wh}" + ) + + @staticmethod + def _normalize_overlap_wh( + overlap_wh: int | tuple[int, int], + ) -> tuple[int, int]: + if isinstance(overlap_wh, int): + if overlap_wh < 0: + raise ValueError( + "`overlap_wh` must be a non negative integer. " + f"Received: {overlap_wh}" + ) + return overlap_wh, overlap_wh + + if isinstance(overlap_wh, tuple) and len(overlap_wh) == 2: + overlap_w, overlap_h = overlap_wh + if overlap_w < 0 or overlap_h < 0: + raise ValueError( + "`overlap_wh` values must be non negative. " + f"Received: {overlap_wh}" + ) + return overlap_w, overlap_h + + raise ValueError( + "`overlap_wh` must be an int or a tuple of two non negative integers " + "(overlap_w, overlap_h). " + f"Received: {overlap_wh}" + ) + @staticmethod def _generate_offset( resolution_wh: tuple[int, int], @@ -194,16 +260,16 @@ def _generate_offset( overlap_wh: tuple[int, int], ) -> np.ndarray: """ - Generate offset coordinates for slicing an image based on the given resolution, - slice dimensions, and pixel overlap. + Generate bounding boxes defining the coordinates of image slices with overlap. Args: - resolution_wh (Tuple[int, int]): Width and height of the image to be sliced. - slice_wh (Tuple[int, int]): Dimensions of each slice in pixels (width, height). - overlap_wh (Tuple[int, int]): Overlap in pixels (overlap_w, overlap_h). + resolution_wh (tuple[int, int]): Image resolution `(width, height)`. + slice_wh (tuple[int, int]): Size of each slice `(width, height)`. + overlap_wh (tuple[int, int]): Overlap size between slices `(width, height)`. Returns: - np.ndarray: Array of shape (n, 4) with [x_min, y_min, x_max, y_max] slices. + numpy.ndarray: Array of shape `(num_slices, 4)` with each row as + `(x_min, y_min, x_max, y_max)` coordinates for a slice. 
""" slice_width, slice_height = slice_wh image_width, image_height = resolution_wh @@ -220,11 +286,9 @@ def _compute_axis_starts( if image_size <= slice_size: return [0] - # No overlap case, preserve original behavior, no overlapping tiles if stride == slice_size: return np.arange(0, image_size, stride).tolist() - # Overlap case, ensure last tile touches the border without redundancy last_start = image_size - slice_size starts = np.arange(0, last_start, stride).tolist() if not starts or starts[-1] != last_start: @@ -258,12 +322,6 @@ def _validate_overlap( slice_wh: tuple[int, int], overlap_wh: tuple[int, int], ) -> None: - if not isinstance(overlap_wh, tuple) or len(overlap_wh) != 2: - raise ValueError( - "`overlap_wh` must be a tuple of two non-negative values " - "(overlap_w, overlap_h)." - ) - overlap_w, overlap_h = overlap_wh slice_w, slice_h = slice_wh From c8223cbab2182183e365ce349576c5a5391a508a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 14 Nov 2025 22:15:33 +0000 Subject: [PATCH 102/124] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/detection/tools/inference_slicer.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/supervision/detection/tools/inference_slicer.py b/supervision/detection/tools/inference_slicer.py index 1946fcee75..103b3fa466 100644 --- a/supervision/detection/tools/inference_slicer.py +++ b/supervision/detection/tools/inference_slicer.py @@ -107,6 +107,7 @@ def callback(tile): detections = slicer(image) ``` """ + def __init__( self, callback: Callable[[ImageType], Detections], @@ -206,8 +207,7 @@ def _normalize_slice_wh( if isinstance(slice_wh, int): if slice_wh <= 0: raise ValueError( - "`slice_wh` must be a positive integer. 
" - f"Received: {slice_wh}" + f"`slice_wh` must be a positive integer. Received: {slice_wh}" ) return slice_wh, slice_wh @@ -215,8 +215,7 @@ def _normalize_slice_wh( width, height = slice_wh if width <= 0 or height <= 0: raise ValueError( - "`slice_wh` values must be positive. " - f"Received: {slice_wh}" + f"`slice_wh` values must be positive. Received: {slice_wh}" ) return width, height @@ -242,8 +241,7 @@ def _normalize_overlap_wh( overlap_w, overlap_h = overlap_wh if overlap_w < 0 or overlap_h < 0: raise ValueError( - "`overlap_wh` values must be non negative. " - f"Received: {overlap_wh}" + f"`overlap_wh` values must be non negative. Received: {overlap_wh}" ) return overlap_w, overlap_h From a296712000f73ff63763cc2de28db623d67b7a74 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Sat, 15 Nov 2025 00:00:52 +0100 Subject: [PATCH 103/124] improved `InferenceSlicer` docstring --- supervision/detection/tools/inference_slicer.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/supervision/detection/tools/inference_slicer.py b/supervision/detection/tools/inference_slicer.py index 103b3fa466..4cc05f19cc 100644 --- a/supervision/detection/tools/inference_slicer.py +++ b/supervision/detection/tools/inference_slicer.py @@ -83,6 +83,8 @@ class InferenceSlicer: import supervision as sv from rfdetr import RFDETRMedium + model = RFDETRMedium() + def callback(tile): return model.predict(tile) @@ -97,6 +99,8 @@ def callback(tile): from PIL import Image from ultralytics import YOLO + model = YOLO("yolo11m.pt") + def callback(tile): results = model(tile)[0] return sv.Detections.from_ultralytics(results) From 80d76e712a372105becf2a493e617f06cd5c7069 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Sat, 15 Nov 2025 02:10:04 +0100 Subject: [PATCH 104/124] style and docstring improvements --- supervision/__init__.py | 2 + supervision/detection/utils/boxes.py | 51 ++++++++ supervision/utils/video.py | 186 +++++++++++++-------------- 3 files changed, 141 insertions(+), 98 deletions(-) 
diff --git a/supervision/__init__.py b/supervision/__init__.py index 04d3fb2543..aa23963bbe 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -57,6 +57,7 @@ move_boxes, pad_boxes, scale_boxes, + box_aspect_ratio ) from supervision.detection.utils.converters import ( mask_to_polygons, @@ -201,6 +202,7 @@ "box_iou_batch_with_jaccard", "box_non_max_merge", "box_non_max_suppression", + "box_aspect_ratio", "calculate_masks_centroids", "calculate_optimal_line_thickness", "calculate_optimal_text_scale", diff --git a/supervision/detection/utils/boxes.py b/supervision/detection/utils/boxes.py index 3b01fcb68b..904739899f 100644 --- a/supervision/detection/utils/boxes.py +++ b/supervision/detection/utils/boxes.py @@ -6,6 +6,57 @@ from supervision.detection.utils.iou_and_nms import box_iou_batch +def box_aspect_ratio(xyxy: np.ndarray) -> np.ndarray: + """ + Calculate aspect ratios of bounding boxes given in xyxy format. + + Computes the width divided by height for each bounding box. Returns NaN + for boxes with zero height to avoid division errors. + + Args: + xyxy (`numpy.ndarray`): Array of bounding boxes in `(x_min, y_min, x_max, y_max)` + format with shape `(N, 4)`. + + Returns: + `numpy.ndarray`: Array of aspect ratios with shape `(N,)`, where each element is + the width divided by height of a box. Elements are NaN if height is zero. + + Examples: + ```python + import numpy as np + import supervision as sv + + xyxy = np.array([ + [10, 20, 30, 50], + [0, 0, 40, 10], + ]) + + sv.box_aspect_ratio(xyxy) + # array([0.66666667, 4. ]) + + xyxy = np.array([ + [10, 10, 30, 10], + [5, 5, 25, 25], + ]) + + sv.box_aspect_ratio(xyxy) + # array([ nan, 1. 
]) + ``` + """ + widths = xyxy[:, 2] - xyxy[:, 0] + heights = xyxy[:, 3] - xyxy[:, 1] + + aspect_ratios = np.full_like(widths, np.nan, dtype=np.float64) + np.divide( + widths, + heights, + out=aspect_ratios, + where=heights != 0, + ) + + return aspect_ratios + + def clip_boxes(xyxy: np.ndarray, resolution_wh: tuple[int, int]) -> np.ndarray: """ Clips bounding boxes coordinates to fit within the frame resolution. diff --git a/supervision/utils/video.py b/supervision/utils/video.py index 7227b92b0e..0ece0916da 100644 --- a/supervision/utils/video.py +++ b/supervision/utils/video.py @@ -206,128 +206,118 @@ def process_video( progress_message: str = "Processing video", ) -> None: """ - Process a video using a threaded pipeline that asynchronously - reads frames, applies a callback to each, and writes the results - to an output file. + Process video frames asynchronously using a threaded pipeline. + + This function orchestrates a three-stage pipeline to optimize video processing + throughput: + + 1. Reader thread: Continuously reads frames from the source video file and + enqueues them into a bounded queue (`frame_read_queue`). The queue size is + limited by the `prefetch` parameter to control memory usage. + 2. Main thread (Processor): Dequeues frames from `frame_read_queue`, applies the + user-defined `callback` function to process each frame, then enqueues the + processed frames into another bounded queue (`frame_write_queue`) for writing. + The processing happens in the main thread, simplifying use of stateful objects + without synchronization. + 3. Writer thread: Dequeues processed frames from `frame_write_queue` and writes + them sequentially to the output video file. - Overview: - This function implements a three-stage pipeline designed to maximize - frame throughput. - - │ Reader │ >> │ Processor │ >> │ Writer │ - (thread) (main) (thread) - - - Reader thread: reads frames from disk into a bounded queue ('read_q') - until full, then blocks. 
This ensures we never load more than 'prefetch' - frames into memory at once. - - - Main thread: dequeues frames, applies the 'callback(frame, idx)', - and enqueues the processed result into 'write_q'. - This is the compute stage. It's important to note that it's not threaded, - so you can safely use any detectors, trackers, or other stateful objects - without synchronization issues. - - - Writer thread: dequeues frames and writes them to disk. - - Both queues are bounded to enforce back-pressure: - - The reader cannot outpace processing (avoids unbounded RAM usage). - - The processor cannot outpace writing (avoids output buffer bloat). + Args: + source_path (str): Path to the input video file. + target_path (str): Path where the processed video will be saved. + callback (Callable[[numpy.ndarray, int], numpy.ndarray]): Function called for + each frame, accepting the frame as a numpy array and its zero-based index, + returning the processed frame. + max_frames (int | None): Optional maximum number of frames to process. + If None, the entire video is processed (default). + prefetch (int): Maximum number of frames buffered by the reader thread. + Controls memory use; default is 32. + writer_buffer (int): Maximum number of frames buffered before writing. + Controls output buffer size; default is 32. + show_progress (bool): Whether to display a tqdm progress bar during processing. + Default is False. + progress_message (str): Description shown in the progress bar. - Summary: - - It's thread-safe: because the callback runs only in the main thread, - using a single stateful detector/tracker inside callback does not require - synchronization with the reader/writer threads. + Returns: + None - - While the main thread processes frame N, the reader is already decoding frame N+1, - and the writer is encoding frame N-1. They operate concurrently without blocking - each other. 
+ Example: + ```python + import cv2 + import supervision as sv + from rfdetr import RFDETRMedium - - When is it fastest? - - When there's heavy computation in the callback function that releases - the Python GIL (for example, OpenCV filters, resizes, color conversions, ...) - - When using CUDA or GPU-accelerated inference. + model = RFDETRMedium() - - When is it better not to use it? - - When the callback function is Python-heavy and GIL-bound. In that case, - using a process-based approach is more effective. + def callback(frame, frame_index): + return model.predict(frame) - Examples: - ```python - import supervision as sv - def callback(scene: np.ndarray, index: int) -> np.ndarray: - ... process_video( - source_path=, - target_path=, - callback=callback + source_path="source.mp4", + target_path="target.mp4", + callback=callback, ) ``` - - Args: - source_path (str): The path to the source video file. - target_path (str): The path to the target video file. - callback (Callable[[np.ndarray, int], np.ndarray]): A function that takes in - a numpy ndarray representation of a video frame and an - int index of the frame and returns a processed numpy ndarray - representation of the frame. - max_frames (Optional[int]): The maximum number of frames to process. - prefetch (int): The maximum number of frames buffered by the reader thread. - writer_buffer (int): The maximum number of frames buffered before writing. - show_progress (bool): Whether to show a progress bar. - progress_message (str): The message to display in the progress bar.
""" - - source_video_info = VideoInfo.from_video_path(video_path=source_path) + video_info = VideoInfo.from_video_path(video_path=source_path) total_frames = ( - min(source_video_info.total_frames, max_frames) + min(video_info.total_frames, max_frames) if max_frames is not None - else source_video_info.total_frames + else video_info.total_frames ) - # Each queue includes frames + sentinel - read_q: Queue[tuple[int, np.ndarray] | None] = Queue(maxsize=prefetch) - write_q: Queue[np.ndarray | None] = Queue(maxsize=writer_buffer) + frame_read_queue: Queue[tuple[int, np.ndarray] | None] = Queue(maxsize=prefetch) + frame_write_queue: Queue[np.ndarray | None] = Queue(maxsize=writer_buffer) - def reader_thread(): - gen = get_video_frames_generator(source_path=source_path, end=max_frames) - for idx, frame in enumerate(gen): - read_q.put((idx, frame)) - read_q.put(None) # sentinel + def reader_thread() -> None: + frame_generator = get_video_frames_generator( + source_path=source_path, + end=max_frames, + ) + for frame_index, frame in enumerate(frame_generator): + frame_read_queue.put((frame_index, frame)) + frame_read_queue.put(None) - def writer_thread(video_sink: VideoSink): + def writer_thread(video_sink: VideoSink) -> None: while True: - frame = write_q.get() + frame = frame_write_queue.get() if frame is None: break video_sink.write_frame(frame=frame) - # Heads up! We set 'daemon=True' so this thread won't block program exit - # if the main thread finishes first. 
- t_reader = threading.Thread(target=reader_thread, daemon=True) - with VideoSink(target_path=target_path, video_info=source_video_info) as sink: - t_writer = threading.Thread(target=writer_thread, args=(sink,), daemon=True) - t_reader.start() - t_writer.start() + reader_worker = threading.Thread(target=reader_thread, daemon=True) + with VideoSink(target_path=target_path, video_info=video_info) as video_sink: + writer_worker = threading.Thread( + target=writer_thread, + args=(video_sink,), + daemon=True, + ) + + reader_worker.start() + writer_worker.start() - process_bar = tqdm( - total=total_frames, disable=not show_progress, desc=progress_message + progress_bar = tqdm( + total=total_frames, + disable=not show_progress, + desc=progress_message, ) - # Main thread: we take a frame, apply function and update process bar. - while True: - item = read_q.get() - if item is None: - break - idx, frame = item - out = callback(frame, idx) - write_q.put(out) - if total_frames is not None: - process_bar.update(1) - - write_q.put(None) - t_reader.join() - t_writer.join() - process_bar.close() + try: + while True: + read_item = frame_read_queue.get() + if read_item is None: + break + + frame_index, frame = read_item + processed_frame = callback(frame, frame_index) + + frame_write_queue.put(processed_frame) + progress_bar.update(1) + finally: + frame_write_queue.put(None) + reader_worker.join() + writer_worker.join() + progress_bar.close() class FPSMonitor: From 38f89125b1d6d4b5d7791c01dd0ecdc7f8984681 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 15 Nov 2025 01:10:43 +0000 Subject: [PATCH 105/124] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/supervision/__init__.py 
b/supervision/__init__.py index aa23963bbe..43d63cad59 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -52,12 +52,12 @@ from supervision.detection.tools.polygon_zone import PolygonZone, PolygonZoneAnnotator from supervision.detection.tools.smoother import DetectionsSmoother from supervision.detection.utils.boxes import ( + box_aspect_ratio, clip_boxes, denormalize_boxes, move_boxes, pad_boxes, scale_boxes, - box_aspect_ratio ) from supervision.detection.utils.converters import ( mask_to_polygons, @@ -197,12 +197,12 @@ "VideoInfo", "VideoSink", "approximate_polygon", + "box_aspect_ratio", "box_iou", "box_iou_batch", "box_iou_batch_with_jaccard", "box_non_max_merge", "box_non_max_suppression", - "box_aspect_ratio", "calculate_masks_centroids", "calculate_optimal_line_thickness", "calculate_optimal_text_scale", From 70e9af9200ae73cc2be5756f09b16d29f6f821f1 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Sat, 15 Nov 2025 02:30:55 +0100 Subject: [PATCH 106/124] make `ruff` happy --- supervision/detection/utils/boxes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/supervision/detection/utils/boxes.py b/supervision/detection/utils/boxes.py index 01fcdc7691..52e4b69569 100644 --- a/supervision/detection/utils/boxes.py +++ b/supervision/detection/utils/boxes.py @@ -14,8 +14,8 @@ def clip_boxes(xyxy: np.ndarray, resolution_wh: tuple[int, int]) -> np.ndarray: xyxy (np.ndarray): A numpy array of shape `(N, 4)` where each row corresponds to a bounding box in the format `(x_min, y_min, x_max, y_max)`. - resolution_wh (Tuple[int, int]): A tuple of the form `(width, height)` - representing the resolution of the frame. + resolution_wh (Tuple[int, int]): A tuple of the form + `(width, height)` representing the resolution of the frame. 
Returns: np.ndarray: A numpy array of shape `(N, 4)` where each row From 316cb6271d53028758b3bf5b92b8f1b59f2bafbf Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Sat, 15 Nov 2025 02:33:11 +0100 Subject: [PATCH 107/124] make `ruff` happy --- supervision/detection/utils/boxes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/supervision/detection/utils/boxes.py b/supervision/detection/utils/boxes.py index 904739899f..1b1d9d2d32 100644 --- a/supervision/detection/utils/boxes.py +++ b/supervision/detection/utils/boxes.py @@ -14,8 +14,8 @@ def box_aspect_ratio(xyxy: np.ndarray) -> np.ndarray: for boxes with zero height to avoid division errors. Args: - xyxy (`numpy.ndarray`): Array of bounding boxes in `(x_min, y_min, x_max, y_max)` - format with shape `(N, 4)`. + xyxy (`numpy.ndarray`): Array of bounding boxes in + `(x_min, y_min, x_max, y_max)` format with shape `(N, 4)`. Returns: `numpy.ndarray`: Array of aspect ratios with shape `(N,)`, where each element is From 2edcbdc4872f83abcb7992f350afe338c9325958 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Sat, 15 Nov 2025 21:27:55 +0100 Subject: [PATCH 108/124] initial commit with `from_qwen_3_vl` added, `from_qwen_2_5_vl` improved --- supervision/detection/core.py | 1 + supervision/detection/vlm.py | 137 ++++++++++++++++++++++++++++------ test/detection/test_vlm.py | 45 ++++++++--- 3 files changed, 151 insertions(+), 32 deletions(-) diff --git a/supervision/detection/core.py b/supervision/detection/core.py index bda2e7de33..66610b9982 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -1211,6 +1211,7 @@ def from_vlm(cls, vlm: VLM | str, result: str | dict, **kwargs: Any) -> Detectio | PaliGemma | `PALIGEMMA` | detection | `resolution_wh` | `classes` | | PaliGemma 2 | `PALIGEMMA` | detection | `resolution_wh` | `classes` | | Qwen2.5-VL | `QWEN_2_5_VL` | detection | `resolution_wh`, `input_wh` | `classes` | + | Qwen3-VL | `QWEN_3_VL` | detection | `resolution_wh`, | 
`classes` | | Google Gemini 2.0 | `GOOGLE_GEMINI_2_0` | detection | `resolution_wh` | `classes` | | Google Gemini 2.5 | `GOOGLE_GEMINI_2_5` | detection, segmentation | `resolution_wh` | `classes` | | Moondream | `MOONDREAM` | detection | `resolution_wh` | | diff --git a/supervision/detection/vlm.py b/supervision/detection/vlm.py index 2f9b60ddb5..9892e40de1 100644 --- a/supervision/detection/vlm.py +++ b/supervision/detection/vlm.py @@ -1,5 +1,6 @@ from __future__ import annotations +import ast import base64 import io import json @@ -27,7 +28,8 @@ class LMM(Enum): Attributes: PALIGEMMA: Google's PaliGemma vision-language model. FLORENCE_2: Microsoft's Florence-2 vision-language model. - QWEN_2_5_VL: Qwen2.5-VL open vision-language model from Alibaba. + QWEN_2_5_VL: Qwen2.5-VL open vision-language model from Alibaba.
+ QWEN_3_VL: Qwen3-VL open vision-language model from Alibaba. GOOGLE_GEMINI_2_0: Google Gemini 2.0 vision-language model. GOOGLE_GEMINI_2_5: Google Gemini 2.5 vision-language model. MOONDREAM: The Moondream vision-language model. @@ -36,6 +38,7 @@ class LMM(Enum): PALIGEMMA = "paligemma" FLORENCE_2 = "florence_2" QWEN_2_5_VL = "qwen_2_5_vl" + QWEN_3_VL = "qwen_3_vl" DEEPSEEK_VL_2 = "deepseek_vl_2" GOOGLE_GEMINI_2_0 = "gemini_2_0" GOOGLE_GEMINI_2_5 = "gemini_2_5" @@ -69,6 +72,7 @@ class VLM(Enum): PALIGEMMA: Google's PaliGemma vision-language model. FLORENCE_2: Microsoft's Florence-2 vision-language model. QWEN_2_5_VL: Qwen2.5-VL open vision-language model from Alibaba. + QWEN_3_VL: Qwen3-VL open vision-language model from Alibaba. GOOGLE_GEMINI_2_0: Google Gemini 2.0 vision-language model. GOOGLE_GEMINI_2_5: Google Gemini 2.5 vision-language model. MOONDREAM: The Moondream vision-language model. 
@@ -77,6 +81,7 @@ class VLM(Enum): PALIGEMMA = "paligemma" FLORENCE_2 = "florence_2" QWEN_2_5_VL = "qwen_2_5_vl" + QWEN_3_VL = "qwen_3_vl" DEEPSEEK_VL_2 = "deepseek_vl_2" GOOGLE_GEMINI_2_0 = "gemini_2_0" GOOGLE_GEMINI_2_5 = "gemini_2_5" @@ -106,6 +111,7 @@ def from_value(cls, value: VLM | str) -> VLM: VLM.PALIGEMMA: str, VLM.FLORENCE_2: dict, VLM.QWEN_2_5_VL: str, + VLM.QWEN_3_VL: str, VLM.DEEPSEEK_VL_2: str, VLM.GOOGLE_GEMINI_2_0: str, VLM.GOOGLE_GEMINI_2_5: str, @@ -116,6 +122,7 @@ def from_value(cls, value: VLM | str) -> VLM: VLM.PALIGEMMA: ["resolution_wh"], VLM.FLORENCE_2: ["resolution_wh"], VLM.QWEN_2_5_VL: ["input_wh", "resolution_wh"], + VLM.QWEN_3_VL: ["resolution_wh"], VLM.DEEPSEEK_VL_2: ["resolution_wh"], VLM.GOOGLE_GEMINI_2_0: ["resolution_wh"], VLM.GOOGLE_GEMINI_2_5: ["resolution_wh"], @@ -126,6 +133,7 @@ def from_value(cls, value: VLM | str) -> VLM: VLM.PALIGEMMA: ["resolution_wh", "classes"], VLM.FLORENCE_2: ["resolution_wh"], VLM.QWEN_2_5_VL: ["input_wh", "resolution_wh", "classes"], + VLM.QWEN_3_VL: ["resolution_wh", "classes"], VLM.DEEPSEEK_VL_2: ["resolution_wh", "classes"], VLM.GOOGLE_GEMINI_2_0: ["resolution_wh", "classes"], VLM.GOOGLE_GEMINI_2_5: ["resolution_wh", "classes"], @@ -235,6 +243,51 @@ def from_paligemma( return xyxy, class_id, class_name +def recover_truncated_qwen_2_5_vl_response(text: str) -> Any | None: + """ + Attempt to recover and parse a truncated or malformed JSON snippet from Qwen-2.5-VL + output. + + This utility extracts a JSON-like portion from a string that may be truncated or + malformed, cleans trailing commas, and attempts to parse it into a Python object. + + Args: + text (str): Raw text containing the JSON snippet possibly truncated or + incomplete. + + Returns: + Parsed Python object (usually list) if recovery and parsing succeed; + otherwise `None`. 
+ """ + try: + first_bracket = text.find("[") + if first_bracket == -1: + return None + snippet = text[first_bracket:] + + last_brace = snippet.rfind("}") + if last_brace == -1: + return None + + snippet = snippet[: last_brace + 1] + + prefix_end = snippet.find("[") + if prefix_end == -1: + return None + + prefix = snippet[: prefix_end + 1] + body = snippet[prefix_end + 1 :].rstrip() + + if body.endswith(","): + body = body[:-1].rstrip() + + repaired = prefix + body + "]" + + return json.loads(repaired) + except Exception: + return None + + def from_qwen_2_5_vl( result: str, input_wh: tuple[int, int], @@ -242,7 +295,7 @@ def from_qwen_2_5_vl( classes: list[str] | None = None, ) -> tuple[np.ndarray, np.ndarray | None, np.ndarray]: """ - Parse and scale bounding boxes from Qwen-2.5-VL style JSON output. + Parse and rescale bounding boxes and class labels from Qwen-2.5-VL JSON output. The JSON is expected to be enclosed in triple backticks with the format: ```json @@ -253,37 +306,47 @@ def from_qwen_2_5_vl( ``` Args: - result: String containing the JSON snippet enclosed by triple backticks. - input_wh: (input_width, input_height) describing the original bounding box - scale. - resolution_wh: (output_width, output_height) to which we rescale the boxes. - classes: Optional list of valid class names. If provided, returned boxes/labels - are filtered to only those classes found here. + result (str): String containing Qwen-2.5-VL JSON bounding box and label data. + input_wh (tuple[int, int]): Width and height of the coordinate space where boxes + are normalized. + resolution_wh (tuple[int, int]): Target width and height to scale bounding + boxes. + classes (list[str] or None): Optional list of valid class names to filter + results. If provided, only boxes with labels in this list are returned. 
Returns: - xyxy (np.ndarray): An array of shape `(n, 4)` containing - the bounding boxes coordinates in format `[x1, y1, x2, y2]` - class_id (Optional[np.ndarray]): An array of shape `(n,)` containing - the class indices for each bounding box (or None if `classes` is not - provided) - class_name (np.ndarray): An array of shape `(n,)` containing - the class labels for each bounding box + xyxy (np.ndarray): Array of shape `(N, 4)` with rescaled bounding boxes in + `(x_min, y_min, x_max, y_max)` format. + class_id (np.ndarray or None): Array of shape `(N,)` with indices of classes, + or `None` if no filtering applied. + class_name (np.ndarray): Array of shape `(N,)` with class names as strings. """ in_w, in_h = validate_resolution(input_wh) out_w, out_h = validate_resolution(resolution_wh) - pattern = re.compile(r"```json\s*(.*?)\s*```", re.DOTALL) - - match = pattern.search(result) - if not match: - return np.empty((0, 4)), None, np.empty((0,), dtype=str) + text = result.strip() + text = re.sub(r"^```(json)?", "", text, flags=re.IGNORECASE).strip() + text = re.sub(r"```$", "", text).strip() - json_snippet = match.group(1) + start = text.find("[") + end = text.rfind("]") + if start != -1 and end != -1 and end > start: + text = text[start: end + 1].strip() try: - data = json.loads(json_snippet) + data = json.loads(text) except json.JSONDecodeError: + repaired = recover_truncated_qwen_2_5_vl_response(text) + if repaired is not None: + data = repaired + else: + try: + data = ast.literal_eval(text) + except (ValueError, SyntaxError, TypeError): + return np.empty((0, 4)), None, np.empty((0,), dtype=str) + + if not isinstance(data, list): return np.empty((0, 4)), None, np.empty((0,), dtype=str) boxes_list = [] @@ -315,6 +378,36 @@ def from_qwen_2_5_vl( return xyxy, class_id, class_name +def from_qwen_3_vl( + result: str, + resolution_wh: tuple[int, int], + classes: list[str] | None = None, +) -> tuple[np.ndarray, np.ndarray | None, np.ndarray]: + """ + Parse and scale 
bounding boxes from Qwen-3-VL style JSON output. + + Args: + result (str): String containing the Qwen-3-VL JSON output. + resolution_wh (tuple[int, int]): Target resolution `(width, height)` to + scale bounding boxes. + classes (list[str] or None): Optional list of valid classes to filter + results. + + Returns: + xyxy (np.ndarray): Array of bounding boxes with shape `(N, 4)` in + `(x_min, y_min, x_max, y_max)` format scaled to `resolution_wh`. + class_id (np.ndarray or None): Array of class indices for each box, or + None if no filtering by classes. + class_name (np.ndarray): Array of class names as strings. + """ + return from_qwen_2_5_vl( + result=result, + input_wh=(1000, 1000), + resolution_wh=resolution_wh, + classes=classes + ) + + def from_deepseek_vl_2( result: str, resolution_wh: tuple[int, int], classes: list[str] | None = None ) -> tuple[np.ndarray, np.ndarray | None, np.ndarray]: diff --git a/test/detection/test_vlm.py b/test/detection/test_vlm.py index 8a8240e98f..e93b66f3d9 100644 --- a/test/detection/test_vlm.py +++ b/test/detection/test_vlm.py @@ -320,18 +320,43 @@ def test_from_paligemma( np.array(["dog"], dtype=str), ), ), # out-of-bounds box +( + does_not_raise(), + """[ + {'bbox_2d': [10, 20, 110, 120], 'label': 'cat'} + ]""", + (640, 640), + (1280, 720), + None, + ( + np.array([[20.0, 22.5, 220.0, 135.0]]), + None, + np.array(["cat"], dtype=str), + ), + ), # python-style list, single quotes, no fences ( - pytest.raises(ValueError), + does_not_raise(), """```json [ - {"bbox_2d": [10, 20, 110, 120], "label": "cat"} - ] - ```""", - (0, 640), - (1280, 720), + {"bbox_2d": [0, 0, 64, 64], "label": "dog"}, + {"bbox_2d": [10, 20, 110, 120], "label": "cat"}, + {"bbox_2d": [30, 40, 130, 140], "label": + """, + (640, 640), + (640, 640), None, - None, # won't be compared because we expect an exception - ), # zero input width -> ValueError + ( + np.array( + [ + [0.0, 0.0, 64.0, 64.0], + [10.0, 20.0, 110.0, 120.0], + ], + dtype=float, + ), + None, + 
np.array(["dog", "cat"], dtype=str), + ), + ), # truncated response, last object unfinished, previous ones recovered ( pytest.raises(ValueError), """```json @@ -342,8 +367,8 @@ def test_from_paligemma( (640, 640), (1280, -100), None, - None, - ), # negative resolution height -> ValueError + None, # invalid resolution_wh + ), ], ) def test_from_qwen_2_5_vl( From e68c908077bb8bf901478bc087458fd218abb69f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 15 Nov 2025 20:29:32 +0000 Subject: [PATCH 109/124] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/detection/vlm.py | 4 ++-- test/detection/test_vlm.py | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/supervision/detection/vlm.py b/supervision/detection/vlm.py index 9892e40de1..371827668d 100644 --- a/supervision/detection/vlm.py +++ b/supervision/detection/vlm.py @@ -332,7 +332,7 @@ def from_qwen_2_5_vl( start = text.find("[") end = text.rfind("]") if start != -1 and end != -1 and end > start: - text = text[start: end + 1].strip() + text = text[start : end + 1].strip() try: data = json.loads(text) @@ -404,7 +404,7 @@ def from_qwen_3_vl( result=result, input_wh=(1000, 1000), resolution_wh=resolution_wh, - classes=classes + classes=classes, ) diff --git a/test/detection/test_vlm.py b/test/detection/test_vlm.py index e93b66f3d9..7bfc23131a 100644 --- a/test/detection/test_vlm.py +++ b/test/detection/test_vlm.py @@ -320,7 +320,7 @@ def test_from_paligemma( np.array(["dog"], dtype=str), ), ), # out-of-bounds box -( + ( does_not_raise(), """[ {'bbox_2d': [10, 20, 110, 120], 'label': 'cat'} @@ -346,15 +346,15 @@ def test_from_paligemma( (640, 640), None, ( - np.array( - [ - [0.0, 0.0, 64.0, 64.0], - [10.0, 20.0, 110.0, 120.0], - ], - dtype=float, - ), - None, - 
np.array(["dog", "cat"], dtype=str), + np.array( + [ + [0.0, 0.0, 64.0, 64.0], + [10.0, 20.0, 110.0, 120.0], + ], + dtype=float, + ), + None, + np.array(["dog", "cat"], dtype=str), ), ), # truncated response, last object unfinished, previous ones recovered ( From 7301156e4613039415e99a5415dbffe842a20441 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Sat, 15 Nov 2025 21:36:54 +0100 Subject: [PATCH 110/124] plug Qwen3-VL into `sv.Detections.from_vlm` --- supervision/detection/core.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/supervision/detection/core.py b/supervision/detection/core.py index 66610b9982..903134f82a 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -1559,6 +1559,11 @@ def from_vlm(cls, vlm: VLM | str, result: str | dict, **kwargs: Any) -> Detectio data = {CLASS_NAME_DATA_FIELD: class_name} return cls(xyxy=xyxy, class_id=class_id, data=data) + if vlm == VLM.QWEN_3_VL: + xyxy, class_id, class_name = from_qwen_2_5_vl(result, **kwargs) + data = {CLASS_NAME_DATA_FIELD: class_name} + return cls(xyxy=xyxy, class_id=class_id, data=data) + if vlm == VLM.DEEPSEEK_VL_2: xyxy, class_id, class_name = from_deepseek_vl_2(result, **kwargs) data = {CLASS_NAME_DATA_FIELD: class_name} From 9ab9650763205bac575bee3ecdfe3d4ff5231a11 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Sat, 15 Nov 2025 21:39:23 +0100 Subject: [PATCH 111/124] plug Qwen3-VL into `sv.Detections.from_vlm` --- supervision/detection/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/supervision/detection/core.py b/supervision/detection/core.py index 903134f82a..464855a3d5 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -47,7 +47,7 @@ from_moondream, from_paligemma, from_qwen_2_5_vl, - validate_vlm_parameters, + validate_vlm_parameters, from_qwen_3_vl, ) from supervision.geometry.core import Position from supervision.utils.internal import deprecated, get_instance_variables @@ -1560,7 +1560,7 @@ def 
from_vlm(cls, vlm: VLM | str, result: str | dict, **kwargs: Any) -> Detectio return cls(xyxy=xyxy, class_id=class_id, data=data) if vlm == VLM.QWEN_3_VL: - xyxy, class_id, class_name = from_qwen_2_5_vl(result, **kwargs) + xyxy, class_id, class_name = from_qwen_3_vl(result, **kwargs) data = {CLASS_NAME_DATA_FIELD: class_name} return cls(xyxy=xyxy, class_id=class_id, data=data) From 31db2ded112634d4661099f8da707a06f064c4c8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 15 Nov 2025 20:40:01 +0000 Subject: [PATCH 112/124] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/detection/core.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/supervision/detection/core.py b/supervision/detection/core.py index 464855a3d5..1fe602ea19 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -47,7 +47,8 @@ from_moondream, from_paligemma, from_qwen_2_5_vl, - validate_vlm_parameters, from_qwen_3_vl, + from_qwen_3_vl, + validate_vlm_parameters, ) from supervision.geometry.core import Position from supervision.utils.internal import deprecated, get_instance_variables From 0422a66eaa0c81c73eb766058b0230041ae42fc1 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Sat, 15 Nov 2025 22:05:46 +0100 Subject: [PATCH 113/124] plug Qwen3-VL into `sv.Detections.from_vlm` --- supervision/detection/core.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/supervision/detection/core.py b/supervision/detection/core.py index 464855a3d5..2f350be1fd 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -1557,12 +1557,14 @@ def from_vlm(cls, vlm: VLM | str, result: str | dict, **kwargs: Any) -> Detectio if vlm == VLM.QWEN_2_5_VL: xyxy, class_id, class_name = from_qwen_2_5_vl(result, **kwargs) 
data = {CLASS_NAME_DATA_FIELD: class_name} - return cls(xyxy=xyxy, class_id=class_id, data=data) + confidence = np.ones(len(class_id), dtype=float) + return cls(xyxy=xyxy, class_id=class_id, confidence=confidence, data=data) if vlm == VLM.QWEN_3_VL: xyxy, class_id, class_name = from_qwen_3_vl(result, **kwargs) data = {CLASS_NAME_DATA_FIELD: class_name} - return cls(xyxy=xyxy, class_id=class_id, data=data) + confidence = np.ones(len(class_id), dtype=float) + return cls(xyxy=xyxy, class_id=class_id, confidence=confidence, data=data) if vlm == VLM.DEEPSEEK_VL_2: xyxy, class_id, class_name = from_deepseek_vl_2(result, **kwargs) From e29e3832fb3b18ce38190bc3a34466a4a66afb59 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Sat, 15 Nov 2025 22:19:28 +0100 Subject: [PATCH 114/124] plug Qwen3-VL into `sv.Detections.from_vlm` --- supervision/detection/vlm.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/supervision/detection/vlm.py b/supervision/detection/vlm.py index 371827668d..388820f5b7 100644 --- a/supervision/detection/vlm.py +++ b/supervision/detection/vlm.py @@ -344,10 +344,19 @@ def from_qwen_2_5_vl( try: data = ast.literal_eval(text) except (ValueError, SyntaxError, TypeError): - return np.empty((0, 4)), None, np.empty((0,), dtype=str) + return ( + np.empty((0, 4)), + np.empty((0,), dtype=int), + np.empty((0,), dtype=str) + ) if not isinstance(data, list): - return np.empty((0, 4)), None, np.empty((0,), dtype=str) + return ( + np.empty((0, 4)), + np.empty((0,), dtype=int), + np.empty((0,), dtype=str) + ) + boxes_list = [] labels_list = [] @@ -359,7 +368,12 @@ def from_qwen_2_5_vl( labels_list.append(item["label"]) if not boxes_list: - return np.empty((0, 4)), None, np.empty((0,), dtype=str) + return ( + np.empty((0, 4)), + np.empty((0,), dtype=int), + np.empty((0,), dtype=str) + ) + xyxy = np.array(boxes_list, dtype=float) class_name = np.array(labels_list, dtype=str) From 9bd8f71f26dfba13f80f32b10b4ae40da728aef5 Mon Sep 17 
00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 15 Nov 2025 21:19:51 +0000 Subject: [PATCH 115/124] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/detection/vlm.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/supervision/detection/vlm.py b/supervision/detection/vlm.py index 388820f5b7..97988c9f09 100644 --- a/supervision/detection/vlm.py +++ b/supervision/detection/vlm.py @@ -347,16 +347,11 @@ def from_qwen_2_5_vl( return ( np.empty((0, 4)), np.empty((0,), dtype=int), - np.empty((0,), dtype=str) + np.empty((0,), dtype=str), ) if not isinstance(data, list): - return ( - np.empty((0, 4)), - np.empty((0,), dtype=int), - np.empty((0,), dtype=str) - ) - + return (np.empty((0, 4)), np.empty((0,), dtype=int), np.empty((0,), dtype=str)) boxes_list = [] labels_list = [] @@ -368,12 +363,7 @@ def from_qwen_2_5_vl( labels_list.append(item["label"]) if not boxes_list: - return ( - np.empty((0, 4)), - np.empty((0,), dtype=int), - np.empty((0,), dtype=str) - ) - + return (np.empty((0, 4)), np.empty((0,), dtype=int), np.empty((0,), dtype=str)) xyxy = np.array(boxes_list, dtype=float) class_name = np.array(labels_list, dtype=str) From c35a35cefdf8b586bbe325d16d63403ebbb0d6f2 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Sat, 15 Nov 2025 22:27:11 +0100 Subject: [PATCH 116/124] plug Qwen3-VL into `sv.Detections.from_vlm` --- supervision/detection/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/supervision/detection/core.py b/supervision/detection/core.py index e67f906f58..ef88fbfec3 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -1558,13 +1558,13 @@ def from_vlm(cls, vlm: VLM | str, result: str | dict, **kwargs: Any) -> Detectio if vlm == VLM.QWEN_2_5_VL: xyxy, class_id, 
class_name = from_qwen_2_5_vl(result, **kwargs) data = {CLASS_NAME_DATA_FIELD: class_name} - confidence = np.ones(len(class_id), dtype=float) + confidence = np.ones(len(xyxy), dtype=float) return cls(xyxy=xyxy, class_id=class_id, confidence=confidence, data=data) if vlm == VLM.QWEN_3_VL: xyxy, class_id, class_name = from_qwen_3_vl(result, **kwargs) data = {CLASS_NAME_DATA_FIELD: class_name} - confidence = np.ones(len(class_id), dtype=float) + confidence = np.ones(len(xyxy), dtype=float) return cls(xyxy=xyxy, class_id=class_id, confidence=confidence, data=data) if vlm == VLM.DEEPSEEK_VL_2: From 5af13c0f8c71442acb90a053b65249810ca1de4c Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Sat, 15 Nov 2025 22:48:20 +0100 Subject: [PATCH 117/124] add Qwen3-VL prompting example --- supervision/detection/core.py | 60 +++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/supervision/detection/core.py b/supervision/detection/core.py index ef88fbfec3..153dbe47a2 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -951,6 +951,36 @@ def from_lmm(cls, lmm: LMM | str, result: str | dict, **kwargs: Any) -> Detectio detections.class_id # array([0, 1]) ``` + + !!! example "Qwen3-VL" + + ```python + import supervision as sv + + qwen_3_vl_result = \"\"\"```json + [ + {"bbox_2d": [139, 768, 315, 954], "label": "cat"}, + {"bbox_2d": [366, 679, 536, 849], "label": "dog"} + ] + ```\"\"\" + detections = sv.Detections.from_lmm( + sv.LMM.QWEN_3_VL, + qwen_3_vl_result, + resolution_wh=(1000, 1000), + classes=['cat', 'dog'], + ) + detections.xyxy + # array([[139., 768., 315., 954.], [366., 679., 536., 849.]]) + + detections.class_id + # array([0, 1]) + + detections.data + # {'class_name': array(['cat', 'dog'], dtype=' Detectio detections.class_id # array([0, 1]) ``` + + !!! 
example "Qwen3-VL" + + ```python + import supervision as sv + + qwen_3_vl_result = \"\"\"```json + [ + {"bbox_2d": [139, 768, 315, 954], "label": "cat"}, + {"bbox_2d": [366, 679, 536, 849], "label": "dog"} + ] + ```\"\"\" + detections = sv.Detections.from_vlm( + sv.VLM.QWEN_3_VL, + qwen_3_vl_result, + resolution_wh=(1000, 1000), + classes=['cat', 'dog'], + ) + detections.xyxy + # array([[139., 768., 315., 954.], [366., 679., 536., 849.]]) + + detections.class_id + # array([0, 1]) + + detections.data + # {'class_name': array(['cat', 'dog'], dtype=' Date: Sat, 15 Nov 2025 21:48:42 +0000 Subject: [PATCH 118/124] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20aut?= =?UTF-8?q?o=20format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- supervision/detection/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/supervision/detection/core.py b/supervision/detection/core.py index 153dbe47a2..e5e298bbb4 100644 --- a/supervision/detection/core.py +++ b/supervision/detection/core.py @@ -951,7 +951,7 @@ def from_lmm(cls, lmm: LMM | str, result: str | dict, **kwargs: Any) -> Detectio detections.class_id # array([0, 1]) ``` - + !!! example "Qwen3-VL" ```python @@ -1359,7 +1359,7 @@ def from_vlm(cls, vlm: VLM | str, result: str | dict, **kwargs: Any) -> Detectio detections.class_id # array([0, 1]) ``` - + !!! 
example "Qwen3-VL" ```python From a99015ac2da717477346fd825ba11cbed934b11f Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Sat, 15 Nov 2025 22:51:22 +0100 Subject: [PATCH 119/124] more Qwen2.5-VL tests --- test/detection/test_vlm.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/test/detection/test_vlm.py b/test/detection/test_vlm.py index 7bfc23131a..9a0195f780 100644 --- a/test/detection/test_vlm.py +++ b/test/detection/test_vlm.py @@ -357,6 +357,18 @@ def test_from_paligemma( np.array(["dog", "cat"], dtype=str), ), ), # truncated response, last object unfinished, previous ones recovered + ( + pytest.raises(ValueError), + """```json + [ + {"bbox_2d": [10, 20, 110, 120], "label": "cat"} + ] + ```""", + (0, 640), + (1280, 720), + None, + None, # invalid input_wh + ), ( pytest.raises(ValueError), """```json From e12bfe263d89c59d41be535191e1f964fd5a85bc Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Sun, 16 Nov 2025 13:27:29 +0100 Subject: [PATCH 120/124] add `box_aspect_ratio` property to `sv.Detections` --- supervision/__init__.py | 2 -- supervision/detection/core.py | 37 ++++++++++++++++++++ supervision/detection/utils/boxes.py | 51 ---------------------------- 3 files changed, 37 insertions(+), 53 deletions(-) diff --git a/supervision/__init__.py b/supervision/__init__.py index b74d370546..00820076fe 100644 --- a/supervision/__init__.py +++ b/supervision/__init__.py @@ -52,7 +52,6 @@ from supervision.detection.tools.polygon_zone import PolygonZone, PolygonZoneAnnotator from supervision.detection.tools.smoother import DetectionsSmoother from supervision.detection.utils.boxes import ( - box_aspect_ratio, clip_boxes, denormalize_boxes, move_boxes, @@ -200,7 +199,6 @@ "VideoInfo", "VideoSink", "approximate_polygon", - "box_aspect_ratio", "box_iou", "box_iou_batch", "box_iou_batch_with_jaccard", diff --git a/supervision/detection/core.py b/supervision/detection/core.py index e5e298bbb4..6ca6abc567 100644 --- a/supervision/detection/core.py +++ 
b/supervision/detection/core.py @@ -2088,6 +2088,43 @@ def box_area(self) -> np.ndarray: """ return (self.xyxy[:, 3] - self.xyxy[:, 1]) * (self.xyxy[:, 2] - self.xyxy[:, 0]) + @property + def box_aspect_ratio(self) -> np.ndarray: + """ + Compute the aspect ratio (width divided by height) for each bounding box. + + Returns: + np.ndarray: Array of shape `(N,)` containing aspect ratios, where `N` is the + number of boxes (width / height for each box). + + Examples: + ```python + import numpy as np + import supervision as sv + + xyxy = np.array([ + [10, 10, 50, 50], + [60, 10, 180, 50], + [10, 60, 50, 180], + ]) + + detections = sv.Detections(xyxy=xyxy) + + detections.box_aspect_ratio + # array([1.0, 3.0, 0.33333333]) + + ar = detections.box_aspect_ratio + detections[(ar < 2.0) & (ar > 0.5)].xyxy + # array([[10., 10., 50., 50.]]) + ``` + """ + widths = self.xyxy[:, 2] - self.xyxy[:, 0] + heights = self.xyxy[:, 3] - self.xyxy[:, 1] + + aspect_ratios = np.full_like(widths, np.nan, dtype=np.float64) + np.divide(widths, heights, out=aspect_ratios, where=heights != 0) + return aspect_ratios + def with_nms( self, threshold: float = 0.5, diff --git a/supervision/detection/utils/boxes.py b/supervision/detection/utils/boxes.py index c7f81b3e9e..52e4b69569 100644 --- a/supervision/detection/utils/boxes.py +++ b/supervision/detection/utils/boxes.py @@ -6,57 +6,6 @@ from supervision.detection.utils.iou_and_nms import box_iou_batch -def box_aspect_ratio(xyxy: np.ndarray) -> np.ndarray: - """ - Calculate aspect ratios of bounding boxes given in xyxy format. - - Computes the width divided by height for each bounding box. Returns NaN - for boxes with zero height to avoid division errors. - - Args: - xyxy (`numpy.ndarray`): Array of bounding boxes in - `(x_min, y_min, x_max, y_max)` format with shape `(N, 4)`. - - Returns: - `numpy.ndarray`: Array of aspect ratios with shape `(N,)`, where each element is - the width divided by height of a box. Elements are NaN if height is zero. 
- - Examples: - ```python - import numpy as np - import supervision as sv - - xyxy = np.array([ - [10, 20, 30, 50], - [0, 0, 40, 10], - ]) - - sv.box_aspect_ratio(xyxy) - # array([0.66666667, 4. ]) - - xyxy = np.array([ - [10, 10, 30, 10], - [5, 5, 25, 25], - ]) - - sv.box_aspect_ratio(xyxy) - # array([ nan, 1. ]) - ``` - """ - widths = xyxy[:, 2] - xyxy[:, 0] - heights = xyxy[:, 3] - xyxy[:, 1] - - aspect_ratios = np.full_like(widths, np.nan, dtype=np.float64) - np.divide( - widths, - heights, - out=aspect_ratios, - where=heights != 0, - ) - - return aspect_ratios - - def clip_boxes(xyxy: np.ndarray, resolution_wh: tuple[int, int]) -> np.ndarray: """ Clips bounding boxes coordinates to fit within the frame resolution. From bbc37b4b6a55d18bca94978f94f49602f53cae87 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Sun, 16 Nov 2025 13:34:51 +0100 Subject: [PATCH 121/124] fix failing tests --- test/utils/test_internal.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/utils/test_internal.py b/test/utils/test_internal.py index 07674f39cb..749d4be3a8 100644 --- a/test/utils/test_internal.py +++ b/test/utils/test_internal.py @@ -145,6 +145,7 @@ def __private_property(self): "metadata", "area", "box_area", + "box_aspect_ratio", }, DoesNotRaise(), ), From 47574ed29d1b7f102bca3b8d21f9cf4869af6b1f Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Sun, 16 Nov 2025 16:15:10 +0100 Subject: [PATCH 122/124] supervision 0.27.0 changelog --- docs/changelog.md | 68 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/docs/changelog.md b/docs/changelog.md index 4ebec50021..1974680c66 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,73 @@ # Changelog +### 0.27.0 Nov 16, 2025 + +- Added [#2008](https://github.com/roboflow/supervision/pull/2008): [`sv.filter_segments_by_distance`](https://supervision.roboflow.com/0.27.0/detection/utils/masks/#supervision.detection.utils.masks.filter_segments_by_distance) to keep the largest 
connected component and nearby components within an absolute or relative distance threshold. Useful for cleaning segmentation predictions from models such as SAM, SAM2, YOLO segmentation, and RF-DETR segmentation. + +- Added [#2006](https://github.com/roboflow/supervision/pull/2006): [`sv.xyxy_to_mask`](https://supervision.roboflow.com/0.27.0/detection/utils/converters/#supervision.detection.utils.converters.xyxy_to_mask) to convert bounding boxes into 2D boolean masks, where each mask corresponds to a single box. + +- Added [#1943](https://github.com/roboflow/supervision/pull/1943): [`sv.tint_image`](https://supervision.roboflow.com/0.27.0/utils/image/#supervision.utils.image.tint_image) to apply a solid color overlay to an image at a given opacity. Works with both NumPy and PIL inputs. + +- Added [#1943](https://github.com/roboflow/supervision/pull/1943): [`sv.grayscale_image`](https://supervision.roboflow.com/0.27.0/utils/image/#supervision.utils.image.grayscale_image) to convert an image to 3 channel grayscale for compatibility with color based drawing utilities. + +- Added [#2014](https://github.com/roboflow/supervision/pull/2014): [`sv.get_image_resolution_wh`](https://supervision.roboflow.com/0.27.0/utils/image/#supervision.utils.image.get_image_resolution_wh) as a unified way to read image width and height from NumPy and PIL inputs. + +- Added [#1912](https://github.com/roboflow/supervision/pull/1912): [`sv.edit_distance`](https://supervision.roboflow.com/0.27.0/detection/utils/vlms/#supervision.detection.utils.vlms.edit_distance) for Levenshtein distance between two strings. Supports insert, delete, and substitute operations. + +- Added [#1912](https://github.com/roboflow/supervision/pull/1912): [`sv.fuzzy_match_index`](https://supervision.roboflow.com/0.27.0/detection/utils/vlms/#supervision.detection.utils.vlms.fuzzy_match_index) to find the first close match in a list using edit distance. 
+ +- Changed [#2015](https://github.com/roboflow/supervision/pull/2015): [`sv.Detections.from_vlm`](https://supervision.roboflow.com/0.27.0/detection/core/#supervision.detection.core.Detections.from_vlm) and legacy `from_lmm` now support Qwen3 VL via `vlm=sv.VLM.QWEN_3_VL`. + +- Changed [#1884](https://github.com/roboflow/supervision/pull/1884): [`sv.Detections.from_vlm`](https://supervision.roboflow.com/0.27.0/detection/core/#supervision.detection.core.Detections.from_vlm) and legacy `from_lmm` now support DeepSeek VL 2 via `vlm=sv.VLM.DEEPSEEK_VL_2`. + +- Changed [#2015](https://github.com/roboflow/supervision/pull/2015): [`sv.Detections.from_vlm`](https://supervision.roboflow.com/0.27.0/detection/core/#supervision.detection.core.Detections.from_vlm) now parses Qwen 2.5 VL outputs more robustly and handles incomplete or truncated JSON responses. + +- Changed [#2014](https://github.com/roboflow/supervision/pull/2014): [`sv.InferenceSlicer`](https://supervision.roboflow.com/0.27.0/detection/tools/inference_slicer/#supervision.detection.tools.inference_slicer.InferenceSlicer) now uses a new offset generation logic that removes redundant tiles and aligns borders cleanly. This reduces the number of processed tiles and shortens inference time without hurting detection quality. + +- Changed [#2016](https://github.com/roboflow/supervision/pull/2016): [`sv.Detections`](https://supervision.roboflow.com/0.27.0/detection/core/#supervision.detection.core.Detections) now includes a `box_aspect_ratio` property for vectorized aspect ratio computation, useful for filtering detections based on box shape. + +- Changed [#2001](https://github.com/roboflow/supervision/pull/2001): Significantly improved the performance of [`sv.box_iou_batch`](https://supervision.roboflow.com/0.27.0/detection/utils/iou_and_nms/#supervision.detection.utils.iou_and_nms.box_iou_batch). On internal benchmarks, processing runs approximately 2x to 5x faster. 
+
+- Changed [#1997](https://github.com/roboflow/supervision/pull/1997): [`sv.process_video`](https://supervision.roboflow.com/0.27.0/utils/video/#supervision.utils.video.process_video) now uses a threaded reader, processor, and writer pipeline. This removes I/O stalls and improves throughput while keeping the callback single threaded and safe for stateful models.
+
+- Changed: [`sv.denormalize_boxes`](https://supervision.roboflow.com/0.27.0/detection/utils/boxes/#supervision.detection.utils.boxes.denormalize_boxes) now supports batch conversion of bounding boxes. The function accepts arrays of shape `(N, 4)` and returns a batch of absolute pixel coordinates.
+
+- Changed [#1917](https://github.com/roboflow/supervision/pull/1917): [`sv.LabelAnnotator`](https://supervision.roboflow.com/0.27.0/annotators/#supervision.annotators.core.LabelAnnotator) and [`sv.RichLabelAnnotator`](https://supervision.roboflow.com/0.27.0/annotators/#supervision.annotators.core.RichLabelAnnotator) now accept `text_offset=(x, y)` to shift the label relative to `text_position`. Works with smart label position and line wrapping.
+
+!!! failure "Removed"
+    Removed the deprecated `overlap_ratio_wh` argument from `sv.InferenceSlicer`. Use the pixel based `overlap_wh` argument to control slice overlap.
+
+!!! info "Tip"
+    Convert your old ratio based overlap to pixel based overlap by multiplying each ratio by the slice dimensions.
+ + ```python + # before + + slice_wh = (640, 640) + overlap_ratio_wh = (0.25, 0.25) + + slicer = sv.InferenceSlicer( + callback=callback, + slice_wh=slice_wh, + overlap_ratio_wh=overlap_ratio_wh, + overlap_filter=sv.OverlapFilter.NON_MAX_SUPPRESSION, + ) + + # after + + overlap_wh = ( + int(overlap_ratio_wh[0] * slice_wh[0]), + int(overlap_ratio_wh[1] * slice_wh[1]), + ) + + slicer = sv.InferenceSlicer( + callback=callback, + slice_wh=slice_wh, + overlap_wh=overlap_wh, + overlap_filter=sv.OverlapFilter.NON_MAX_SUPPRESSION, + ) + ``` + ### 0.26.1 Jul 22, 2025 - Fixed [1894](https://github.com/roboflow/supervision/pull/1894): Error in [`sv.MeanAveragePrecision`](https://supervision.roboflow.com/0.26.1/metrics/mean_average_precision/#supervision.metrics.mean_average_precision.MeanAveragePrecision) where the area used for size-specific evaluation (small / medium / large) was always zero unless explicitly provided in `sv.Detections.data`. From 72506888c0d1f12c679a728066f201d4417be535 Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Sun, 16 Nov 2025 16:31:22 +0100 Subject: [PATCH 123/124] bump version from `0.27.0rc5` to `0.27.0rc6` --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2996af1fb2..3e6bb8fd5b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "supervision" description = "A set of easy-to-use utils that will come in handy in any Computer Vision project" license = { text = "MIT" } -version = "0.27.0rc5" +version = "0.27.0rc6" readme = "README.md" requires-python = ">=3.9" authors = [ From 7f7b25d7e72d44486596f5ffab188c8ef7da1a7c Mon Sep 17 00:00:00 2001 From: SkalskiP Date: Sun, 16 Nov 2025 19:02:42 +0100 Subject: [PATCH 124/124] bump version from `0.27.0rc6` to `0.27.0` --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3e6bb8fd5b..e7f86b89ef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 
+2,7 @@ name = "supervision" description = "A set of easy-to-use utils that will come in handy in any Computer Vision project" license = { text = "MIT" } -version = "0.27.0rc6" +version = "0.27.0" readme = "README.md" requires-python = ">=3.9" authors = [