diff --git a/docs/changelog.md b/docs/changelog.md
index e59cd17e08..92f1ba93fc 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -1,5 +1,11 @@
# Changelog
+### develop
+
+- Changed [#2178](https://github.com/roboflow/supervision/pull/2178): [`sv.Detections.from_inference`](https://supervision.roboflow.com/latest/detection/core/#supervision.detection.core.Detections.from_inference) now supports compressed COCO RLE masks. Inference responses with `rle` or `rle_mask` fields containing a compressed counts string (as produced by `pycocotools`) are decoded directly into binary masks, avoiding a lossy polygon round-trip.
+
+- Changed [#2178](https://github.com/roboflow/supervision/pull/2178): [`sv.rle_to_mask`](https://supervision.roboflow.com/latest/detection/utils/converters/#supervision.detection.utils.converters.rle_to_mask) and [`sv.mask_to_rle`](https://supervision.roboflow.com/latest/detection/utils/converters/#supervision.detection.utils.converters.mask_to_rle) moved to `supervision.detection.utils.converters`. The old import path `supervision.dataset.utils` continues to work but is deprecated. `sv.rle_to_mask` now also accepts compressed COCO RLE strings or bytes and returns a boolean mask (previously `uint8`), and `sv.mask_to_rle` gained a `compressed` flag that returns the compressed COCO RLE string.
+
### 0.27.0 Nov 16, 2025
- Added [#2008](https://github.com/roboflow/supervision/pull/2008): [`sv.filter_segments_by_distance`](https://supervision.roboflow.com/0.27.0/detection/utils/masks/#supervision.detection.utils.masks.filter_segments_by_distance) to keep the largest connected component and nearby components within an absolute or relative distance threshold. Useful for cleaning segmentation predictions from models such as SAM, SAM2, YOLO segmentation, and RF-DETR segmentation.
diff --git a/docs/datasets/utils.md b/docs/datasets/utils.md
deleted file mode 100644
index c35b52572f..0000000000
--- a/docs/datasets/utils.md
+++ /dev/null
@@ -1,17 +0,0 @@
----
-comments: true
----
-
-# Datasets Utils
-
-
-
-:::supervision.dataset.utils.rle_to_mask
-
-
-
-:::supervision.dataset.utils.mask_to_rle
diff --git a/docs/detection/utils/converters.md b/docs/detection/utils/converters.md
index b6b1e2af6c..c4c6750e83 100644
--- a/docs/detection/utils/converters.md
+++ b/docs/detection/utils/converters.md
@@ -64,3 +64,15 @@ status: new
:::supervision.detection.utils.converters.xyxy_to_mask
+
+
+
+:::supervision.detection.utils.converters.rle_to_mask
+
+
+
+:::supervision.detection.utils.converters.mask_to_rle
diff --git a/mkdocs.yml b/mkdocs.yml
index 5223928d70..925c378ab8 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -63,7 +63,6 @@ nav:
- Trackers: trackers.md
- Datasets:
- Core: datasets/core.md
- - Utils: datasets/utils.md
- Metrics:
- mAP: metrics/mean_average_precision.md
- mAR: metrics/mean_average_recall.md
diff --git a/src/supervision/__init__.py b/src/supervision/__init__.py
index 1bda28164d..3904dd3cde 100644
--- a/src/supervision/__init__.py
+++ b/src/supervision/__init__.py
@@ -44,7 +44,6 @@
DetectionDataset,
)
from supervision.dataset.formats.coco import get_coco_class_index_mapping
-from supervision.dataset.utils import mask_to_rle, rle_to_mask
from supervision.detection.core import Detections
from supervision.detection.line_zone import (
LineZone,
@@ -65,9 +64,11 @@
)
from supervision.detection.utils.converters import (
mask_to_polygons,
+ mask_to_rle,
mask_to_xyxy,
polygon_to_mask,
polygon_to_xyxy,
+ rle_to_mask,
xcycwh_to_xyxy,
xywh_to_xyxy,
xyxy_to_mask,
diff --git a/src/supervision/dataset/formats/coco.py b/src/supervision/dataset/formats/coco.py
index b63979846d..b7e7654f0e 100644
--- a/src/supervision/dataset/formats/coco.py
+++ b/src/supervision/dataset/formats/coco.py
@@ -10,11 +10,13 @@
from supervision.dataset.utils import (
approximate_mask_with_polygons,
map_detections_class_id,
+)
+from supervision.detection.core import Detections
+from supervision.detection.utils.converters import (
mask_to_rle,
+ polygon_to_mask,
rle_to_mask,
)
-from supervision.detection.core import Detections
-from supervision.detection.utils.converters import polygon_to_mask
from supervision.detection.utils.masks import contains_holes, contains_multiple_segments
from supervision.utils.file import read_json_file, save_json_file
diff --git a/src/supervision/dataset/utils.py b/src/supervision/dataset/utils.py
index f52111a3e4..3858a45ee2 100644
--- a/src/supervision/dataset/utils.py
+++ b/src/supervision/dataset/utils.py
@@ -13,10 +13,28 @@
from supervision.detection.core import Detections
from supervision.detection.utils.converters import mask_to_polygons
+from supervision.detection.utils.converters import (
+ mask_to_rle as _mask_to_rle,
+)
+from supervision.detection.utils.converters import (
+ rle_to_mask as _rle_to_mask,
+)
from supervision.detection.utils.polygons import (
approximate_polygon,
filter_polygons_by_area,
)
+from supervision.utils.internal import deprecated
+
+
+@deprecated("Import mask_to_rle from supervision.detection.utils.converters instead.")
+def mask_to_rle(*args, **kwargs): # type: ignore[no-untyped-def]
+ return _mask_to_rle(*args, **kwargs)
+
+
+@deprecated("Import rle_to_mask from supervision.detection.utils.converters instead.")
+def rle_to_mask(*args, **kwargs): # type: ignore[no-untyped-def]
+ return _rle_to_mask(*args, **kwargs)
+
if TYPE_CHECKING:
from supervision.dataset.core import DetectionDataset
@@ -137,131 +155,3 @@ def train_test_split(
split_index = int(len(data) * train_ratio)
return data[:split_index], data[split_index:]
-
-
-def rle_to_mask(
- rle: npt.NDArray[np.int_] | list[int], resolution_wh: tuple[int, int]
-) -> npt.NDArray[np.bool_]:
- """
- Converts run-length encoding (RLE) to a binary mask.
-
- Args:
- rle: The 1D RLE array, the format
- used in the COCO dataset (column-wise encoding, values of an array with
- even indices represent the number of pixels assigned as background,
- values of an array with odd indices represent the number of pixels
- assigned as foreground object).
- resolution_wh: The width (w) and height (h)
- of the desired binary mask.
-
- Returns:
- The generated 2D Boolean mask of shape `(h, w)`, where the foreground object is
- marked with `True`'s and the rest is filled with `False`'s.
-
- Raises:
- AssertionError: If the sum of pixels encoded in RLE differs from the
- number of pixels in the expected mask (computed based on resolution_wh).
-
- Examples:
- ```pycon
- >>> import numpy as np
- >>> import supervision as sv
- >>> mask = sv.rle_to_mask([5, 2, 2, 2, 5], (4, 4))
- >>> mask # doctest: +NORMALIZE_WHITESPACE
- array([[0, 0, 0, 0],
- [0, 1, 1, 0],
- [0, 1, 1, 0],
- [0, 0, 0, 0]], dtype=uint8)
-
- ```
- """
- if isinstance(rle, list):
- rle = np.array(rle, dtype=int)
-
- width, height = resolution_wh
-
- assert width * height == np.sum(rle), (
- "the sum of the number of pixels in the RLE must be the same "
- "as the number of pixels in the expected mask"
- )
-
- zero_one_values = np.zeros(shape=(rle.size, 1), dtype=np.uint8)
- zero_one_values[1::2] = 1
-
- decoded_rle = np.repeat(zero_one_values, rle, axis=0)
- decoded_rle = np.append(
- decoded_rle, np.zeros(width * height - len(decoded_rle), dtype=np.uint8)
- )
- return decoded_rle.reshape((height, width), order="F")
-
-
-def mask_to_rle(mask: npt.NDArray[np.bool_]) -> list[int]:
- """
- Converts a binary mask into a run-length encoding (RLE).
-
- Args:
- mask: 2D binary mask where `True` indicates foreground
- object and `False` indicates background.
-
- Returns:
- The run-length encoded mask. Values of a list with even indices
- represent the number of pixels assigned as background (`False`), values
- of a list with odd indices represent the number of pixels assigned
- as foreground object (`True`).
-
- Raises:
- AssertionError: If input mask is not 2D or is empty.
-
- Examples:
- ```pycon
- >>> import numpy as np
- >>> import supervision as sv
- >>> mask = np.array([
- ... [True, True, True, True],
- ... [True, True, True, True],
- ... [True, True, True, True],
- ... [True, True, True, True],
- ... ])
- >>> rle = sv.mask_to_rle(mask)
- >>> [int(x) for x in rle]
- [0, 16]
-
- ```
-
- ```pycon
- >>> import numpy as np
- >>> import supervision as sv
- >>> mask = np.array([
- ... [False, False, False, False],
- ... [False, True, True, False],
- ... [False, True, True, False],
- ... [False, False, False, False],
- ... ])
- >>> rle = sv.mask_to_rle(mask)
- >>> [int(x) for x in rle]
- [5, 2, 2, 2, 5]
-
- ```
-
- { align=center width="800" }
- """
- assert mask.ndim == 2, "Input mask must be 2D"
- assert mask.size != 0, "Input mask cannot be empty"
-
- on_value_change_indices = np.where(
- mask.ravel(order="F") != np.roll(mask.ravel(order="F"), 1)
- )[0]
-
- on_value_change_indices = np.append(on_value_change_indices, mask.size)
- # need to add 0 at the beginning when the same value is in the first and
- # last element of the flattened mask
- if on_value_change_indices[0] != 0:
- on_value_change_indices = np.insert(on_value_change_indices, 0, 0)
-
- rle = np.diff(on_value_change_indices)
-
- if mask[0][0] == 1:
- rle = np.insert(rle, 0, 0)
-
- return list(rle)
diff --git a/src/supervision/detection/utils/converters.py b/src/supervision/detection/utils/converters.py
index 604ada29c7..0bfcaae2e9 100644
--- a/src/supervision/detection/utils/converters.py
+++ b/src/supervision/detection/utils/converters.py
@@ -1,3 +1,7 @@
+from __future__ import annotations
+
+from typing import Literal, overload
+
import cv2
import numpy as np
import numpy.typing as npt
@@ -304,6 +308,236 @@ def mask_to_polygons(mask: npt.NDArray[np.bool_]) -> list[npt.NDArray[np.int32]]
]
+def _decode_coco_rle_string(s: str) -> list[int]:
+    """Decode a COCO compressed RLE counts string to a list of run-length integers.
+
+    Implements the decoding algorithm from the COCO API (pycocotools) for
+    compressed RLE strings. Each character, offset by ASCII 48, carries 5 data
+    bits plus a continuation flag (the last chunk's top data bit is the sign);
+    counts from index 3 onward are stored as deltas to the count two back.
+
+    Args:
+        s: The compressed RLE counts string.
+
+    Returns:
+        A list of run-length integers (alternating background/foreground counts).
+    """
+    counts: list[int] = []
+    i = 0
+    while i < len(s):
+        x = 0
+        k = 0
+        more = True
+        while more:
+            if i >= len(s):
+                raise ValueError(
+                    f"Malformed compressed RLE string: unexpected end at position {i}"
+                )
+            c = ord(s[i]) - 48
+            x |= (c & 0x1F) << (5 * k)
+            more = bool(c & 0x20)  # bit 5 set: another chunk follows
+            i += 1
+            k += 1
+            if not more and (c & 0x10):
+                x |= ~0 << (5 * k)  # sign-extend a negative value
+        if len(counts) > 2:
+            x += counts[-2]  # undo the delta against the count two back
+        counts.append(x)
+    return counts
+
+
+def _encode_coco_rle_string(counts: list[int]) -> str:
+    """Encode a list of run-length integers to a COCO compressed RLE string.
+
+    Implements the encoding algorithm from the COCO API (pycocotools).
+    The inverse of :func:`_decode_coco_rle_string`.
+
+    Args:
+        counts: A list of run-length integers (alternating background/foreground
+            counts).
+
+    Returns:
+        The compressed RLE counts string.
+    """
+    chars: list[str] = []
+    for i, cnt in enumerate(counts):
+        x = cnt - counts[i - 2] if i > 2 else cnt  # delta-encode from index 3 on
+        more = True
+        while more:
+            c = x & 0x1F  # low 5 bits of the remaining value
+            x >>= 5
+            more = (x != -1) if (c & 0x10) else (x != 0)
+            if more:
+                c |= 0x20  # flag: another chunk follows
+            chars.append(chr(c + 48))  # offset by ASCII 48 ("0")
+    return "".join(chars)
+
+
+def rle_to_mask(
+    rle: npt.NDArray[np.int_] | list[int] | str | bytes,
+    resolution_wh: tuple[int, int],
+) -> npt.NDArray[np.bool_]:
+    """
+    Converts run-length encoding (RLE) to a binary mask.
+
+    Args:
+        rle: The RLE data in one of the following formats:
+            - A 1D array or list of integers (uncompressed COCO RLE, where
+              values at even indices represent background pixel counts and
+              values at odd indices represent foreground pixel counts).
+            - A compressed COCO RLE string or bytes, as produced by
+              ``pycocotools.mask.encode``.
+        resolution_wh: The width (w) and height (h)
+            of the desired binary mask.
+
+    Returns:
+        The generated 2D Boolean mask of shape `(h, w)`, where the foreground object is
+        marked with `True`'s and the rest is filled with `False`'s.
+
+    Raises:
+        AssertionError: If the RLE pixel total differs from `w * h` of resolution_wh.
+        ValueError: If a compressed RLE string ends in the middle of a value.
+
+    Examples:
+        ```pycon
+        >>> import numpy as np
+        >>> import supervision as sv
+        >>> mask = sv.rle_to_mask([5, 2, 2, 2, 5], (4, 4))
+        >>> mask  # doctest: +NORMALIZE_WHITESPACE
+        array([[False, False, False, False],
+               [False,  True,  True, False],
+               [False,  True,  True, False],
+               [False, False, False, False]])
+
+        >>> mask = sv.rle_to_mask("52203", (4, 4))
+        >>> mask  # doctest: +NORMALIZE_WHITESPACE
+        array([[False, False, False, False],
+               [False,  True,  True, False],
+               [False,  True,  True, False],
+               [False, False, False, False]])
+
+        ```
+    """
+    if isinstance(rle, bytes):
+        rle = rle.decode("utf-8")
+    if isinstance(rle, str):
+        rle = np.array(_decode_coco_rle_string(rle), dtype=int)
+    elif isinstance(rle, list):
+        rle = np.array(rle, dtype=int)
+
+    width, height = resolution_wh
+
+    assert width * height == np.sum(rle), (
+        "the sum of the number of pixels in the RLE must be the same "
+        "as the number of pixels in the expected mask"
+    )
+
+    zero_one_values = np.zeros(shape=(rle.size, 1), dtype=np.uint8)
+    zero_one_values[1::2] = 1  # odd-indexed runs are foreground
+
+    decoded_rle = np.repeat(zero_one_values, rle, axis=0)
+    decoded_rle = np.append(
+        decoded_rle, np.zeros(width * height - len(decoded_rle), dtype=np.uint8)
+    )
+    return decoded_rle.reshape((height, width), order="F").astype(bool)  # column-major
+
+
+@overload
+def mask_to_rle(
+    mask: npt.NDArray[np.bool_], compressed: Literal[False] = ...
+) -> list[int]: ...
+
+
+@overload
+def mask_to_rle(
+    mask: npt.NDArray[np.bool_],
+    compressed: Literal[True],
+) -> str: ...
+
+
+def mask_to_rle(
+    mask: npt.NDArray[np.bool_], compressed: bool = False
+) -> list[int] | str:
+    """
+    Converts a binary mask into a run-length encoding (RLE).
+
+    Args:
+        mask: 2D binary mask where `True` indicates foreground
+            object and `False` indicates background.
+        compressed: If ``True``, return a compressed COCO RLE string
+            compatible with ``pycocotools``. If ``False`` (default),
+            return a list of integers.
+
+    Returns:
+        The run-length encoded mask. When ``compressed`` is ``False``,
+        values of a list with even indices represent the number of pixels
+        assigned as background (`False`), values of a list with odd indices
+        represent the number of pixels assigned as foreground object (`True`).
+        When ``compressed`` is ``True``, a COCO compressed RLE string.
+
+    Raises:
+        AssertionError: If input mask is not 2D or is empty.
+
+    Examples:
+        ```pycon
+        >>> import numpy as np
+        >>> import supervision as sv
+        >>> mask = np.array([
+        ...     [True, True, True, True],
+        ...     [True, True, True, True],
+        ...     [True, True, True, True],
+        ...     [True, True, True, True],
+        ... ])
+        >>> rle = sv.mask_to_rle(mask)
+        >>> [int(x) for x in rle]
+        [0, 16]
+
+        ```
+
+        ```pycon
+        >>> import numpy as np
+        >>> import supervision as sv
+        >>> mask = np.array([
+        ...     [False, False, False, False],
+        ...     [False, True, True, False],
+        ...     [False, True, True, False],
+        ...     [False, False, False, False],
+        ... ])
+        >>> rle = sv.mask_to_rle(mask)
+        >>> [int(x) for x in rle]
+        [5, 2, 2, 2, 5]
+
+        >>> sv.mask_to_rle(mask, compressed=True)
+        '52203'
+
+        ```
+
+    { align=center width="800" }
+    """
+    assert mask.ndim == 2, "Input mask must be 2D"
+    assert mask.size != 0, "Input mask cannot be empty"
+
+    on_value_change_indices = np.where(
+        mask.ravel(order="F") != np.roll(mask.ravel(order="F"), 1)
+    )[0]
+
+    on_value_change_indices = np.append(on_value_change_indices, mask.size)
+    # need to add 0 at the beginning when the same value is in the first and
+    # last element of the flattened mask
+    if on_value_change_indices[0] != 0:
+        on_value_change_indices = np.insert(on_value_change_indices, 0, 0)
+
+    rle = np.diff(on_value_change_indices)
+
+    if mask[0][0] == 1:
+        rle = np.insert(rle, 0, 0)
+
+    # tolist() yields built-in ints, matching the declared list[int] return type
+    counts = rle.tolist()
+    return _encode_coco_rle_string(counts) if compressed else counts
+
+
def polygon_to_xyxy(polygon: npt.NDArray[np.number]) -> npt.NDArray[np.number]:
"""
Converts a polygon represented by a NumPy array into a bounding box.
diff --git a/src/supervision/detection/utils/internal.py b/src/supervision/detection/utils/internal.py
index 937171209f..bf63cca3ce 100644
--- a/src/supervision/detection/utils/internal.py
+++ b/src/supervision/detection/utils/internal.py
@@ -8,7 +8,7 @@
import numpy.typing as npt
from supervision.config import CLASS_NAME_DATA_FIELD
-from supervision.detection.utils.converters import polygon_to_mask
+from supervision.detection.utils.converters import polygon_to_mask, rle_to_mask
from supervision.geometry.core import Vector
@@ -72,7 +72,7 @@ def process_roboflow_result(
confidence: list[float] = []
class_id: list[int] = []
class_name: list[str] = []
- masks: list[npt.NDArray[np.uint8]] = []
+ masks: list[npt.NDArray[np.bool_]] = []
tracker_ids: list[int] = []
image_width = int(roboflow_result["image"]["width"])
@@ -88,7 +88,27 @@ def process_roboflow_result(
x_max = x_min + width
y_max = y_min + height
- if "points" not in prediction:
+ rle_data = prediction.get("rle") or prediction.get("rle_mask")
+ if not isinstance(rle_data, dict) or not {
+ "size",
+ "counts",
+ }.issubset(rle_data):
+ rle_data = None
+ if rle_data is not None:
+ try:
+ h, w = rle_data["size"]
+ mask = rle_to_mask(rle_data["counts"], (w, h))
+ except (ValueError, AssertionError, KeyError, TypeError):
+ rle_data = None
+ if rle_data is not None:
+ xyxy.append([x_min, y_min, x_max, y_max])
+ class_id.append(prediction["class_id"])
+ class_name.append(prediction["class"])
+ confidence.append(prediction["confidence"])
+ masks.append(mask)
+ if "tracker_id" in prediction:
+ tracker_ids.append(prediction["tracker_id"])
+ elif "points" not in prediction:
xyxy.append([x_min, y_min, x_max, y_max])
class_id.append(prediction["class_id"])
class_name.append(prediction["class"])
diff --git a/tests/dataset/test_utils.py b/tests/dataset/test_utils.py
index b1e4980be1..febed32678 100644
--- a/tests/dataset/test_utils.py
+++ b/tests/dataset/test_utils.py
@@ -3,17 +3,13 @@
from contextlib import ExitStack as DoesNotRaise
from typing import TypeVar
-import numpy as np
-import numpy.typing as npt
import pytest
from supervision import Detections
from supervision.dataset.utils import (
build_class_index_mapping,
map_detections_class_id,
- mask_to_rle,
merge_class_lists,
- rle_to_mask,
train_test_split,
)
from tests.helpers import _create_detections
@@ -235,133 +231,3 @@ def test_map_detections_class_id(
source_to_target_mapping=source_to_target_mapping, detections=detections
)
assert result == expected_result
-
-
-@pytest.mark.parametrize(
- ("mask", "expected_rle", "exception"),
- [
- (
- np.zeros((3, 3)).astype(bool),
- [9],
- DoesNotRaise(),
- ), # mask with background only (mask with only False values)
- (
- np.ones((3, 3)).astype(bool),
- [0, 9],
- DoesNotRaise(),
- ), # mask with foreground only (mask with only True values)
- (
- np.array(
- [
- [0, 0, 0, 0, 0],
- [0, 1, 1, 1, 0],
- [0, 1, 0, 1, 0],
- [0, 1, 1, 1, 0],
- [0, 0, 0, 0, 0],
- ]
- ).astype(bool),
- [6, 3, 2, 1, 1, 1, 2, 3, 6],
- DoesNotRaise(),
- ), # mask where foreground object has hole
- (
- np.array(
- [
- [1, 0, 1, 0, 1],
- [1, 0, 1, 0, 1],
- [1, 0, 1, 0, 1],
- [1, 0, 1, 0, 1],
- [1, 0, 1, 0, 1],
- ]
- ).astype(bool),
- [0, 5, 5, 5, 5, 5],
- DoesNotRaise(),
- ), # mask where foreground consists of 3 separate components
- (
- np.array([[[]]]).astype(bool),
- None,
- pytest.raises(AssertionError, match="Input mask must be 2D"),
- ), # raises AssertionError because mask dimensionality is not 2D
- (
- np.array([[]]).astype(bool),
- None,
- pytest.raises(AssertionError, match="Input mask cannot be empty"),
- ), # raises AssertionError because mask is empty
- ],
-)
-def test_mask_to_rle(
- mask: npt.NDArray[np.bool_], expected_rle: list[int], exception: Exception
-) -> None:
- with exception:
- result = mask_to_rle(mask=mask)
- assert result == expected_rle
-
-
-@pytest.mark.parametrize(
- ("rle", "resolution_wh", "expected_mask", "exception"),
- [
- (
- np.array([9]),
- [3, 3],
- np.zeros((3, 3)).astype(bool),
- DoesNotRaise(),
- ), # mask with background only (mask with only False values); rle as array
- (
- [9],
- [3, 3],
- np.zeros((3, 3)).astype(bool),
- DoesNotRaise(),
- ), # mask with background only (mask with only False values); rle as list
- (
- np.array([0, 9]),
- [3, 3],
- np.ones((3, 3)).astype(bool),
- DoesNotRaise(),
- ), # mask with foreground only (mask with only True values)
- (
- np.array([6, 3, 2, 1, 1, 1, 2, 3, 6]),
- [5, 5],
- np.array(
- [
- [0, 0, 0, 0, 0],
- [0, 1, 1, 1, 0],
- [0, 1, 0, 1, 0],
- [0, 1, 1, 1, 0],
- [0, 0, 0, 0, 0],
- ]
- ).astype(bool),
- DoesNotRaise(),
- ), # mask where foreground object has hole
- (
- np.array([0, 5, 5, 5, 5, 5]),
- [5, 5],
- np.array(
- [
- [1, 0, 1, 0, 1],
- [1, 0, 1, 0, 1],
- [1, 0, 1, 0, 1],
- [1, 0, 1, 0, 1],
- [1, 0, 1, 0, 1],
- ]
- ).astype(bool),
- DoesNotRaise(),
- ), # mask where foreground consists of 3 separate components
- (
- np.array([0, 5, 5, 5, 5, 5]),
- [2, 2],
- None,
- pytest.raises(
- AssertionError, match="sum of the number of pixels in the RLE"
- ),
- ), # raises AssertionError because number of pixels in RLE does not match
- # number of pixels in expected mask (width x height).
- ],
-)
-def test_rle_to_mask(
- rle: npt.NDArray[np.int_],
- resolution_wh: tuple[int, int],
- expected_mask: npt.NDArray[np.bool_],
- exception: Exception,
-) -> None:
- with exception:
- result = rle_to_mask(rle=rle, resolution_wh=resolution_wh)
- assert np.all(result == expected_mask)
diff --git a/tests/detection/utils/test_converters.py b/tests/detection/utils/test_converters.py
index 5372343eb4..4e39448aac 100644
--- a/tests/detection/utils/test_converters.py
+++ b/tests/detection/utils/test_converters.py
@@ -1,9 +1,16 @@
from __future__ import annotations
+from contextlib import ExitStack as DoesNotRaise
+
import numpy as np
+import numpy.typing as npt
import pytest
from supervision.detection.utils.converters import (
+ _decode_coco_rle_string,
+ _encode_coco_rle_string,
+ mask_to_rle,
+ rle_to_mask,
xcycwh_to_xyxy,
xywh_to_xyxy,
xyxy_to_mask,
@@ -301,3 +308,224 @@ def test_xyxy_to_mask(boxes: np.ndarray, resolution_wh, expected: np.ndarray) ->
assert result.dtype == np.bool_
assert result.shape == expected.shape
np.testing.assert_array_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+ "counts",
+ [
+ [5, 2, 2, 2, 5],
+ [0, 16],
+ [9],
+ [0, 5, 5, 5, 5, 5],
+ [6, 3, 2, 1, 1, 1, 2, 3, 6],
+ [3, 1, 2, 4, 2, 1, 3],
+ ],
+)
+def test_coco_rle_encode_decode_round_trip(counts: list[int]) -> None:
+ encoded = _encode_coco_rle_string(counts)
+ decoded = _decode_coco_rle_string(encoded)
+ assert decoded == counts
+
+
+@pytest.mark.parametrize(
+ ("mask", "compressed", "expected_rle", "exception"),
+ [
+ (
+ np.zeros((3, 3)).astype(bool),
+ False,
+ [9],
+ DoesNotRaise(),
+ ), # mask with background only (mask with only False values)
+ (
+ np.ones((3, 3)).astype(bool),
+ False,
+ [0, 9],
+ DoesNotRaise(),
+ ), # mask with foreground only (mask with only True values)
+ (
+ np.array(
+ [
+ [0, 0, 0, 0, 0],
+ [0, 1, 1, 1, 0],
+ [0, 1, 0, 1, 0],
+ [0, 1, 1, 1, 0],
+ [0, 0, 0, 0, 0],
+ ]
+ ).astype(bool),
+ False,
+ [6, 3, 2, 1, 1, 1, 2, 3, 6],
+ DoesNotRaise(),
+ ), # mask where foreground object has hole
+ (
+ np.array(
+ [
+ [1, 0, 1, 0, 1],
+ [1, 0, 1, 0, 1],
+ [1, 0, 1, 0, 1],
+ [1, 0, 1, 0, 1],
+ [1, 0, 1, 0, 1],
+ ]
+ ).astype(bool),
+ False,
+ [0, 5, 5, 5, 5, 5],
+ DoesNotRaise(),
+ ), # mask where foreground consists of 3 separate components
+ (
+ np.array(
+ [
+ [False, False, False, False],
+ [False, True, True, False],
+ [False, True, True, False],
+ [False, False, False, False],
+ ]
+ ),
+ True,
+ "52203",
+ DoesNotRaise(),
+ ), # compressed RLE string
+ (
+ np.array([[[]]]).astype(bool),
+ False,
+ None,
+ pytest.raises(AssertionError, match="Input mask must be 2D"),
+ ), # raises AssertionError because mask dimensionality is not 2D
+ (
+ np.array([[]]).astype(bool),
+ False,
+ None,
+ pytest.raises(AssertionError, match="Input mask cannot be empty"),
+ ), # raises AssertionError because mask is empty
+ ],
+)
+def test_mask_to_rle(
+ mask: npt.NDArray[np.bool_],
+ compressed: bool,
+ expected_rle: list[int] | str | None,
+ exception: Exception,
+) -> None:
+ with exception:
+ result = mask_to_rle(mask=mask, compressed=compressed)
+ assert result == expected_rle
+
+
+@pytest.mark.parametrize(
+ ("rle", "resolution_wh", "expected_mask", "exception"),
+ [
+ (
+ np.array([9]),
+ [3, 3],
+ np.zeros((3, 3)).astype(bool),
+ DoesNotRaise(),
+ ), # mask with background only (mask with only False values); rle as array
+ (
+ [9],
+ [3, 3],
+ np.zeros((3, 3)).astype(bool),
+ DoesNotRaise(),
+ ), # mask with background only (mask with only False values); rle as list
+ (
+ np.array([0, 9]),
+ [3, 3],
+ np.ones((3, 3)).astype(bool),
+ DoesNotRaise(),
+ ), # mask with foreground only (mask with only True values)
+ (
+ np.array([6, 3, 2, 1, 1, 1, 2, 3, 6]),
+ [5, 5],
+ np.array(
+ [
+ [0, 0, 0, 0, 0],
+ [0, 1, 1, 1, 0],
+ [0, 1, 0, 1, 0],
+ [0, 1, 1, 1, 0],
+ [0, 0, 0, 0, 0],
+ ]
+ ).astype(bool),
+ DoesNotRaise(),
+ ), # mask where foreground object has hole
+ (
+ np.array([0, 5, 5, 5, 5, 5]),
+ [5, 5],
+ np.array(
+ [
+ [1, 0, 1, 0, 1],
+ [1, 0, 1, 0, 1],
+ [1, 0, 1, 0, 1],
+ [1, 0, 1, 0, 1],
+ [1, 0, 1, 0, 1],
+ ]
+ ).astype(bool),
+ DoesNotRaise(),
+ ), # mask where foreground consists of 3 separate components
+ (
+ np.array([0, 5, 5, 5, 5, 5]),
+ [2, 2],
+ None,
+ pytest.raises(
+ AssertionError, match="sum of the number of pixels in the RLE"
+ ),
+ ), # raises AssertionError because number of pixels in RLE does not match
+ # number of pixels in expected mask (width x height).
+ (
+ b"3124OM1",
+ [4, 4],
+ np.array(
+ [
+ [0, 0, 1, 1],
+ [0, 0, 1, 1],
+ [0, 1, 1, 0],
+ [1, 1, 0, 0],
+ ]
+ ).astype(bool),
+ DoesNotRaise(),
+ ), # compressed RLE bytes
+ (
+ "52203",
+ [4, 4],
+ np.array(
+ [
+ [0, 0, 0, 0],
+ [0, 1, 1, 0],
+ [0, 1, 1, 0],
+ [0, 0, 0, 0],
+ ]
+ ).astype(bool),
+ DoesNotRaise(),
+ ), # compressed RLE string
+ (
+ "!",
+ [4, 4],
+ None,
+ pytest.raises(ValueError, match="Malformed compressed RLE string"),
+ ), # malformed compressed RLE string with invalid character
+ (
+ "52P",
+ [4, 4],
+ None,
+ pytest.raises(ValueError, match="Malformed compressed RLE string"),
+ ), # malformed compressed RLE: unterminated continuation byte
+ ],
+)
+def test_rle_to_mask(
+ rle: npt.NDArray[np.int_],
+ resolution_wh: tuple[int, int],
+ expected_mask: npt.NDArray[np.bool_],
+ exception: Exception,
+) -> None:
+ with exception:
+ result = rle_to_mask(rle=rle, resolution_wh=resolution_wh)
+ assert np.all(result == expected_mask)
+
+
+def test_mask_rle_compressed_round_trip() -> None:
+ mask = np.array(
+ [
+ [False, False, False, False],
+ [False, True, True, False],
+ [False, True, True, False],
+ [False, False, False, False],
+ ]
+ )
+ compressed = mask_to_rle(mask, compressed=True)
+ recovered = rle_to_mask(compressed, (4, 4))
+ np.testing.assert_array_equal(mask, recovered)
diff --git a/tests/detection/utils/test_internal.py b/tests/detection/utils/test_internal.py
index c919182f5c..ccc9531dfa 100644
--- a/tests/detection/utils/test_internal.py
+++ b/tests/detection/utils/test_internal.py
@@ -17,6 +17,13 @@
TEST_MASK = np.zeros((1, 1000, 1000), dtype=bool)
TEST_MASK[:, 300:351, 200:251] = True
+TEST_RLE_MASK = np.zeros((1, 4, 4), dtype=bool)
+TEST_RLE_MASK[0, 1:3, 1:3] = True
+
+TEST_RLE_NONCONTIGUOUS_MASK = np.zeros((1, 4, 4), dtype=bool)
+TEST_RLE_NONCONTIGUOUS_MASK[0, 0:2, 0:2] = True
+TEST_RLE_NONCONTIGUOUS_MASK[0, 3, 2:4] = True
+
@pytest.mark.parametrize(
("roboflow_result", "expected_result", "exception"),
@@ -219,6 +226,189 @@
),
DoesNotRaise(),
), # two instance segmentation results - one correct, one incorrect
+ (
+ {
+ "predictions": [
+ {
+ "x": 1.5,
+ "y": 1.5,
+ "width": 2.0,
+ "height": 2.0,
+ "confidence": 0.9,
+ "class_id": 0,
+ "class": "person",
+ "rle": {"size": [4, 4], "counts": "52203"},
+ }
+ ],
+ "image": {"width": 4, "height": 4},
+ },
+ (
+ np.array([[0.5, 0.5, 2.5, 2.5]]),
+ np.array([0.9]),
+ np.array([0]),
+ TEST_RLE_MASK,
+ None,
+ {CLASS_NAME_DATA_FIELD: np.array(["person"])},
+ ),
+ DoesNotRaise(),
+ ), # single RLE prediction with compressed string counts
+ (
+ {
+ "predictions": [
+ {
+ "x": 2.0,
+ "y": 2.0,
+ "width": 4.0,
+ "height": 4.0,
+ "confidence": 0.85,
+ "class_id": 1,
+ "class": "cat",
+ "rle": {"size": [4, 4], "counts": "02203ON0"},
+ }
+ ],
+ "image": {"width": 4, "height": 4},
+ },
+ (
+ np.array([[0.0, 0.0, 4.0, 4.0]]),
+ np.array([0.85]),
+ np.array([1]),
+ TEST_RLE_NONCONTIGUOUS_MASK,
+ None,
+ {CLASS_NAME_DATA_FIELD: np.array(["cat"])},
+ ),
+ DoesNotRaise(),
+ ), # single RLE prediction with non-contiguous mask
+ (
+ {
+ "predictions": [
+ {
+ "x": 1.5,
+ "y": 1.5,
+ "width": 2.0,
+ "height": 2.0,
+ "confidence": 0.9,
+ "class_id": 0,
+ "class": "person",
+ "rle": {"size": [4, 4], "counts": "52203"},
+ "tracker_id": 5,
+ }
+ ],
+ "image": {"width": 4, "height": 4},
+ },
+ (
+ np.array([[0.5, 0.5, 2.5, 2.5]]),
+ np.array([0.9]),
+ np.array([0]),
+ TEST_RLE_MASK,
+ np.array([5]),
+ {CLASS_NAME_DATA_FIELD: np.array(["person"])},
+ ),
+ DoesNotRaise(),
+ ), # RLE prediction with tracker_id
+ (
+ {
+ "predictions": [
+ {
+ "x": 1.5,
+ "y": 1.5,
+ "width": 2.0,
+ "height": 2.0,
+ "confidence": 0.9,
+ "class_id": 0,
+ "class": "person",
+ "rle_mask": {"size": [4, 4], "counts": "52203"},
+ }
+ ],
+ "image": {"width": 4, "height": 4},
+ },
+ (
+ np.array([[0.5, 0.5, 2.5, 2.5]]),
+ np.array([0.9]),
+ np.array([0]),
+ TEST_RLE_MASK,
+ None,
+ {CLASS_NAME_DATA_FIELD: np.array(["person"])},
+ ),
+ DoesNotRaise(),
+ ), # single RLE prediction with compressed string counts under rle_mask key
+ (
+ {
+ "predictions": [
+ {
+ "x": 1.5,
+ "y": 1.5,
+ "width": 2.0,
+ "height": 2.0,
+ "confidence": 0.9,
+ "class_id": 0,
+ "class": "person",
+ "rle": "bad_string",
+ }
+ ],
+ "image": {"width": 4, "height": 4},
+ },
+ (
+ np.array([[0.5, 0.5, 2.5, 2.5]]),
+ np.array([0.9]),
+ np.array([0]),
+ None,
+ None,
+ {CLASS_NAME_DATA_FIELD: np.array(["person"])},
+ ),
+ DoesNotRaise(),
+ ), # malformed RLE payload should fall through to box-only detection
+ (
+ {
+ "predictions": [
+ {
+ "x": 1.5,
+ "y": 1.5,
+ "width": 2.0,
+ "height": 2.0,
+ "confidence": 0.9,
+ "class_id": 0,
+ "class": "person",
+ "rle": {"size": [4, 4]},
+ }
+ ],
+ "image": {"width": 4, "height": 4},
+ },
+ (
+ np.array([[0.5, 0.5, 2.5, 2.5]]),
+ np.array([0.9]),
+ np.array([0]),
+ None,
+ None,
+ {CLASS_NAME_DATA_FIELD: np.array(["person"])},
+ ),
+ DoesNotRaise(),
+ ), # RLE dict missing counts falls through to box-only detection
+ (
+ {
+ "predictions": [
+ {
+ "x": 1.5,
+ "y": 1.5,
+ "width": 2.0,
+ "height": 2.0,
+ "confidence": 0.9,
+ "class_id": 0,
+ "class": "person",
+ "rle": {"size": [4, 4], "counts": "!"},
+ }
+ ],
+ "image": {"width": 4, "height": 4},
+ },
+ (
+ np.array([[0.5, 0.5, 2.5, 2.5]]),
+ np.array([0.9]),
+ np.array([0]),
+ None,
+ None,
+ {CLASS_NAME_DATA_FIELD: np.array(["person"])},
+ ),
+ DoesNotRaise(),
+ ), # malformed compressed counts falls through to box-only detection
],
)
def test_process_roboflow_result(