diff --git a/docs/changelog.md b/docs/changelog.md index e59cd17e08..92f1ba93fc 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,11 @@ # Changelog +### develop + +- Changed [#2178](https://github.com/roboflow/supervision/pull/2178): [`sv.Detections.from_inference`](https://supervision.roboflow.com/latest/detection/core/#supervision.detection.core.Detections.from_inference) now supports compressed COCO RLE masks. Inference responses whose `rle` or `rle_mask` field is a COCO RLE object with `size` and `counts` keys, where `counts` is a compressed string (as produced by `pycocotools`), are decoded directly into binary masks, avoiding a lossy polygon round-trip. + +- Changed [#2178](https://github.com/roboflow/supervision/pull/2178): [`sv.rle_to_mask`](https://supervision.roboflow.com/latest/detection/utils/converters/#supervision.detection.utils.converters.rle_to_mask) and [`sv.mask_to_rle`](https://supervision.roboflow.com/latest/detection/utils/converters/#supervision.detection.utils.converters.mask_to_rle) moved to `supervision.detection.utils.converters`. The old import path `supervision.dataset.utils` continues to work but is deprecated. `sv.rle_to_mask` now also accepts compressed COCO RLE counts (`str` or `bytes`) and returns a boolean mask instead of `uint8`; `sv.mask_to_rle` accepts `compressed=True` to return a compressed COCO RLE string. + ### 0.27.0 Nov 16, 2025 - Added [#2008](https://github.com/roboflow/supervision/pull/2008): [`sv.filter_segments_by_distance`](https://supervision.roboflow.com/0.27.0/detection/utils/masks/#supervision.detection.utils.masks.filter_segments_by_distance) to keep the largest connected component and nearby components within an absolute or relative distance threshold. Useful for cleaning segmentation predictions from models such as SAM, SAM2, YOLO segmentation, and RF-DETR segmentation. diff --git a/docs/datasets/utils.md b/docs/datasets/utils.md deleted file mode 100644 index c35b52572f..0000000000 --- a/docs/datasets/utils.md +++ /dev/null @@ -1,17 +0,0 @@ ---- -comments: true ---- - -# Datasets Utils - -
-

rle_to_mask

-
- -:::supervision.dataset.utils.rle_to_mask - -
-

mask_to_rle

-
- -:::supervision.dataset.utils.mask_to_rle diff --git a/docs/detection/utils/converters.md b/docs/detection/utils/converters.md index b6b1e2af6c..c4c6750e83 100644 --- a/docs/detection/utils/converters.md +++ b/docs/detection/utils/converters.md @@ -64,3 +64,15 @@ status: new :::supervision.detection.utils.converters.xyxy_to_mask + +
+

rle_to_mask

+
+ +:::supervision.detection.utils.converters.rle_to_mask + +
+

mask_to_rle

+
+ +:::supervision.detection.utils.converters.mask_to_rle diff --git a/mkdocs.yml b/mkdocs.yml index 5223928d70..925c378ab8 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -63,7 +63,6 @@ nav: - Trackers: trackers.md - Datasets: - Core: datasets/core.md - - Utils: datasets/utils.md - Metrics: - mAP: metrics/mean_average_precision.md - mAR: metrics/mean_average_recall.md diff --git a/src/supervision/__init__.py b/src/supervision/__init__.py index 1bda28164d..3904dd3cde 100644 --- a/src/supervision/__init__.py +++ b/src/supervision/__init__.py @@ -44,7 +44,6 @@ DetectionDataset, ) from supervision.dataset.formats.coco import get_coco_class_index_mapping -from supervision.dataset.utils import mask_to_rle, rle_to_mask from supervision.detection.core import Detections from supervision.detection.line_zone import ( LineZone, @@ -65,9 +64,11 @@ ) from supervision.detection.utils.converters import ( mask_to_polygons, + mask_to_rle, mask_to_xyxy, polygon_to_mask, polygon_to_xyxy, + rle_to_mask, xcycwh_to_xyxy, xywh_to_xyxy, xyxy_to_mask, diff --git a/src/supervision/dataset/formats/coco.py b/src/supervision/dataset/formats/coco.py index b63979846d..b7e7654f0e 100644 --- a/src/supervision/dataset/formats/coco.py +++ b/src/supervision/dataset/formats/coco.py @@ -10,11 +10,13 @@ from supervision.dataset.utils import ( approximate_mask_with_polygons, map_detections_class_id, +) +from supervision.detection.core import Detections +from supervision.detection.utils.converters import ( mask_to_rle, + polygon_to_mask, rle_to_mask, ) -from supervision.detection.core import Detections -from supervision.detection.utils.converters import polygon_to_mask from supervision.detection.utils.masks import contains_holes, contains_multiple_segments from supervision.utils.file import read_json_file, save_json_file diff --git a/src/supervision/dataset/utils.py b/src/supervision/dataset/utils.py index f52111a3e4..3858a45ee2 100644 --- a/src/supervision/dataset/utils.py +++ 
b/src/supervision/dataset/utils.py @@ -13,10 +13,28 @@ from supervision.detection.core import Detections from supervision.detection.utils.converters import mask_to_polygons +from supervision.detection.utils.converters import ( + mask_to_rle as _mask_to_rle, +) +from supervision.detection.utils.converters import ( + rle_to_mask as _rle_to_mask, +) from supervision.detection.utils.polygons import ( approximate_polygon, filter_polygons_by_area, ) +from supervision.utils.internal import deprecated + + +@deprecated("Import mask_to_rle from supervision.detection.utils.converters instead.") +def mask_to_rle(*args, **kwargs): # type: ignore[no-untyped-def] + return _mask_to_rle(*args, **kwargs) + + +@deprecated("Import rle_to_mask from supervision.detection.utils.converters instead.") +def rle_to_mask(*args, **kwargs): # type: ignore[no-untyped-def] + return _rle_to_mask(*args, **kwargs) + if TYPE_CHECKING: from supervision.dataset.core import DetectionDataset @@ -137,131 +155,3 @@ def train_test_split( split_index = int(len(data) * train_ratio) return data[:split_index], data[split_index:] - - -def rle_to_mask( - rle: npt.NDArray[np.int_] | list[int], resolution_wh: tuple[int, int] -) -> npt.NDArray[np.bool_]: - """ - Converts run-length encoding (RLE) to a binary mask. - - Args: - rle: The 1D RLE array, the format - used in the COCO dataset (column-wise encoding, values of an array with - even indices represent the number of pixels assigned as background, - values of an array with odd indices represent the number of pixels - assigned as foreground object). - resolution_wh: The width (w) and height (h) - of the desired binary mask. - - Returns: - The generated 2D Boolean mask of shape `(h, w)`, where the foreground object is - marked with `True`'s and the rest is filled with `False`'s. - - Raises: - AssertionError: If the sum of pixels encoded in RLE differs from the - number of pixels in the expected mask (computed based on resolution_wh). 
- - Examples: - ```pycon - >>> import numpy as np - >>> import supervision as sv - >>> mask = sv.rle_to_mask([5, 2, 2, 2, 5], (4, 4)) - >>> mask # doctest: +NORMALIZE_WHITESPACE - array([[0, 0, 0, 0], - [0, 1, 1, 0], - [0, 1, 1, 0], - [0, 0, 0, 0]], dtype=uint8) - - ``` - """ - if isinstance(rle, list): - rle = np.array(rle, dtype=int) - - width, height = resolution_wh - - assert width * height == np.sum(rle), ( - "the sum of the number of pixels in the RLE must be the same " - "as the number of pixels in the expected mask" - ) - - zero_one_values = np.zeros(shape=(rle.size, 1), dtype=np.uint8) - zero_one_values[1::2] = 1 - - decoded_rle = np.repeat(zero_one_values, rle, axis=0) - decoded_rle = np.append( - decoded_rle, np.zeros(width * height - len(decoded_rle), dtype=np.uint8) - ) - return decoded_rle.reshape((height, width), order="F") - - -def mask_to_rle(mask: npt.NDArray[np.bool_]) -> list[int]: - """ - Converts a binary mask into a run-length encoding (RLE). - - Args: - mask: 2D binary mask where `True` indicates foreground - object and `False` indicates background. - - Returns: - The run-length encoded mask. Values of a list with even indices - represent the number of pixels assigned as background (`False`), values - of a list with odd indices represent the number of pixels assigned - as foreground object (`True`). - - Raises: - AssertionError: If input mask is not 2D or is empty. - - Examples: - ```pycon - >>> import numpy as np - >>> import supervision as sv - >>> mask = np.array([ - ... [True, True, True, True], - ... [True, True, True, True], - ... [True, True, True, True], - ... [True, True, True, True], - ... ]) - >>> rle = sv.mask_to_rle(mask) - >>> [int(x) for x in rle] - [0, 16] - - ``` - - ```pycon - >>> import numpy as np - >>> import supervision as sv - >>> mask = np.array([ - ... [False, False, False, False], - ... [False, True, True, False], - ... [False, True, True, False], - ... [False, False, False, False], - ... 
]) - >>> rle = sv.mask_to_rle(mask) - >>> [int(x) for x in rle] - [5, 2, 2, 2, 5] - - ``` - - ![mask_to_rle](https://media.roboflow.com/supervision-docs/ - mask-to-rle.png){ align=center width="800" } - """ - assert mask.ndim == 2, "Input mask must be 2D" - assert mask.size != 0, "Input mask cannot be empty" - - on_value_change_indices = np.where( - mask.ravel(order="F") != np.roll(mask.ravel(order="F"), 1) - )[0] - - on_value_change_indices = np.append(on_value_change_indices, mask.size) - # need to add 0 at the beginning when the same value is in the first and - # last element of the flattened mask - if on_value_change_indices[0] != 0: - on_value_change_indices = np.insert(on_value_change_indices, 0, 0) - - rle = np.diff(on_value_change_indices) - - if mask[0][0] == 1: - rle = np.insert(rle, 0, 0) - - return list(rle) diff --git a/src/supervision/detection/utils/converters.py b/src/supervision/detection/utils/converters.py index 604ada29c7..0bfcaae2e9 100644 --- a/src/supervision/detection/utils/converters.py +++ b/src/supervision/detection/utils/converters.py @@ -1,3 +1,7 @@ +from __future__ import annotations + +from typing import Literal, overload + import cv2 import numpy as np import numpy.typing as npt @@ -304,6 +308,236 @@ def mask_to_polygons(mask: npt.NDArray[np.bool_]) -> list[npt.NDArray[np.int32]] ] +def _decode_coco_rle_string(s: str) -> list[int]: + """Decode a COCO compressed RLE counts string to a list of run-length integers. + + Implements the decoding algorithm from the COCO API (pycocotools) for + compressed RLE strings. Each character encodes 5 data bits in a base-48 + scheme with continuation and sign flags, using delta encoding for indices + beyond the first two. + + Args: + s: The compressed RLE counts string. + + Returns: + A list of run-length integers (alternating background/foreground counts). 
+ """ + counts: list[int] = [] + i = 0 + while i < len(s): + x = 0 + k = 0 + more = True + while more: + if i >= len(s): + raise ValueError( + f"Malformed compressed RLE string: unexpected end at position {i}" + ) + c = ord(s[i]) - 48 + x |= (c & 0x1F) << (5 * k) + more = bool(c & 0x20) + i += 1 + k += 1 + if not more and (c & 0x10): + x |= ~0 << (5 * k) + if len(counts) > 2: + x += counts[-2] + counts.append(x) + return counts + + +def _encode_coco_rle_string(counts: list[int]) -> str: + """Encode a list of run-length integers to a COCO compressed RLE string. + + Implements the encoding algorithm from the COCO API (pycocotools). + The inverse of :func:`_decode_coco_rle_string`. + + Args: + counts: A list of run-length integers (alternating background/foreground + counts). + + Returns: + The compressed RLE counts string. + """ + chars: list[str] = [] + for i, cnt in enumerate(counts): + x = cnt - counts[i - 2] if i > 2 else cnt + more = True + while more: + c = x & 0x1F + x >>= 5 + more = (x != -1) if (c & 0x10) else (x != 0) + if more: + c |= 0x20 + chars.append(chr(c + 48)) + return "".join(chars) + + +def rle_to_mask( + rle: npt.NDArray[np.int_] | list[int] | str | bytes, + resolution_wh: tuple[int, int], +) -> npt.NDArray[np.bool_]: + """ + Converts run-length encoding (RLE) to a binary mask. + + Args: + rle: The RLE data in one of the following formats: + - A 1D array or list of integers (uncompressed COCO RLE, where + values at even indices represent background pixel counts and + values at odd indices represent foreground pixel counts). + - A compressed COCO RLE string or bytes, as produced by + ``pycocotools.mask.encode``. + resolution_wh: The width (w) and height (h) + of the desired binary mask. + + Returns: + The generated 2D Boolean mask of shape `(h, w)`, where the foreground object is + marked with `True`'s and the rest is filled with `False`'s. 
+ + Raises: + AssertionError: If the sum of pixels encoded in RLE differs from the + number of pixels in the expected mask (computed based on resolution_wh). + + Examples: + ```pycon + >>> import numpy as np + >>> import supervision as sv + >>> mask = sv.rle_to_mask([5, 2, 2, 2, 5], (4, 4)) + >>> mask # doctest: +NORMALIZE_WHITESPACE + array([[False, False, False, False], + [False, True, True, False], + [False, True, True, False], + [False, False, False, False]]) + + >>> mask = sv.rle_to_mask("52203", (4, 4)) + >>> mask # doctest: +NORMALIZE_WHITESPACE + array([[False, False, False, False], + [False, True, True, False], + [False, True, True, False], + [False, False, False, False]]) + + ``` + """ + if isinstance(rle, bytes): + rle = rle.decode("utf-8") + if isinstance(rle, str): + rle = np.array(_decode_coco_rle_string(rle), dtype=int) + elif isinstance(rle, list): + rle = np.array(rle, dtype=int) + + width, height = resolution_wh + + assert width * height == np.sum(rle), ( + "the sum of the number of pixels in the RLE must be the same " + "as the number of pixels in the expected mask" + ) + + zero_one_values = np.zeros(shape=(rle.size, 1), dtype=np.uint8) + zero_one_values[1::2] = 1 + + decoded_rle = np.repeat(zero_one_values, rle, axis=0) + decoded_rle = np.append( + decoded_rle, np.zeros(width * height - len(decoded_rle), dtype=np.uint8) + ) + return decoded_rle.reshape((height, width), order="F").astype(bool) + + +@overload +def mask_to_rle( + mask: npt.NDArray[np.bool_], compressed: Literal[False] = ... +) -> list[int]: ... + + +@overload +def mask_to_rle( + mask: npt.NDArray[np.bool_], compressed: Literal[True] = ... +) -> str: ... + + +def mask_to_rle( + mask: npt.NDArray[np.bool_], compressed: bool = False +) -> list[int] | str: + """ + Converts a binary mask into a run-length encoding (RLE). + + Args: + mask: 2D binary mask where `True` indicates foreground + object and `False` indicates background. 
+ compressed: If ``True``, return a compressed COCO RLE string + compatible with ``pycocotools``. If ``False`` (default), + return a list of integers. + + Returns: + The run-length encoded mask. When ``compressed`` is ``False``, + values of a list with even indices represent the number of pixels + assigned as background (`False`), values of a list with odd indices + represent the number of pixels assigned as foreground object (`True`). + When ``compressed`` is ``True``, a COCO compressed RLE string. + + Raises: + AssertionError: If input mask is not 2D or is empty. + + Examples: + ```pycon + >>> import numpy as np + >>> import supervision as sv + >>> mask = np.array([ + ... [True, True, True, True], + ... [True, True, True, True], + ... [True, True, True, True], + ... [True, True, True, True], + ... ]) + >>> rle = sv.mask_to_rle(mask) + >>> [int(x) for x in rle] + [0, 16] + + ``` + + ```pycon + >>> import numpy as np + >>> import supervision as sv + >>> mask = np.array([ + ... [False, False, False, False], + ... [False, True, True, False], + ... [False, True, True, False], + ... [False, False, False, False], + ... 
]) + >>> rle = sv.mask_to_rle(mask) + >>> [int(x) for x in rle] + [5, 2, 2, 2, 5] + + >>> sv.mask_to_rle(mask, compressed=True) + '52203' + + ``` + + ![mask_to_rle](https://media.roboflow.com/supervision-docs/ + mask-to-rle.png){ align=center width="800" } + """ + assert mask.ndim == 2, "Input mask must be 2D" + assert mask.size != 0, "Input mask cannot be empty" + + on_value_change_indices = np.where( + mask.ravel(order="F") != np.roll(mask.ravel(order="F"), 1) + )[0] + + on_value_change_indices = np.append(on_value_change_indices, mask.size) + # need to add 0 at the beginning when the same value is in the first and + # last element of the flattened mask + if on_value_change_indices[0] != 0: + on_value_change_indices = np.insert(on_value_change_indices, 0, 0) + + rle = np.diff(on_value_change_indices) + + if mask[0][0] == 1: + rle = np.insert(rle, 0, 0) + + counts = list(rle) + if compressed: + return _encode_coco_rle_string(counts) + return counts + + def polygon_to_xyxy(polygon: npt.NDArray[np.number]) -> npt.NDArray[np.number]: """ Converts a polygon represented by a NumPy array into a bounding box. 
diff --git a/src/supervision/detection/utils/internal.py b/src/supervision/detection/utils/internal.py index 937171209f..bf63cca3ce 100644 --- a/src/supervision/detection/utils/internal.py +++ b/src/supervision/detection/utils/internal.py @@ -8,7 +8,7 @@ import numpy.typing as npt from supervision.config import CLASS_NAME_DATA_FIELD -from supervision.detection.utils.converters import polygon_to_mask +from supervision.detection.utils.converters import polygon_to_mask, rle_to_mask from supervision.geometry.core import Vector @@ -72,7 +72,7 @@ def process_roboflow_result( confidence: list[float] = [] class_id: list[int] = [] class_name: list[str] = [] - masks: list[npt.NDArray[np.uint8]] = [] + masks: list[npt.NDArray[np.bool_]] = [] tracker_ids: list[int] = [] image_width = int(roboflow_result["image"]["width"]) @@ -88,7 +88,27 @@ def process_roboflow_result( x_max = x_min + width y_max = y_min + height - if "points" not in prediction: + rle_data = prediction.get("rle") or prediction.get("rle_mask") + if not isinstance(rle_data, dict) or not { + "size", + "counts", + }.issubset(rle_data): + rle_data = None + if rle_data is not None: + try: + h, w = rle_data["size"] + mask = rle_to_mask(rle_data["counts"], (w, h)) + except (ValueError, AssertionError, KeyError, TypeError): + rle_data = None + if rle_data is not None: + xyxy.append([x_min, y_min, x_max, y_max]) + class_id.append(prediction["class_id"]) + class_name.append(prediction["class"]) + confidence.append(prediction["confidence"]) + masks.append(mask) + if "tracker_id" in prediction: + tracker_ids.append(prediction["tracker_id"]) + elif "points" not in prediction: xyxy.append([x_min, y_min, x_max, y_max]) class_id.append(prediction["class_id"]) class_name.append(prediction["class"]) diff --git a/tests/dataset/test_utils.py b/tests/dataset/test_utils.py index b1e4980be1..febed32678 100644 --- a/tests/dataset/test_utils.py +++ b/tests/dataset/test_utils.py @@ -3,17 +3,13 @@ from contextlib import ExitStack as 
DoesNotRaise from typing import TypeVar -import numpy as np -import numpy.typing as npt import pytest from supervision import Detections from supervision.dataset.utils import ( build_class_index_mapping, map_detections_class_id, - mask_to_rle, merge_class_lists, - rle_to_mask, train_test_split, ) from tests.helpers import _create_detections @@ -235,133 +231,3 @@ def test_map_detections_class_id( source_to_target_mapping=source_to_target_mapping, detections=detections ) assert result == expected_result - - -@pytest.mark.parametrize( - ("mask", "expected_rle", "exception"), - [ - ( - np.zeros((3, 3)).astype(bool), - [9], - DoesNotRaise(), - ), # mask with background only (mask with only False values) - ( - np.ones((3, 3)).astype(bool), - [0, 9], - DoesNotRaise(), - ), # mask with foreground only (mask with only True values) - ( - np.array( - [ - [0, 0, 0, 0, 0], - [0, 1, 1, 1, 0], - [0, 1, 0, 1, 0], - [0, 1, 1, 1, 0], - [0, 0, 0, 0, 0], - ] - ).astype(bool), - [6, 3, 2, 1, 1, 1, 2, 3, 6], - DoesNotRaise(), - ), # mask where foreground object has hole - ( - np.array( - [ - [1, 0, 1, 0, 1], - [1, 0, 1, 0, 1], - [1, 0, 1, 0, 1], - [1, 0, 1, 0, 1], - [1, 0, 1, 0, 1], - ] - ).astype(bool), - [0, 5, 5, 5, 5, 5], - DoesNotRaise(), - ), # mask where foreground consists of 3 separate components - ( - np.array([[[]]]).astype(bool), - None, - pytest.raises(AssertionError, match="Input mask must be 2D"), - ), # raises AssertionError because mask dimensionality is not 2D - ( - np.array([[]]).astype(bool), - None, - pytest.raises(AssertionError, match="Input mask cannot be empty"), - ), # raises AssertionError because mask is empty - ], -) -def test_mask_to_rle( - mask: npt.NDArray[np.bool_], expected_rle: list[int], exception: Exception -) -> None: - with exception: - result = mask_to_rle(mask=mask) - assert result == expected_rle - - -@pytest.mark.parametrize( - ("rle", "resolution_wh", "expected_mask", "exception"), - [ - ( - np.array([9]), - [3, 3], - np.zeros((3, 
3)).astype(bool), - DoesNotRaise(), - ), # mask with background only (mask with only False values); rle as array - ( - [9], - [3, 3], - np.zeros((3, 3)).astype(bool), - DoesNotRaise(), - ), # mask with background only (mask with only False values); rle as list - ( - np.array([0, 9]), - [3, 3], - np.ones((3, 3)).astype(bool), - DoesNotRaise(), - ), # mask with foreground only (mask with only True values) - ( - np.array([6, 3, 2, 1, 1, 1, 2, 3, 6]), - [5, 5], - np.array( - [ - [0, 0, 0, 0, 0], - [0, 1, 1, 1, 0], - [0, 1, 0, 1, 0], - [0, 1, 1, 1, 0], - [0, 0, 0, 0, 0], - ] - ).astype(bool), - DoesNotRaise(), - ), # mask where foreground object has hole - ( - np.array([0, 5, 5, 5, 5, 5]), - [5, 5], - np.array( - [ - [1, 0, 1, 0, 1], - [1, 0, 1, 0, 1], - [1, 0, 1, 0, 1], - [1, 0, 1, 0, 1], - [1, 0, 1, 0, 1], - ] - ).astype(bool), - DoesNotRaise(), - ), # mask where foreground consists of 3 separate components - ( - np.array([0, 5, 5, 5, 5, 5]), - [2, 2], - None, - pytest.raises( - AssertionError, match="sum of the number of pixels in the RLE" - ), - ), # raises AssertionError because number of pixels in RLE does not match - # number of pixels in expected mask (width x height). 
- ], -) -def test_rle_to_mask( - rle: npt.NDArray[np.int_], - resolution_wh: tuple[int, int], - expected_mask: npt.NDArray[np.bool_], - exception: Exception, -) -> None: - with exception: - result = rle_to_mask(rle=rle, resolution_wh=resolution_wh) - assert np.all(result == expected_mask) diff --git a/tests/detection/utils/test_converters.py b/tests/detection/utils/test_converters.py index 5372343eb4..4e39448aac 100644 --- a/tests/detection/utils/test_converters.py +++ b/tests/detection/utils/test_converters.py @@ -1,9 +1,16 @@ from __future__ import annotations +from contextlib import ExitStack as DoesNotRaise + import numpy as np +import numpy.typing as npt import pytest from supervision.detection.utils.converters import ( + _decode_coco_rle_string, + _encode_coco_rle_string, + mask_to_rle, + rle_to_mask, xcycwh_to_xyxy, xywh_to_xyxy, xyxy_to_mask, @@ -301,3 +308,224 @@ def test_xyxy_to_mask(boxes: np.ndarray, resolution_wh, expected: np.ndarray) -> assert result.dtype == np.bool_ assert result.shape == expected.shape np.testing.assert_array_equal(result, expected) + + +@pytest.mark.parametrize( + "counts", + [ + [5, 2, 2, 2, 5], + [0, 16], + [9], + [0, 5, 5, 5, 5, 5], + [6, 3, 2, 1, 1, 1, 2, 3, 6], + [3, 1, 2, 4, 2, 1, 3], + ], +) +def test_coco_rle_encode_decode_round_trip(counts: list[int]) -> None: + encoded = _encode_coco_rle_string(counts) + decoded = _decode_coco_rle_string(encoded) + assert decoded == counts + + +@pytest.mark.parametrize( + ("mask", "compressed", "expected_rle", "exception"), + [ + ( + np.zeros((3, 3)).astype(bool), + False, + [9], + DoesNotRaise(), + ), # mask with background only (mask with only False values) + ( + np.ones((3, 3)).astype(bool), + False, + [0, 9], + DoesNotRaise(), + ), # mask with foreground only (mask with only True values) + ( + np.array( + [ + [0, 0, 0, 0, 0], + [0, 1, 1, 1, 0], + [0, 1, 0, 1, 0], + [0, 1, 1, 1, 0], + [0, 0, 0, 0, 0], + ] + ).astype(bool), + False, + [6, 3, 2, 1, 1, 1, 2, 3, 6], + DoesNotRaise(), + 
), # mask where foreground object has hole + ( + np.array( + [ + [1, 0, 1, 0, 1], + [1, 0, 1, 0, 1], + [1, 0, 1, 0, 1], + [1, 0, 1, 0, 1], + [1, 0, 1, 0, 1], + ] + ).astype(bool), + False, + [0, 5, 5, 5, 5, 5], + DoesNotRaise(), + ), # mask where foreground consists of 3 separate components + ( + np.array( + [ + [False, False, False, False], + [False, True, True, False], + [False, True, True, False], + [False, False, False, False], + ] + ), + True, + "52203", + DoesNotRaise(), + ), # compressed RLE string + ( + np.array([[[]]]).astype(bool), + False, + None, + pytest.raises(AssertionError, match="Input mask must be 2D"), + ), # raises AssertionError because mask dimensionality is not 2D + ( + np.array([[]]).astype(bool), + False, + None, + pytest.raises(AssertionError, match="Input mask cannot be empty"), + ), # raises AssertionError because mask is empty + ], +) +def test_mask_to_rle( + mask: npt.NDArray[np.bool_], + compressed: bool, + expected_rle: list[int] | str | None, + exception: Exception, +) -> None: + with exception: + result = mask_to_rle(mask=mask, compressed=compressed) + assert result == expected_rle + + +@pytest.mark.parametrize( + ("rle", "resolution_wh", "expected_mask", "exception"), + [ + ( + np.array([9]), + [3, 3], + np.zeros((3, 3)).astype(bool), + DoesNotRaise(), + ), # mask with background only (mask with only False values); rle as array + ( + [9], + [3, 3], + np.zeros((3, 3)).astype(bool), + DoesNotRaise(), + ), # mask with background only (mask with only False values); rle as list + ( + np.array([0, 9]), + [3, 3], + np.ones((3, 3)).astype(bool), + DoesNotRaise(), + ), # mask with foreground only (mask with only True values) + ( + np.array([6, 3, 2, 1, 1, 1, 2, 3, 6]), + [5, 5], + np.array( + [ + [0, 0, 0, 0, 0], + [0, 1, 1, 1, 0], + [0, 1, 0, 1, 0], + [0, 1, 1, 1, 0], + [0, 0, 0, 0, 0], + ] + ).astype(bool), + DoesNotRaise(), + ), # mask where foreground object has hole + ( + np.array([0, 5, 5, 5, 5, 5]), + [5, 5], + np.array( + [ + [1, 
0, 1, 0, 1], + [1, 0, 1, 0, 1], + [1, 0, 1, 0, 1], + [1, 0, 1, 0, 1], + [1, 0, 1, 0, 1], + ] + ).astype(bool), + DoesNotRaise(), + ), # mask where foreground consists of 3 separate components + ( + np.array([0, 5, 5, 5, 5, 5]), + [2, 2], + None, + pytest.raises( + AssertionError, match="sum of the number of pixels in the RLE" + ), + ), # raises AssertionError because number of pixels in RLE does not match + # number of pixels in expected mask (width x height). + ( + b"3124OM1", + [4, 4], + np.array( + [ + [0, 0, 1, 1], + [0, 0, 1, 1], + [0, 1, 1, 0], + [1, 1, 0, 0], + ] + ).astype(bool), + DoesNotRaise(), + ), # compressed RLE bytes + ( + "52203", + [4, 4], + np.array( + [ + [0, 0, 0, 0], + [0, 1, 1, 0], + [0, 1, 1, 0], + [0, 0, 0, 0], + ] + ).astype(bool), + DoesNotRaise(), + ), # compressed RLE string + ( + "!", + [4, 4], + None, + pytest.raises(ValueError, match="Malformed compressed RLE string"), + ), # malformed compressed RLE string with invalid character + ( + "52P", + [4, 4], + None, + pytest.raises(ValueError, match="Malformed compressed RLE string"), + ), # malformed compressed RLE: unterminated continuation byte + ], +) +def test_rle_to_mask( + rle: npt.NDArray[np.int_], + resolution_wh: tuple[int, int], + expected_mask: npt.NDArray[np.bool_], + exception: Exception, +) -> None: + with exception: + result = rle_to_mask(rle=rle, resolution_wh=resolution_wh) + assert np.all(result == expected_mask) + + +def test_mask_rle_compressed_round_trip() -> None: + mask = np.array( + [ + [False, False, False, False], + [False, True, True, False], + [False, True, True, False], + [False, False, False, False], + ] + ) + compressed = mask_to_rle(mask, compressed=True) + recovered = rle_to_mask(compressed, (4, 4)) + np.testing.assert_array_equal(mask, recovered) diff --git a/tests/detection/utils/test_internal.py b/tests/detection/utils/test_internal.py index c919182f5c..ccc9531dfa 100644 --- a/tests/detection/utils/test_internal.py +++ 
b/tests/detection/utils/test_internal.py @@ -17,6 +17,13 @@ TEST_MASK = np.zeros((1, 1000, 1000), dtype=bool) TEST_MASK[:, 300:351, 200:251] = True +TEST_RLE_MASK = np.zeros((1, 4, 4), dtype=bool) +TEST_RLE_MASK[0, 1:3, 1:3] = True + +TEST_RLE_NONCONTIGUOUS_MASK = np.zeros((1, 4, 4), dtype=bool) +TEST_RLE_NONCONTIGUOUS_MASK[0, 0:2, 0:2] = True +TEST_RLE_NONCONTIGUOUS_MASK[0, 3, 2:4] = True + @pytest.mark.parametrize( ("roboflow_result", "expected_result", "exception"), @@ -219,6 +226,189 @@ ), DoesNotRaise(), ), # two instance segmentation results - one correct, one incorrect + ( + { + "predictions": [ + { + "x": 1.5, + "y": 1.5, + "width": 2.0, + "height": 2.0, + "confidence": 0.9, + "class_id": 0, + "class": "person", + "rle": {"size": [4, 4], "counts": "52203"}, + } + ], + "image": {"width": 4, "height": 4}, + }, + ( + np.array([[0.5, 0.5, 2.5, 2.5]]), + np.array([0.9]), + np.array([0]), + TEST_RLE_MASK, + None, + {CLASS_NAME_DATA_FIELD: np.array(["person"])}, + ), + DoesNotRaise(), + ), # single RLE prediction with compressed string counts + ( + { + "predictions": [ + { + "x": 2.0, + "y": 2.0, + "width": 4.0, + "height": 4.0, + "confidence": 0.85, + "class_id": 1, + "class": "cat", + "rle": {"size": [4, 4], "counts": "02203ON0"}, + } + ], + "image": {"width": 4, "height": 4}, + }, + ( + np.array([[0.0, 0.0, 4.0, 4.0]]), + np.array([0.85]), + np.array([1]), + TEST_RLE_NONCONTIGUOUS_MASK, + None, + {CLASS_NAME_DATA_FIELD: np.array(["cat"])}, + ), + DoesNotRaise(), + ), # single RLE prediction with non-contiguous mask + ( + { + "predictions": [ + { + "x": 1.5, + "y": 1.5, + "width": 2.0, + "height": 2.0, + "confidence": 0.9, + "class_id": 0, + "class": "person", + "rle": {"size": [4, 4], "counts": "52203"}, + "tracker_id": 5, + } + ], + "image": {"width": 4, "height": 4}, + }, + ( + np.array([[0.5, 0.5, 2.5, 2.5]]), + np.array([0.9]), + np.array([0]), + TEST_RLE_MASK, + np.array([5]), + {CLASS_NAME_DATA_FIELD: np.array(["person"])}, + ), + DoesNotRaise(), + ), # 
RLE prediction with tracker_id + ( + { + "predictions": [ + { + "x": 1.5, + "y": 1.5, + "width": 2.0, + "height": 2.0, + "confidence": 0.9, + "class_id": 0, + "class": "person", + "rle_mask": {"size": [4, 4], "counts": "52203"}, + } + ], + "image": {"width": 4, "height": 4}, + }, + ( + np.array([[0.5, 0.5, 2.5, 2.5]]), + np.array([0.9]), + np.array([0]), + TEST_RLE_MASK, + None, + {CLASS_NAME_DATA_FIELD: np.array(["person"])}, + ), + DoesNotRaise(), + ), # single RLE prediction with compressed string counts under rle_mask key + ( + { + "predictions": [ + { + "x": 1.5, + "y": 1.5, + "width": 2.0, + "height": 2.0, + "confidence": 0.9, + "class_id": 0, + "class": "person", + "rle": "bad_string", + } + ], + "image": {"width": 4, "height": 4}, + }, + ( + np.array([[0.5, 0.5, 2.5, 2.5]]), + np.array([0.9]), + np.array([0]), + None, + None, + {CLASS_NAME_DATA_FIELD: np.array(["person"])}, + ), + DoesNotRaise(), + ), # malformed RLE payload should fall through to box-only detection + ( + { + "predictions": [ + { + "x": 1.5, + "y": 1.5, + "width": 2.0, + "height": 2.0, + "confidence": 0.9, + "class_id": 0, + "class": "person", + "rle": {"size": [4, 4]}, + } + ], + "image": {"width": 4, "height": 4}, + }, + ( + np.array([[0.5, 0.5, 2.5, 2.5]]), + np.array([0.9]), + np.array([0]), + None, + None, + {CLASS_NAME_DATA_FIELD: np.array(["person"])}, + ), + DoesNotRaise(), + ), # RLE dict missing counts falls through to box-only detection + ( + { + "predictions": [ + { + "x": 1.5, + "y": 1.5, + "width": 2.0, + "height": 2.0, + "confidence": 0.9, + "class_id": 0, + "class": "person", + "rle": {"size": [4, 4], "counts": "!"}, + } + ], + "image": {"width": 4, "height": 4}, + }, + ( + np.array([[0.5, 0.5, 2.5, 2.5]]), + np.array([0.9]), + np.array([0]), + None, + None, + {CLASS_NAME_DATA_FIELD: np.array(["person"])}, + ), + DoesNotRaise(), + ), # malformed compressed counts falls through to box-only detection ], ) def test_process_roboflow_result(