From 7e57e2c0a9284e0b48003b4db7efc6a73539be8b Mon Sep 17 00:00:00 2001 From: YousefZahran1 Date: Thu, 30 Apr 2026 23:45:57 +0300 Subject: [PATCH 1/2] fix: return empty int array instead of None for class_id on empty VLM parse When from_paligemma or from_google_gemini_2_0 find no detections (no regex matches, JSON decode error, or empty bounding-box list), they previously returned None for class_id. All other early-exit and filter paths already return a zero-length ndarray of dtype int. This inconsistency causes downstream AttributeError when callers unconditionally call .shape or iterate over the result. Affected paths: - from_paligemma: matches.shape[0] == 0 branch - from_google_gemini_2_0: JSONDecodeError branch and len(xyxy) == 0 branch Closes #2219 --- src/supervision/detection/vlm.py | 6 +++--- tests/detection/test_vlm.py | 20 ++++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/supervision/detection/vlm.py b/src/supervision/detection/vlm.py index 1aa0fb5182..e287dcc1c7 100644 --- a/src/supervision/detection/vlm.py +++ b/src/supervision/detection/vlm.py @@ -229,7 +229,7 @@ def from_paligemma( matches = np.array(matches) if matches else np.empty((0, 5)) if matches.shape[0] == 0: - return np.empty((0, 4)), None, np.empty(0, dtype=str) + return np.empty((0, 4)), np.empty((0,), dtype=int), np.empty(0, dtype=str) xyxy, class_name = matches[:, [1, 0, 3, 2]], matches[:, 4] xyxy = xyxy.astype(int) / 1024 * np.array([w, h, w, h]) @@ -626,7 +626,7 @@ def from_google_gemini_2_0( try: data = json.loads(result) except json.JSONDecodeError: - return np.empty((0, 4)), None, np.empty((0,), dtype=str) + return np.empty((0, 4)), np.empty((0,), dtype=int), np.empty((0,), dtype=str) labels = [] xyxy = [] @@ -640,7 +640,7 @@ def from_google_gemini_2_0( xyxy.append([box[1], box[0], box[3], box[2]]) if len(xyxy) == 0: - return np.empty((0, 4)), None, np.empty((0,), dtype=str) + return np.empty((0, 4)), np.empty((0,), dtype=int), np.empty((0,), dtype=str) xyxy = denormalize_boxes( np.array(xyxy, dtype=np.float64), diff --git a/tests/detection/test_vlm.py b/tests/detection/test_vlm.py index c21679b820..b5d035b065 100644 --- a/tests/detection/test_vlm.py +++ b/tests/detection/test_vlm.py @@ -27,49 +27,49 @@ "", (1000, 1000), None, - (np.empty((0, 4)), None, np.empty(0).astype(str)), + (np.empty((0, 4)), np.empty((0,), dtype=int), np.empty(0).astype(str)), ), # empty text ( does_not_raise(), "", (1000, 1000), ["cat", "dog"], - (np.empty((0, 4)), None, np.empty(0).astype(str)), + (np.empty((0, 4)), np.empty((0,), dtype=int), np.empty(0).astype(str)), ), # empty text, classes ( does_not_raise(), "\n", (1000, 1000), None, - (np.empty((0, 4)), None, np.empty(0).astype(str)), + (np.empty((0, 4)), np.empty((0,), dtype=int), np.empty(0).astype(str)), ), # newline only ( does_not_raise(), "the quick brown fox jumps over the lazy dog.", (1000, 1000), None, - (np.empty((0, 4)), None, np.empty(0).astype(str)), + (np.empty((0, 4)), np.empty((0,), dtype=int), np.empty(0).astype(str)), ), # random text, no location ( does_not_raise(), " cat", (1000, 1000), None, - (np.empty((0, 4)), None, np.empty(0).astype(str)), + (np.empty((0, 4)), np.empty((0,), dtype=int), np.empty(0).astype(str)), ), # partial location ( does_not_raise(), " cat", (1000, 1000), None, - (np.empty((0, 4)), None, np.empty(0).astype(str)), + (np.empty((0, 4)), np.empty((0,), dtype=int), np.empty(0).astype(str)), ), # extra loc ( does_not_raise(), "", (1000, 1000), None, - (np.empty((0, 4)), None, np.empty(0).astype(str)), + (np.empty((0, 4)), np.empty((0,), dtype=int), np.empty(0).astype(str)), ), # no class ( does_not_raise(), @@ -436,21 +436,21 @@ def test_from_qwen_2_5_vl( "random text", (1000, 1000), None, - (np.empty((0, 4)), None, np.empty(0, dtype=str)), + (np.empty((0, 4)), np.empty((0,), dtype=int), np.empty(0, dtype=str)), ), # random text without JSON format ( does_not_raise(), "```json\ninvalid json\n```", (1000, 1000), None, - (np.empty((0, 4)), None, np.empty(0, dtype=str)), + (np.empty((0, 4)), np.empty((0,), dtype=int), np.empty(0, dtype=str)), ), # invalid JSON within code blocks ( does_not_raise(), "```json\n[]\n```", (1000, 1000), None, - (np.empty((0, 4)), None, np.empty(0, dtype=str)), + (np.empty((0, 4)), np.empty((0,), dtype=int), np.empty(0, dtype=str)), ), # empty JSON array ( does_not_raise(), From 8bb5f1de04fae1a50421a75a798eac520db6ffa9 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Tue, 19 May 2026 14:26:31 +0200 Subject: [PATCH 2/2] Apply suggestions from code review Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> --- src/supervision/detection/vlm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/supervision/detection/vlm.py b/src/supervision/detection/vlm.py index e287dcc1c7..de53548d04 100644 --- a/src/supervision/detection/vlm.py +++ b/src/supervision/detection/vlm.py @@ -202,7 +202,7 @@ def validate_vlm_parameters(vlm: VLM | str, result: Any, kwargs: dict[str, Any]) def from_paligemma( result: str, resolution_wh: tuple[int, int], classes: list[str] | None = None -) -> tuple[npt.NDArray[Any], npt.NDArray[Any] | None, npt.NDArray[Any]]: +) -> tuple[npt.NDArray[Any], npt.NDArray[Any], npt.NDArray[Any]]: """ Parse bounding boxes from paligemma-formatted text, scale them to the specified resolution, and optionally filter by classes.