From c9201d0e151a9a6fa425e0cc62981edd5b491c06 Mon Sep 17 00:00:00 2001 From: Satish K C Date: Tue, 24 Mar 2026 16:16:00 -0500 Subject: [PATCH 1/4] feat: add show_progress option to dataset loaders and savers Add optional tqdm progress bars to all time-consuming dataset operations. Addresses #183. - load_coco_annotations / DetectionDataset.from_coco - load_pascal_voc_annotations / DetectionDataset.from_pascal_voc - load_yolo_annotations / DetectionDataset.from_yolo - save_dataset_images / DetectionDataset.as_coco / as_yolo / as_pascal_voc The show_progress parameter defaults to False for full backward compatibility. Uses tqdm.auto so progress bars work in both terminal and Jupyter notebook environments. --- src/supervision/dataset/core.py | 24 ++++++- src/supervision/dataset/formats/coco.py | 66 +++++++++++------ src/supervision/dataset/formats/pascal_voc.py | 59 ++++++++++----- src/supervision/dataset/formats/yolo.py | 71 +++++++++++++------ src/supervision/dataset/utils.py | 50 ++++++++++--- 5 files changed, 196 insertions(+), 74 deletions(-) diff --git a/src/supervision/dataset/core.py b/src/supervision/dataset/core.py index 44a98079a..25f9900ae 100644 --- a/src/supervision/dataset/core.py +++ b/src/supervision/dataset/core.py @@ -334,6 +334,7 @@ def as_pascal_voc( min_image_area_percentage: float = 0.0, max_image_area_percentage: float = 1.0, approximation_percentage: float = 0.0, + show_progress: bool = False, ) -> None: """ Exports the dataset to PASCAL VOC format. This method saves the images @@ -357,11 +358,13 @@ def as_pascal_voc( approximation_percentage: The percentage of polygon points to be removed from the input polygon, in the range [0, 1). Argument is used only for segmentation datasets. + show_progress: If `True`, display a progress bar while saving images. 
""" if images_directory_path: save_dataset_images( dataset=self, images_directory_path=images_directory_path, + show_progress=show_progress, ) if annotations_directory_path: Path(annotations_directory_path).mkdir(parents=True, exist_ok=True) @@ -390,6 +393,7 @@ def from_pascal_voc( images_directory_path: str, annotations_directory_path: str, force_masks: bool = False, + show_progress: bool = False, ) -> DetectionDataset: """ Creates a Dataset instance from PASCAL VOC formatted data. @@ -400,6 +404,7 @@ def from_pascal_voc( containing the PASCAL VOC XML annotations. force_masks: If True, forces masks to be loaded for all annotations, regardless of whether they are present. + show_progress: If `True`, display a progress bar while loading images. Returns: A DetectionDataset instance containing @@ -432,6 +437,7 @@ def from_pascal_voc( images_directory_path=images_directory_path, annotations_directory_path=annotations_directory_path, force_masks=force_masks, + show_progress=show_progress, ) return DetectionDataset( @@ -446,6 +452,7 @@ def from_yolo( data_yaml_path: str, force_masks: bool = False, is_obb: bool = False, + show_progress: bool = False, ) -> DetectionDataset: """ Creates a Dataset instance from YOLO formatted data. @@ -463,6 +470,7 @@ def from_yolo( is_obb: If True, loads the annotations in OBB format. OBB annotations are defined as `[class_id, x, y, x, y, x, y, x, y]`, where pairs of [x, y] are box corners. + show_progress: If `True`, display a progress bar while loading images. 
Returns: A DetectionDataset instance @@ -496,6 +504,7 @@ def from_yolo( data_yaml_path=data_yaml_path, force_masks=force_masks, is_obb=is_obb, + show_progress=show_progress, ) return DetectionDataset( classes=classes, images=image_paths, annotations=annotations @@ -509,6 +518,7 @@ def as_yolo( min_image_area_percentage: float = 0.0, max_image_area_percentage: float = 1.0, approximation_percentage: float = 0.0, + show_progress: bool = False, ) -> None: """ Exports the dataset to YOLO format. This method saves the @@ -537,10 +547,13 @@ def as_yolo( be removed from the input polygon, in the range [0, 1). This is useful for simplifying the annotations. Argument is used only for segmentation datasets. + show_progress: If `True`, display a progress bar while saving images. """ if images_directory_path is not None: save_dataset_images( - dataset=self, images_directory_path=images_directory_path + dataset=self, + images_directory_path=images_directory_path, + show_progress=show_progress, ) if annotations_directory_path is not None: save_yolo_annotations( @@ -559,6 +572,7 @@ def from_coco( images_directory_path: str, annotations_path: str, force_masks: bool = False, + show_progress: bool = False, ) -> DetectionDataset: """ Creates a Dataset instance from COCO formatted data. @@ -570,6 +584,7 @@ def from_coco( force_masks: If True, forces masks to be loaded for all annotations, regardless of whether they are present. + show_progress: If `True`, display a progress bar while loading images. Returns: A DetectionDataset instance containing the loaded images and annotations. 
@@ -599,6 +614,7 @@ def from_coco( images_directory_path=images_directory_path, annotations_path=annotations_path, force_masks=force_masks, + show_progress=show_progress, ) return DetectionDataset(classes=classes, images=images, annotations=annotations) @@ -609,6 +625,7 @@ def as_coco( min_image_area_percentage: float = 0.0, max_image_area_percentage: float = 1.0, approximation_percentage: float = 0.0, + show_progress: bool = False, ) -> None: """ Exports the dataset to COCO format. This method saves the @@ -645,10 +662,13 @@ def as_coco( to be removed from the input polygon, in the range [0, 1). This is useful for simplifying the annotations. Argument is used only for segmentation datasets. + show_progress: If `True`, display a progress bar while saving images. """ if images_directory_path is not None: save_dataset_images( - dataset=self, images_directory_path=images_directory_path + dataset=self, + images_directory_path=images_directory_path, + show_progress=show_progress, ) if annotations_path is not None: save_coco_annotations( diff --git a/src/supervision/dataset/formats/coco.py b/src/supervision/dataset/formats/coco.py index b63979846..c15f7f995 100644 --- a/src/supervision/dataset/formats/coco.py +++ b/src/supervision/dataset/formats/coco.py @@ -6,6 +6,7 @@ import numpy as np import numpy.typing as npt +from tqdm.auto import tqdm from supervision.dataset.utils import ( approximate_mask_with_polygons, @@ -254,6 +255,7 @@ def load_coco_annotations( annotations_path: str, force_masks: bool = False, use_iscrowd: bool = True, + show_progress: bool = False, ) -> tuple[list[str], list[str], dict[str, Detections]]: """ Load COCO annotations and convert them to `Detections`. @@ -267,9 +269,21 @@ def load_coco_annotations( annotations_path: Path to COCO JSON annotations. force_masks: If `True`, always attempt to load masks. use_iscrowd: If `True`, include `iscrowd` and `area` in detection data. + show_progress: If `True`, display a progress bar while loading images. 
Returns: A tuple of `(classes, image_paths, annotations)`. + + Examples: + ```python + import supervision as sv + + ds = sv.DetectionDataset.from_coco( + images_directory_path="images/train", + annotations_path="images/train/_annotations.coco.json", + show_progress=True, + ) + ``` """ coco_data = read_json_file(file_path=annotations_path) classes = coco_categories_to_classes(coco_categories=coco_data["categories"]) @@ -286,32 +300,38 @@ def load_coco_annotations( images = [] annotations = {} - for coco_image in coco_images: - image_name, image_width, image_height = ( - coco_image["file_name"], - coco_image["width"], - coco_image["height"], - ) - image_annotations = coco_annotations_groups.get(coco_image["id"], []) - image_path = os.path.join(images_directory_path, image_name) + with tqdm( + total=len(coco_images), + desc="Loading COCO annotations", + disable=not show_progress, + ) as progress_bar: + for coco_image in coco_images: + image_name, image_width, image_height = ( + coco_image["file_name"], + coco_image["width"], + coco_image["height"], + ) + image_annotations = coco_annotations_groups.get(coco_image["id"], []) + image_path = os.path.join(images_directory_path, image_name) - with_masks = force_masks or any( - _with_seg_mask(annotation) for annotation in image_annotations - ) - annotation = coco_annotations_to_detections( - image_annotations=image_annotations, - resolution_wh=(image_width, image_height), - with_masks=with_masks, - use_iscrowd=use_iscrowd, - ) + with_masks = force_masks or any( + _with_seg_mask(annotation) for annotation in image_annotations + ) + annotation = coco_annotations_to_detections( + image_annotations=image_annotations, + resolution_wh=(image_width, image_height), + with_masks=with_masks, + use_iscrowd=use_iscrowd, + ) - annotation = map_detections_class_id( - source_to_target_mapping=class_index_mapping, - detections=annotation, - ) + annotation = map_detections_class_id( + source_to_target_mapping=class_index_mapping, + 
detections=annotation, + ) - images.append(image_path) - annotations[image_path] = annotation + images.append(image_path) + annotations[image_path] = annotation + progress_bar.update(1) return classes, images, annotations diff --git a/src/supervision/dataset/formats/pascal_voc.py b/src/supervision/dataset/formats/pascal_voc.py index 91b6664cd..bed749444 100644 --- a/src/supervision/dataset/formats/pascal_voc.py +++ b/src/supervision/dataset/formats/pascal_voc.py @@ -9,6 +9,7 @@ import numpy.typing as npt from defusedxml.ElementTree import parse, tostring from defusedxml.minidom import parseString +from tqdm.auto import tqdm from supervision.dataset.utils import approximate_mask_with_polygons from supervision.detection.core import Detections @@ -149,6 +150,7 @@ def load_pascal_voc_annotations( images_directory_path: str, annotations_directory_path: str, force_masks: bool = False, + show_progress: bool = False, ) -> tuple[list[str], list[str], dict[str, Detections]]: """ Loads PASCAL VOC XML annotations and returns the image name, @@ -160,11 +162,23 @@ def load_pascal_voc_annotations( PASCAL VOC annotation files. force_masks: If True, forces masks to be loaded for all annotations, regardless of whether they are present. + show_progress: If `True`, display a progress bar while loading images. Returns: A tuple with a list of class names, a list of paths to images, and a dictionary with image paths as keys and corresponding Detections instances as values. 
+ + Examples: + ```python + import supervision as sv + + ds = sv.DetectionDataset.from_pascal_voc( + images_directory_path="images/train", + annotations_directory_path="images/train/labels", + show_progress=True, + ) + ``` """ image_paths = [ @@ -177,24 +191,33 @@ def load_pascal_voc_annotations( classes: list[str] = [] annotations = {} - for image_path in image_paths: - image_stem = Path(image_path).stem - annotation_path = os.path.join(annotations_directory_path, f"{image_stem}.xml") - if not os.path.exists(annotation_path): - annotations[image_path] = Detections.empty() - continue - - tree = parse(annotation_path) - root = tree.getroot() - - image = cv2.imread(image_path) - if image is None: - raise ValueError(f"Could not read image from path: {image_path}") - resolution_wh = (image.shape[1], image.shape[0]) - annotation, classes = detections_from_xml_obj( - root, classes, resolution_wh, force_masks - ) - annotations[image_path] = annotation + with tqdm( + total=len(image_paths), + desc="Loading Pascal VOC annotations", + disable=not show_progress, + ) as progress_bar: + for image_path in image_paths: + image_stem = Path(image_path).stem + annotation_path = os.path.join( + annotations_directory_path, f"{image_stem}.xml" + ) + if not os.path.exists(annotation_path): + annotations[image_path] = Detections.empty() + progress_bar.update(1) + continue + + tree = parse(annotation_path) + root = tree.getroot() + + image = cv2.imread(image_path) + if image is None: + raise ValueError(f"Could not read image from path: {image_path}") + resolution_wh = (image.shape[1], image.shape[0]) + annotation, classes = detections_from_xml_obj( + root, classes, resolution_wh, force_masks + ) + annotations[image_path] = annotation + progress_bar.update(1) return classes, image_paths, annotations diff --git a/src/supervision/dataset/formats/yolo.py b/src/supervision/dataset/formats/yolo.py index b9bb02332..2126cc765 100644 --- a/src/supervision/dataset/formats/yolo.py +++ 
b/src/supervision/dataset/formats/yolo.py @@ -7,6 +7,7 @@ import numpy as np import numpy.typing as npt from PIL import Image +from tqdm.auto import tqdm from supervision.config import ORIENTED_BOX_COORDINATES from supervision.dataset.utils import approximate_mask_with_polygons @@ -141,6 +142,7 @@ def load_yolo_annotations( data_yaml_path: str, force_masks: bool = False, is_obb: bool = False, + show_progress: bool = False, ) -> tuple[list[str], list[str], dict[str, Detections]]: """ Loads YOLO annotations and returns class names, images, @@ -157,11 +159,24 @@ def load_yolo_annotations( is_obb: If True, loads the annotations in OBB format. OBB annotations are defined as `[class_id, x, y, x, y, x, y, x, y]`, where pairs of [x, y] are box corners. + show_progress: If `True`, display a progress bar while loading images. Returns: A tuple containing a list of class names, a dictionary with image names as keys and images as values, and a dictionary with image names as keys and corresponding Detections instances as values. 
+ + Examples: + ```python + import supervision as sv + + ds = sv.DetectionDataset.from_yolo( + images_directory_path="images/train", + annotations_directory_path="labels/train", + data_yaml_path="data.yaml", + show_progress=True, + ) + ``` """ image_paths = [ str(path) @@ -184,32 +199,42 @@ def load_yolo_annotations( classes = _extract_class_names(file_path=data_yaml_path) annotations = {} - for image_path in image_paths: - image_stem = Path(image_path).stem - annotation_path = os.path.join(annotations_directory_path, f"{image_stem}.txt") - if not os.path.exists(annotation_path): - annotations[image_path] = Detections.empty() - continue - - # PIL is much faster than cv2 for checking image shape and mode: https://github.com/roboflow/supervision/issues/1554 - image = Image.open(image_path) - lines = read_txt_file(file_path=annotation_path, skip_empty=True) - w, h = image.size - resolution_wh = (w, h) - if image.mode not in ("RGB", "L"): - raise ValueError( - f"Images must be 'RGB' or 'grayscale', \ + with tqdm( + total=len(image_paths), + desc="Loading YOLO annotations", + disable=not show_progress, + ) as progress_bar: + for image_path in image_paths: + image_stem = Path(image_path).stem + annotation_path = os.path.join( + annotations_directory_path, f"{image_stem}.txt" + ) + if not os.path.exists(annotation_path): + annotations[image_path] = Detections.empty() + progress_bar.update(1) + continue + + # PIL is much faster than cv2 for checking image shape and mode: https://github.com/roboflow/supervision/issues/1554 + image = Image.open(image_path) + lines = read_txt_file(file_path=annotation_path, skip_empty=True) + w, h = image.size + resolution_wh = (w, h) + if image.mode not in ("RGB", "L"): + raise ValueError( + f"Images must be 'RGB' or 'grayscale', \ but {image_path} mode is '{image.mode}'." 
+ ) + + with_masks = force_masks or _with_seg_mask(lines=lines) + annotation = yolo_annotations_to_detections( + lines=lines, + resolution_wh=resolution_wh, + with_masks=with_masks, + is_obb=is_obb, ) + annotations[image_path] = annotation + progress_bar.update(1) - with_masks = force_masks or _with_seg_mask(lines=lines) - annotation = yolo_annotations_to_detections( - lines=lines, - resolution_wh=resolution_wh, - with_masks=with_masks, - is_obb=is_obb, - ) - annotations[image_path] = annotation return classes, image_paths, annotations diff --git a/src/supervision/dataset/utils.py b/src/supervision/dataset/utils.py index f52111a3e..1300389d3 100644 --- a/src/supervision/dataset/utils.py +++ b/src/supervision/dataset/utils.py @@ -10,6 +10,7 @@ import cv2 import numpy as np import numpy.typing as npt +from tqdm.auto import tqdm from supervision.detection.core import Detections from supervision.detection.utils.converters import mask_to_polygons @@ -100,15 +101,48 @@ def map_detections_class_id( return detections_copy -def save_dataset_images(dataset: DetectionDataset, images_directory_path: str) -> None: +def save_dataset_images( + dataset: DetectionDataset, + images_directory_path: str, + show_progress: bool = False, +) -> None: + """ + Saves all images from a dataset to the specified directory. + + Args: + dataset: The dataset whose images should be saved. + images_directory_path: Path to the directory where images will be saved. + show_progress: If `True`, display a progress bar while saving images. 
+ + Examples: + ```python + import supervision as sv + + ds = sv.DetectionDataset.from_coco( + images_directory_path="images/train", + annotations_path="images/train/_annotations.coco.json", + ) + sv.dataset.utils.save_dataset_images( + dataset=ds, + images_directory_path="output/images", + show_progress=True, + ) + ``` + """ Path(images_directory_path).mkdir(parents=True, exist_ok=True) - for image_path in dataset.image_paths: - final_path = os.path.join(images_directory_path, Path(image_path).name) - if image_path in dataset._images_in_memory: - image = dataset._images_in_memory[image_path] - cv2.imwrite(final_path, image) - else: - shutil.copyfile(image_path, final_path) + with tqdm( + total=len(dataset.image_paths), + desc="Saving images", + disable=not show_progress, + ) as progress_bar: + for image_path in dataset.image_paths: + final_path = os.path.join(images_directory_path, Path(image_path).name) + if image_path in dataset._images_in_memory: + image = dataset._images_in_memory[image_path] + cv2.imwrite(final_path, image) + else: + shutil.copyfile(image_path, final_path) + progress_bar.update(1) def train_test_split( From 069659c63eb79a363d34d55596406ce15d10506d Mon Sep 17 00:00:00 2001 From: Satish K C Date: Thu, 26 Mar 2026 09:35:48 -0500 Subject: [PATCH 2/4] feat: propagate show_progress to annotation savers and add tests - Add show_progress param to save_yolo_annotations and save_coco_annotations - Wrap Pascal VOC annotation export loop in as_pascal_voc with tqdm - Pass show_progress through as_yolo and as_coco to their annotation savers - Add test_show_progress.py covering all loaders and savers with both show_progress=True and show_progress=False --- src/supervision/dataset/core.py | 45 +-- src/supervision/dataset/formats/coco.py | 53 ++-- src/supervision/dataset/formats/yolo.py | 37 ++- tests/dataset/test_show_progress.py | 366 ++++++++++++++++++++++++ 4 files changed, 446 insertions(+), 55 deletions(-) create mode 100644 
tests/dataset/test_show_progress.py diff --git a/src/supervision/dataset/core.py b/src/supervision/dataset/core.py index 25f9900ae..86162a92d 100644 --- a/src/supervision/dataset/core.py +++ b/src/supervision/dataset/core.py @@ -10,6 +10,7 @@ import cv2 import numpy as np import numpy.typing as npt +from tqdm.auto import tqdm from supervision.classification.core import Classifications from supervision.config import CLASS_NAME_DATA_FIELD @@ -368,24 +369,30 @@ def as_pascal_voc( ) if annotations_directory_path: Path(annotations_directory_path).mkdir(parents=True, exist_ok=True) - for image_path, image, annotations in self: - annotation_name = Path(image_path).stem - annotations_path = os.path.join( - annotations_directory_path, f"{annotation_name}.xml" - ) - image_name = Path(image_path).name - pascal_voc_xml = detections_to_pascal_voc( - detections=annotations, - classes=self.classes, - filename=image_name, - image_shape=image.shape, - min_image_area_percentage=min_image_area_percentage, - max_image_area_percentage=max_image_area_percentage, - approximation_percentage=approximation_percentage, - ) - - with open(annotations_path, "w") as f: - f.write(pascal_voc_xml) + with tqdm( + total=len(self), + desc="Saving Pascal VOC annotations", + disable=not show_progress, + ) as progress_bar: + for image_path, image, annotations in self: + annotation_name = Path(image_path).stem + annotations_path = os.path.join( + annotations_directory_path, f"{annotation_name}.xml" + ) + image_name = Path(image_path).name + pascal_voc_xml = detections_to_pascal_voc( + detections=annotations, + classes=self.classes, + filename=image_name, + image_shape=image.shape, + min_image_area_percentage=min_image_area_percentage, + max_image_area_percentage=max_image_area_percentage, + approximation_percentage=approximation_percentage, + ) + + with open(annotations_path, "w") as f: + f.write(pascal_voc_xml) + progress_bar.update(1) @classmethod def from_pascal_voc( @@ -562,6 +569,7 @@ def as_yolo( 
min_image_area_percentage=min_image_area_percentage, max_image_area_percentage=max_image_area_percentage, approximation_percentage=approximation_percentage, + show_progress=show_progress, ) if data_yaml_path is not None: save_data_yaml(data_yaml_path=data_yaml_path, classes=self.classes) @@ -677,6 +685,7 @@ def as_coco( min_image_area_percentage=min_image_area_percentage, max_image_area_percentage=max_image_area_percentage, approximation_percentage=approximation_percentage, + show_progress=show_progress, ) diff --git a/src/supervision/dataset/formats/coco.py b/src/supervision/dataset/formats/coco.py index c15f7f995..d914a181b 100644 --- a/src/supervision/dataset/formats/coco.py +++ b/src/supervision/dataset/formats/coco.py @@ -346,6 +346,7 @@ def save_coco_annotations( min_image_area_percentage: float = 0.0, max_image_area_percentage: float = 1.0, approximation_percentage: float = 0.75, + show_progress: bool = False, ) -> None: Path(annotation_path).parent.mkdir(parents=True, exist_ok=True) licenses = [ @@ -361,30 +362,36 @@ def save_coco_annotations( coco_categories = classes_to_coco_categories(classes=dataset.classes) image_id, annotation_id = 1, 1 - for image_path, image, annotation in dataset: - image_height, image_width, _ = image.shape - image_name = f"{Path(image_path).stem}{Path(image_path).suffix}" - coco_image = { - "id": image_id, - "license": 1, - "file_name": image_name, - "height": image_height, - "width": image_width, - "date_captured": datetime.now().strftime("%m/%d/%Y,%H:%M:%S"), - } - - coco_images.append(coco_image) - coco_annotation, annotation_id = detections_to_coco_annotations( - detections=annotation, - image_id=image_id, - annotation_id=annotation_id, - min_image_area_percentage=min_image_area_percentage, - max_image_area_percentage=max_image_area_percentage, - approximation_percentage=approximation_percentage, - ) + with tqdm( + total=len(dataset), + desc="Saving COCO annotations", + disable=not show_progress, + ) as progress_bar: + for 
image_path, image, annotation in dataset: + image_height, image_width, _ = image.shape + image_name = f"{Path(image_path).stem}{Path(image_path).suffix}" + coco_image = { + "id": image_id, + "license": 1, + "file_name": image_name, + "height": image_height, + "width": image_width, + "date_captured": datetime.now().strftime("%m/%d/%Y,%H:%M:%S"), + } + + coco_images.append(coco_image) + coco_annotation, annotation_id = detections_to_coco_annotations( + detections=annotation, + image_id=image_id, + annotation_id=annotation_id, + min_image_area_percentage=min_image_area_percentage, + max_image_area_percentage=max_image_area_percentage, + approximation_percentage=approximation_percentage, + ) - coco_annotations.extend(coco_annotation) - image_id += 1 + coco_annotations.extend(coco_annotation) + image_id += 1 + progress_bar.update(1) annotation_dict = { "info": {}, diff --git a/src/supervision/dataset/formats/yolo.py b/src/supervision/dataset/formats/yolo.py index 2126cc765..5ddcc7885 100644 --- a/src/supervision/dataset/formats/yolo.py +++ b/src/supervision/dataset/formats/yolo.py @@ -308,22 +308,31 @@ def save_yolo_annotations( min_image_area_percentage: float = 0.0, max_image_area_percentage: float = 1.0, approximation_percentage: float = 0.75, + show_progress: bool = False, ) -> None: Path(annotations_directory_path).mkdir(parents=True, exist_ok=True) - for image_path, image, annotation in dataset: - image_name = Path(image_path).name - yolo_annotations_name = _image_name_to_annotation_name(image_name=image_name) - yolo_annotations_path = os.path.join( - annotations_directory_path, yolo_annotations_name - ) - lines = detections_to_yolo_annotations( - detections=annotation, - image_shape=image.shape, - min_image_area_percentage=min_image_area_percentage, - max_image_area_percentage=max_image_area_percentage, - approximation_percentage=approximation_percentage, - ) - save_text_file(lines=lines, file_path=yolo_annotations_path) + with tqdm( + total=len(dataset), + 
desc="Saving YOLO annotations", + disable=not show_progress, + ) as progress_bar: + for image_path, image, annotation in dataset: + image_name = Path(image_path).name + yolo_annotations_name = _image_name_to_annotation_name( + image_name=image_name + ) + yolo_annotations_path = os.path.join( + annotations_directory_path, yolo_annotations_name + ) + lines = detections_to_yolo_annotations( + detections=annotation, + image_shape=image.shape, + min_image_area_percentage=min_image_area_percentage, + max_image_area_percentage=max_image_area_percentage, + approximation_percentage=approximation_percentage, + ) + save_text_file(lines=lines, file_path=yolo_annotations_path) + progress_bar.update(1) def save_data_yaml(data_yaml_path: str, classes: list[str]) -> None: diff --git a/tests/dataset/test_show_progress.py b/tests/dataset/test_show_progress.py new file mode 100644 index 000000000..d2476b4e8 --- /dev/null +++ b/tests/dataset/test_show_progress.py @@ -0,0 +1,366 @@ +""" +Tests that show_progress parameter works correctly for all dataset +loaders and savers, and that output is identical regardless of its value. 
+""" +from __future__ import annotations + +import json +import os +from pathlib import Path + +import cv2 +import numpy as np +import pytest + +from supervision import DetectionDataset, Detections +from supervision.dataset.formats.coco import load_coco_annotations, save_coco_annotations +from supervision.dataset.formats.pascal_voc import load_pascal_voc_annotations +from supervision.dataset.formats.yolo import load_yolo_annotations, save_yolo_annotations +from supervision.dataset.utils import save_dataset_images +from tests.helpers import create_yolo_dataset + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def yolo_dataset(tmp_path: Path) -> dict: + return create_yolo_dataset(str(tmp_path / "yolo"), num_images=3) + + +@pytest.fixture +def coco_dataset(tmp_path: Path) -> dict: + """Create a minimal COCO dataset on disk (JSON only; no real images needed).""" + images_dir = tmp_path / "coco" / "images" + images_dir.mkdir(parents=True) + annotations_path = tmp_path / "coco" / "annotations.json" + + coco_data = { + "categories": [ + {"id": 1, "name": "dog", "supercategory": "animal"}, + {"id": 2, "name": "cat", "supercategory": "animal"}, + ], + "images": [ + {"id": 1, "file_name": "img1.jpg", "width": 100, "height": 100}, + {"id": 2, "file_name": "img2.jpg", "width": 100, "height": 100}, + ], + "annotations": [ + { + "id": 1, + "image_id": 1, + "category_id": 1, + "bbox": [10, 10, 30, 30], + "area": 900, + "segmentation": [], + "iscrowd": 0, + }, + { + "id": 2, + "image_id": 2, + "category_id": 2, + "bbox": [5, 5, 20, 20], + "area": 400, + "segmentation": [], + "iscrowd": 0, + }, + ], + } + annotations_path.write_text(json.dumps(coco_data)) + + return { + "images_dir": str(images_dir), + "annotations_path": str(annotations_path), + } + + +@pytest.fixture +def pascal_voc_dataset(tmp_path: Path) -> dict: + """Create a minimal 
Pascal VOC dataset on disk with real images and XML files."""
+    images_dir = tmp_path / "voc" / "images"
+    annotations_dir = tmp_path / "voc" / "annotations"
+    images_dir.mkdir(parents=True)
+    annotations_dir.mkdir(parents=True)
+
+    for i in range(1, 3):
+        img = np.zeros((100, 100, 3), dtype=np.uint8)
+        cv2.imwrite(str(images_dir / f"img{i}.jpg"), img)
+
+        xml = f"""<annotation>
+    <folder>VOC</folder>
+    <filename>img{i}.jpg</filename>
+    <size><width>100</width><height>100</height><depth>3</depth></size>
+    <object>
+        <name>dog</name>
+        <bndbox>
+            <xmin>{10 + i}</xmin><ymin>{10 + i}</ymin>
+            <xmax>{40 + i}</xmax><ymax>{40 + i}</ymax>
+        </bndbox>
+    </object>
+</annotation>
+"""
+        (annotations_dir / f"img{i}.xml").write_text(xml)
+
+    return {
+        "images_dir": str(images_dir),
+        "annotations_dir": str(annotations_dir),
+    }
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _load_yolo(info: dict, show_progress: bool) -> tuple:
+    return load_yolo_annotations(
+        images_directory_path=info["images_dir"],
+        annotations_directory_path=info["labels_dir"],
+        data_yaml_path=info["data_yaml_path"],
+        show_progress=show_progress,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Load: YOLO
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("show_progress", [False, True])
+def test_load_yolo_show_progress(yolo_dataset: dict, show_progress: bool) -> None:
+    classes, image_paths, annotations = _load_yolo(yolo_dataset, show_progress)
+    assert len(image_paths) == yolo_dataset["num_images"]
+    assert len(annotations) == yolo_dataset["num_images"]
+    assert isinstance(classes, list)
+
+
+def test_load_yolo_show_progress_consistent(yolo_dataset: dict) -> None:
+    classes_off, paths_off, ann_off = _load_yolo(yolo_dataset, show_progress=False)
+    classes_on, paths_on, ann_on = _load_yolo(yolo_dataset, show_progress=True)
+    assert classes_off == classes_on
+    assert paths_off == paths_on
+    assert set(ann_off.keys()) == set(ann_on.keys())
+
+
+# 
--------------------------------------------------------------------------- +# Load: COCO +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("show_progress", [False, True]) +def test_load_coco_show_progress(coco_dataset: dict, show_progress: bool) -> None: + classes, image_paths, annotations = load_coco_annotations( + images_directory_path=coco_dataset["images_dir"], + annotations_path=coco_dataset["annotations_path"], + show_progress=show_progress, + ) + assert classes == ["dog", "cat"] + assert len(image_paths) == 2 + assert len(annotations) == 2 + + +def test_load_coco_show_progress_consistent(coco_dataset: dict) -> None: + classes_off, paths_off, ann_off = load_coco_annotations( + images_directory_path=coco_dataset["images_dir"], + annotations_path=coco_dataset["annotations_path"], + show_progress=False, + ) + classes_on, paths_on, ann_on = load_coco_annotations( + images_directory_path=coco_dataset["images_dir"], + annotations_path=coco_dataset["annotations_path"], + show_progress=True, + ) + assert classes_off == classes_on + assert paths_off == paths_on + + +# --------------------------------------------------------------------------- +# Load: Pascal VOC +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("show_progress", [False, True]) +def test_load_pascal_voc_show_progress( + pascal_voc_dataset: dict, show_progress: bool +) -> None: + classes, image_paths, annotations = load_pascal_voc_annotations( + images_directory_path=pascal_voc_dataset["images_dir"], + annotations_directory_path=pascal_voc_dataset["annotations_dir"], + show_progress=show_progress, + ) + assert "dog" in classes + assert len(image_paths) == 2 + assert len(annotations) == 2 + + +def test_load_pascal_voc_show_progress_consistent( + pascal_voc_dataset: dict, +) -> None: + classes_off, paths_off, _ = load_pascal_voc_annotations( + 
images_directory_path=pascal_voc_dataset["images_dir"], + annotations_directory_path=pascal_voc_dataset["annotations_dir"], + show_progress=False, + ) + classes_on, paths_on, _ = load_pascal_voc_annotations( + images_directory_path=pascal_voc_dataset["images_dir"], + annotations_directory_path=pascal_voc_dataset["annotations_dir"], + show_progress=True, + ) + assert classes_off == classes_on + assert set(paths_off) == set(paths_on) + + +# --------------------------------------------------------------------------- +# Save: YOLO +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("show_progress", [False, True]) +def test_save_yolo_annotations_show_progress( + yolo_dataset: dict, tmp_path: Path, show_progress: bool +) -> None: + ds = DetectionDataset.from_yolo( + images_directory_path=yolo_dataset["images_dir"], + annotations_directory_path=yolo_dataset["labels_dir"], + data_yaml_path=yolo_dataset["data_yaml_path"], + ) + out_dir = tmp_path / f"yolo_out_{show_progress}" + save_yolo_annotations( + dataset=ds, + annotations_directory_path=str(out_dir), + show_progress=show_progress, + ) + written = list(out_dir.glob("*.txt")) + assert len(written) == yolo_dataset["num_images"] + + +def test_save_yolo_annotations_show_progress_consistent( + yolo_dataset: dict, tmp_path: Path +) -> None: + ds = DetectionDataset.from_yolo( + images_directory_path=yolo_dataset["images_dir"], + annotations_directory_path=yolo_dataset["labels_dir"], + data_yaml_path=yolo_dataset["data_yaml_path"], + ) + out_off = tmp_path / "yolo_off" + out_on = tmp_path / "yolo_on" + save_yolo_annotations(dataset=ds, annotations_directory_path=str(out_off)) + save_yolo_annotations( + dataset=ds, annotations_directory_path=str(out_on), show_progress=True + ) + files_off = sorted(f.name for f in out_off.glob("*.txt")) + files_on = sorted(f.name for f in out_on.glob("*.txt")) + assert files_off == files_on + + +# 
--------------------------------------------------------------------------- +# Save: COCO +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("show_progress", [False, True]) +def test_save_coco_annotations_show_progress( + yolo_dataset: dict, tmp_path: Path, show_progress: bool +) -> None: + ds = DetectionDataset.from_yolo( + images_directory_path=yolo_dataset["images_dir"], + annotations_directory_path=yolo_dataset["labels_dir"], + data_yaml_path=yolo_dataset["data_yaml_path"], + ) + out_path = tmp_path / f"coco_{show_progress}" / "annotations.json" + save_coco_annotations( + dataset=ds, + annotation_path=str(out_path), + show_progress=show_progress, + ) + assert out_path.exists() + data = json.loads(out_path.read_text()) + assert len(data["images"]) == yolo_dataset["num_images"] + + +def test_save_coco_annotations_show_progress_consistent( + yolo_dataset: dict, tmp_path: Path +) -> None: + ds = DetectionDataset.from_yolo( + images_directory_path=yolo_dataset["images_dir"], + annotations_directory_path=yolo_dataset["labels_dir"], + data_yaml_path=yolo_dataset["data_yaml_path"], + ) + out_off = tmp_path / "coco_off" / "annotations.json" + out_on = tmp_path / "coco_on" / "annotations.json" + save_coco_annotations(dataset=ds, annotation_path=str(out_off)) + save_coco_annotations( + dataset=ds, annotation_path=str(out_on), show_progress=True + ) + data_off = json.loads(out_off.read_text()) + data_on = json.loads(out_on.read_text()) + assert len(data_off["images"]) == len(data_on["images"]) + assert len(data_off["annotations"]) == len(data_on["annotations"]) + assert data_off["categories"] == data_on["categories"] + + +# --------------------------------------------------------------------------- +# Save: Pascal VOC (via DetectionDataset.as_pascal_voc) +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("show_progress", [False, True]) +def 
test_as_pascal_voc_show_progress( + yolo_dataset: dict, tmp_path: Path, show_progress: bool +) -> None: + ds = DetectionDataset.from_yolo( + images_directory_path=yolo_dataset["images_dir"], + annotations_directory_path=yolo_dataset["labels_dir"], + data_yaml_path=yolo_dataset["data_yaml_path"], + ) + out_dir = tmp_path / f"voc_{show_progress}" + ds.as_pascal_voc( + annotations_directory_path=str(out_dir), + show_progress=show_progress, + ) + written = list(out_dir.glob("*.xml")) + assert len(written) == yolo_dataset["num_images"] + + +def test_as_pascal_voc_show_progress_consistent( + yolo_dataset: dict, tmp_path: Path +) -> None: + ds = DetectionDataset.from_yolo( + images_directory_path=yolo_dataset["images_dir"], + annotations_directory_path=yolo_dataset["labels_dir"], + data_yaml_path=yolo_dataset["data_yaml_path"], + ) + out_off = tmp_path / "voc_off" + out_on = tmp_path / "voc_on" + ds.as_pascal_voc(annotations_directory_path=str(out_off)) + ds.as_pascal_voc( + annotations_directory_path=str(out_on), show_progress=True + ) + files_off = sorted(f.name for f in out_off.glob("*.xml")) + files_on = sorted(f.name for f in out_on.glob("*.xml")) + assert files_off == files_on + + +# --------------------------------------------------------------------------- +# Save: images (save_dataset_images) +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("show_progress", [False, True]) +def test_save_dataset_images_show_progress( + yolo_dataset: dict, tmp_path: Path, show_progress: bool +) -> None: + ds = DetectionDataset.from_yolo( + images_directory_path=yolo_dataset["images_dir"], + annotations_directory_path=yolo_dataset["labels_dir"], + data_yaml_path=yolo_dataset["data_yaml_path"], + ) + out_dir = tmp_path / f"images_{show_progress}" + save_dataset_images( + dataset=ds, + images_directory_path=str(out_dir), + show_progress=show_progress, + ) + written = list(out_dir.glob("*.jpg")) + assert len(written) == 
yolo_dataset["num_images"] From e1b32207a8e2d816e85f883236d116b74fc34051 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 26 Mar 2026 14:36:27 +0000 Subject: [PATCH 3/4] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto=20?= =?UTF-8?q?format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/dataset/test_show_progress.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/tests/dataset/test_show_progress.py b/tests/dataset/test_show_progress.py index d2476b4e8..4df151d95 100644 --- a/tests/dataset/test_show_progress.py +++ b/tests/dataset/test_show_progress.py @@ -2,24 +2,29 @@ Tests that show_progress parameter works correctly for all dataset loaders and savers, and that output is identical regardless of its value. """ + from __future__ import annotations import json -import os from pathlib import Path import cv2 import numpy as np import pytest -from supervision import DetectionDataset, Detections -from supervision.dataset.formats.coco import load_coco_annotations, save_coco_annotations +from supervision import DetectionDataset +from supervision.dataset.formats.coco import ( + load_coco_annotations, + save_coco_annotations, +) from supervision.dataset.formats.pascal_voc import load_pascal_voc_annotations -from supervision.dataset.formats.yolo import load_yolo_annotations, save_yolo_annotations +from supervision.dataset.formats.yolo import ( + load_yolo_annotations, + save_yolo_annotations, +) from supervision.dataset.utils import save_dataset_images from tests.helpers import create_yolo_dataset - # --------------------------------------------------------------------------- # Fixtures # --------------------------------------------------------------------------- @@ -290,9 +295,7 @@ def test_save_coco_annotations_show_progress_consistent( out_off = tmp_path / "coco_off" / 
"annotations.json" out_on = tmp_path / "coco_on" / "annotations.json" save_coco_annotations(dataset=ds, annotation_path=str(out_off)) - save_coco_annotations( - dataset=ds, annotation_path=str(out_on), show_progress=True - ) + save_coco_annotations(dataset=ds, annotation_path=str(out_on), show_progress=True) data_off = json.loads(out_off.read_text()) data_on = json.loads(out_on.read_text()) assert len(data_off["images"]) == len(data_on["images"]) @@ -334,9 +337,7 @@ def test_as_pascal_voc_show_progress_consistent( out_off = tmp_path / "voc_off" out_on = tmp_path / "voc_on" ds.as_pascal_voc(annotations_directory_path=str(out_off)) - ds.as_pascal_voc( - annotations_directory_path=str(out_on), show_progress=True - ) + ds.as_pascal_voc(annotations_directory_path=str(out_on), show_progress=True) files_off = sorted(f.name for f in out_off.glob("*.xml")) files_on = sorted(f.name for f in out_on.glob("*.xml")) assert files_off == files_on From a9c024b171f25766e6afaace33413eb9feae1cfd Mon Sep 17 00:00:00 2001 From: Satish K C Date: Thu, 26 Mar 2026 09:40:17 -0500 Subject: [PATCH 4/4] fix: close Image.open with context manager and clean up ValueError message in YOLO loader --- src/supervision/dataset/formats/yolo.py | 11 ++++++----- tests/dataset/test_show_progress.py | 4 ++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/supervision/dataset/formats/yolo.py b/src/supervision/dataset/formats/yolo.py index 5ddcc7885..b4244746c 100644 --- a/src/supervision/dataset/formats/yolo.py +++ b/src/supervision/dataset/formats/yolo.py @@ -215,14 +215,15 @@ def load_yolo_annotations( continue # PIL is much faster than cv2 for checking image shape and mode: https://github.com/roboflow/supervision/issues/1554 - image = Image.open(image_path) + with Image.open(image_path) as image: + w, h = image.size + mode = image.mode lines = read_txt_file(file_path=annotation_path, skip_empty=True) - w, h = image.size resolution_wh = (w, h) - if image.mode not in ("RGB", "L"): + if 
mode not in ("RGB", "L"): raise ValueError( - f"Images must be 'RGB' or 'grayscale', \ - but {image_path} mode is '{image.mode}'." + f"Images must be 'RGB' or 'grayscale'," + f" but {image_path} mode is '{mode}'." ) with_masks = force_masks or _with_seg_mask(lines=lines) diff --git a/tests/dataset/test_show_progress.py b/tests/dataset/test_show_progress.py index 4df151d95..5c62e716a 100644 --- a/tests/dataset/test_show_progress.py +++ b/tests/dataset/test_show_progress.py @@ -166,12 +166,12 @@ def test_load_coco_show_progress(coco_dataset: dict, show_progress: bool) -> Non def test_load_coco_show_progress_consistent(coco_dataset: dict) -> None: - classes_off, paths_off, ann_off = load_coco_annotations( + classes_off, paths_off, _ann_off = load_coco_annotations( images_directory_path=coco_dataset["images_dir"], annotations_path=coco_dataset["annotations_path"], show_progress=False, ) - classes_on, paths_on, ann_on = load_coco_annotations( + classes_on, paths_on, _ann_on = load_coco_annotations( images_directory_path=coco_dataset["images_dir"], annotations_path=coco_dataset["annotations_path"], show_progress=True,