diff --git a/tests/constants.py b/tests/constants.py
index 82f0ebb..76138e3 100644
--- a/tests/constants.py
+++ b/tests/constants.py
@@ -76,6 +76,7 @@
     {"name": "yolo26x", "version": "v26"},
     {"name": "yolo26n-seg", "version": "v26"},
     {"name": "yolo26n-pose", "version": "v26"},
+    {"name": "yolo26n", "version": "v26_nms", "cli_version": "yolov26_nms"},
     {"name": "yolov8n-cls", "version": "v8"},
     {"name": "yolov8n-seg", "version": "v8"},
     {"name": "yolov8n-pose", "version": "v8"},
diff --git a/tests/helper_functions.py b/tests/helper_functions.py
index 3ef5950..8b931ad 100644
--- a/tests/helper_functions.py
+++ b/tests/helper_functions.py
@@ -127,3 +127,33 @@ def nn_archive_checker(extra_keys_to_check: list = []):  # noqa: B006
             assert temp_cfg[keys[-1]] == target, (
                 f"Value `{temp_cfg[keys[-1]]}` at key `{keys}` doesn't match expected value `{target}`"
             )
+
+
+def load_latest_nn_archive_config() -> dict:
+    """Load config.json from the most recently exported NNArchive."""
+    output_dir = "shared_with_container/outputs"
+    subdirs = [
+        d for d in os.listdir(output_dir) if os.path.isdir(os.path.join(output_dir, d))
+    ]
+    assert subdirs, f"No folders found in `{output_dir}`"
+
+    subdirs.sort(key=lambda d: os.path.getmtime(os.path.join(output_dir, d)))
+    latest_subdir = subdirs[-1]
+    model_output_path = os.path.join(output_dir, latest_subdir)
+
+    archive_files = [f for f in os.listdir(model_output_path) if f.endswith(".tar.xz")]
+    assert len(archive_files) == 1, (
+        f"Expected 1 .tar.xz file, found {len(archive_files)}: {archive_files}"
+    )
+    archive_path = os.path.join(model_output_path, archive_files[0])
+
+    with tarfile.open(archive_path, "r:xz") as tar:
+        file_names = [m.name for m in tar.getmembers() if m.isfile()]
+        config_files = [name for name in file_names if name.endswith("config.json")]
+        assert len(config_files) == 1, (
+            f"Expected 1 config.json file, found {len(config_files)}: {config_files}"
+        )
+        config_member = tar.getmember(config_files[0])
+        config_file = tar.extractfile(config_member)
+        assert config_file is not None, "Failed to extract config.json"
+        return json.load(config_file)
diff --git a/tests/nnarchive_output_checks.py b/tests/nnarchive_output_checks.py
new file mode 100644
index 0000000..129ac5f
--- /dev/null
+++ b/tests/nnarchive_output_checks.py
@@ -0,0 +1,101 @@
+from __future__ import annotations
+
+from copy import deepcopy
+
+V8_DETECTION_CHECK = {
+    "name": "yolov8n",
+    "version": "v8",
+    "model_outputs": ["output1_yolov6r2", "output2_yolov6r2", "output3_yolov6r2"],
+    "head_outputs": ["output1_yolov6r2", "output2_yolov6r2", "output3_yolov6r2"],
+    "yolo_outputs": ["output1_yolov6r2", "output2_yolov6r2", "output3_yolov6r2"],
+}
+
+V8_SEG_CHECK = {
+    "name": "yolov8n-seg",
+    "version": "v8",
+    "model_outputs": [
+        "output1_yolov8",
+        "output2_yolov8",
+        "output3_yolov8",
+        "output1_masks",
+        "output2_masks",
+        "output3_masks",
+        "protos_output",
+    ],
+    "head_outputs": [
+        "output1_yolov8",
+        "output2_yolov8",
+        "output3_yolov8",
+        "output1_masks",
+        "output2_masks",
+        "output3_masks",
+        "protos_output",
+    ],
+    "yolo_outputs": ["output1_yolov8", "output2_yolov8", "output3_yolov8"],
+    "mask_outputs": ["output1_masks", "output2_masks", "output3_masks"],
+}
+
+V8_POSE_CHECK = {
+    "name": "yolov8n-pose",
+    "version": "v8",
+    "model_outputs": [
+        "output1_yolov8",
+        "output2_yolov8",
+        "output3_yolov8",
+        "kpt_output1",
+        "kpt_output2",
+        "kpt_output3",
+    ],
+    "head_outputs": [
+        "output1_yolov8",
+        "output2_yolov8",
+        "output3_yolov8",
+        "kpt_output1",
+        "kpt_output2",
+        "kpt_output3",
+    ],
+    "yolo_outputs": ["output1_yolov8", "output2_yolov8", "output3_yolov8"],
+    "keypoints_outputs": ["kpt_output1", "kpt_output2", "kpt_output3"],
+}
+
+
+def _clone_check(base_case: dict, *, name: str, version: str) -> dict:
+    case = deepcopy(base_case)
+    case["name"] = name
+    case["version"] = version
+    return case
+
+
+N_VARIANT_OUTPUT_NAME_CHECKS = [
+    V8_DETECTION_CHECK,
+    V8_SEG_CHECK,
+    V8_POSE_CHECK,
+    _clone_check(V8_DETECTION_CHECK, name="yolov9t", version="v9"),
+    _clone_check(V8_DETECTION_CHECK, name="yolov11n", version="v11"),
+    _clone_check(V8_SEG_CHECK, name="yolov11n-seg", version="v11"),
+    _clone_check(V8_POSE_CHECK, name="yolov11n-pose", version="v11"),
+    _clone_check(V8_DETECTION_CHECK, name="yolov12n", version="v12"),
+    {
+        "name": "yolo26n",
+        "version": "v26",
+        "model_outputs": ["output_yolo26"],
+        "head_outputs": ["output_yolo26"],
+        "yolo_outputs": ["output_yolo26"],
+    },
+    {
+        "name": "yolo26n-seg",
+        "version": "v26",
+        "model_outputs": ["output_yolo26", "output_masks", "protos_output"],
+        "head_outputs": ["output_yolo26", "output_masks", "protos_output"],
+        "yolo_outputs": ["output_yolo26"],
+        "mask_outputs": ["output_masks"],
+    },
+    {
+        "name": "yolo26n-pose",
+        "version": "v26",
+        "model_outputs": ["output_yolo26", "kpt_output"],
+        "head_outputs": ["output_yolo26", "kpt_output"],
+        "yolo_outputs": ["output_yolo26"],
+        "keypoints_outputs": ["kpt_output"],
+    },
+]
diff --git a/tests/test_end2end.py b/tests/test_end2end.py
index 283fc84..308d051 100644
--- a/tests/test_end2end.py
+++ b/tests/test_end2end.py
@@ -6,7 +6,13 @@
 import pytest
 
 from constants import PRIVATE_TEST_MODELS, SAVE_FOLDER, TEST_MODELS
-from helper_functions import download_model, download_private_model, nn_archive_checker
+from helper_functions import (
+    download_model,
+    download_private_model,
+    load_latest_nn_archive_config,
+    nn_archive_checker,
+)
+from nnarchive_output_checks import N_VARIANT_OUTPUT_NAME_CHECKS
 
 logger = logging.getLogger()
 logger.setLevel(logging.INFO)
@@ -15,7 +21,7 @@
 @pytest.mark.parametrize(
     "model",
     TEST_MODELS,
-    ids=[model["name"] for model in TEST_MODELS],
+    ids=[model.get("cli_version") or model["name"] for model in TEST_MODELS],
 )
 def test_cli_conversion(model: dict, test_config: dict, subtests):
     """Tests the whole CLI conversion flow with no extra params specified."""
@@ -50,6 +56,8 @@ def test_cli_conversion(model: dict, test_config: dict, subtests):
             pytest.skip("Weights not present and `download_weights` not set")
 
     command = ["tools", model_path]
+    if model.get("cli_version"):
+        command += ["--version", model.get("cli_version")]
     if model.get("size"):  # edge case when stride=64 is needed
         command += ["--imgsz", model.get("size")]
 
@@ -79,6 +87,65 @@ def test_cli_conversion(model: dict, test_config: dict, subtests):
     nn_archive_checker(extra_keys_to_check=extra_keys_to_check)
 
 
+@pytest.mark.parametrize(
+    "model_case",
+    N_VARIANT_OUTPUT_NAME_CHECKS,
+    ids=[model_case["name"] for model_case in N_VARIANT_OUTPUT_NAME_CHECKS],
+)
+def test_n_variant_nnarchive_outputs(model_case: dict, test_config: dict):
+    """Checks NNArchive output-related fields for selected variants."""
+    if (
+        test_config["test_case"] is not None
+        and model_case["name"] != test_config["test_case"]
+    ):
+        pytest.skip(
+            f"Test case ({model_case['name']}) doesn't match selected test case ({test_config['test_case']})"
+        )
+
+    if (
+        test_config["yolo_version"] is not None
+        and model_case["version"] != test_config["yolo_version"]
+    ):
+        pytest.skip(
+            f"Model version ({model_case['version']}) doesn't match selected version ({test_config['yolo_version']})."
+        )
+
+    model_path = os.path.join(SAVE_FOLDER, f"{model_case['name']}.pt")
+    if not os.path.exists(model_path):
+        if test_config["download_weights"]:
+            model_path = download_model(model_case["name"], SAVE_FOLDER)
+        else:
+            pytest.skip("Weights missing and `download_weights` not set")
+
+    command = ["tools", model_path]
+    result = subprocess.run(
+        command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
+    )
+    if result.returncode != 0:
+        pytest.fail(f"Exit code: {result.returncode}, Output: {result.stdout}")
+
+    cfg = load_latest_nn_archive_config()
+    output_names = [output["name"] for output in cfg["model"]["outputs"]]
+    head = cfg["model"]["heads"][0]
+    metadata = head["metadata"]
+    head_output_names = head["outputs"]
+    yolo_output_names = metadata.get("yolo_outputs") or []
+    mask_output_names = metadata.get("mask_outputs") or []
+    keypoint_output_names = metadata.get("keypoints_outputs") or []
+
+    for key, actual in [
+        ("model_outputs", output_names),
+        ("head_outputs", head_output_names),
+        ("yolo_outputs", yolo_output_names),
+        ("mask_outputs", mask_output_names),
+        ("keypoints_outputs", keypoint_output_names),
+    ]:
+        for expected_name in model_case.get(key, []):
+            assert expected_name in actual, (
+                f"{key}: expected `{expected_name}` for {model_case['name']}, got {actual}"
+            )
+
+
 @pytest.mark.parametrize(
     "model",
     PRIVATE_TEST_MODELS,
diff --git a/tools/main.py b/tools/main.py
index a8f9717..3cf3f59 100644
--- a/tools/main.py
+++ b/tools/main.py
@@ -28,6 +28,7 @@
     YOLOV11_CONVERSION,
     YOLOV12_CONVERSION,
     YOLOV26_CONVERSION,
+    YOLOV26_NMS_CONVERSION,
     detect_version,
 )
 
@@ -50,6 +51,7 @@
     YOLOV11_CONVERSION,
     YOLOV12_CONVERSION,
     YOLOV26_CONVERSION,
+    YOLOV26_NMS_CONVERSION,
 ]
 
 
@@ -176,6 +178,7 @@ def convert(
             YOLOV9_CONVERSION,
             YOLOV11_CONVERSION,
             YOLOV12_CONVERSION,
+            YOLOV26_NMS_CONVERSION,
         ]:
             from tools.yolo.yolov8_exporter import YoloV8Exporter
 
diff --git a/tools/modules/exporter.py b/tools/modules/exporter.py
index c2825bc..5bf9c63 100644
--- a/tools/modules/exporter.py
+++ b/tools/modules/exporter.py
@@ -2,7 +2,7 @@
 
 import os
 from datetime import datetime
-from typing import List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple
 
 import onnx
 import onnxsim
@@ -101,6 +101,44 @@ def export_onnx(self) -> os.PathLike:
 
         return self.f_onnx
 
+    @staticmethod
+    def _infer_layout_from_shape(shape: List[Any]) -> Optional[str]:
+        rank = len(shape)
+        if rank == 4:
+            return "NCHW"
+        if rank == 3:
+            return "NCD"
+        if rank == 2:
+            return "NC"
+        if rank == 1:
+            return "C"
+        return None
+
+    def get_output_specs(self) -> Dict[str, Dict[str, Any]]:
+        """Collect output shape and layout for all ONNX outputs by name."""
+        if self.f_onnx is None:
+            raise RuntimeError("ONNX must be exported before reading output specs.")
+
+        model_onnx = onnx.load(self.f_onnx)
+        specs: Dict[str, Dict[str, Any]] = {}
+
+        for output in model_onnx.graph.output:
+            shape: List[Any] = []
+            for dim in output.type.tensor_type.shape.dim:
+                if dim.HasField("dim_value"):
+                    shape.append(int(dim.dim_value))
+                elif dim.HasField("dim_param") and dim.dim_param:
+                    shape.append(dim.dim_param)
+                else:
+                    shape.append(None)
+
+            specs[output.name] = {
+                "shape": shape,
+                "layout": self._infer_layout_from_shape(shape),
+            }
+
+        return specs
+
     def make_nn_archive(
         self,
         class_list: List[str],
@@ -144,6 +182,7 @@ def make_nn_archive(
 
         if output_kwargs is None:
             output_kwargs = {}
+        output_specs = self.get_output_specs()
 
         archive = ArchiveGenerator(
             archive_name=self.model_name,
@@ -172,6 +211,8 @@ def make_nn_archive(
                         {
                             "name": output,
                             "dtype": DataType.FLOAT32,
+                            "shape": output_specs.get(output, {}).get("shape"),
+                            "layout": output_specs.get(output, {}).get("layout"),
                         }
                         for output in self.all_output_names
                     ],
diff --git a/tools/modules/heads.py b/tools/modules/heads.py
index fb5e058..5c407c9 100644
--- a/tools/modules/heads.py
+++ b/tools/modules/heads.py
@@ -366,11 +366,17 @@ def __init__(self, old_detect, use_rvc2: bool):
 
         self.use_rvc2 = use_rvc2
 
-        self.proj_conv = nn.Conv2d(old_detect.dfl.c1, 1, 1, bias=False).requires_grad_(
-            False
-        )
-        x = torch.arange(old_detect.dfl.c1, dtype=torch.float)
-        self.proj_conv.weight.data[:] = nn.Parameter(x.view(1, old_detect.dfl.c1, 1, 1))
+        # yolo26: dfl will be nn.Identity(), we set proj_conv = None and skip the DFL block in forward
+        if hasattr(old_detect.dfl, "c1"):
+            self.proj_conv = nn.Conv2d(
+                old_detect.dfl.c1, 1, 1, bias=False
+            ).requires_grad_(False)
+            x = torch.arange(old_detect.dfl.c1, dtype=torch.float)
+            self.proj_conv.weight.data[:] = nn.Parameter(
+                x.view(1, old_detect.dfl.c1, 1, 1)
+            )
+        else:
+            self.proj_conv = None
 
     def forward(self, x):
         bs = x[0].shape[0]  # batch size
@@ -382,9 +388,10 @@ def forward(self, x):
 
             # ------------------------------
             # DFL PART
-            box = box.view(bs, 4, self.reg_max, h * w).permute(0, 2, 1, 3)
-            box = self.proj_conv(F.softmax(box, dim=1))[:, 0]
-            box = box.reshape([bs, 4, h, w])
+            if self.proj_conv is not None:
+                box = box.view(bs, 4, self.reg_max, h * w).permute(0, 2, 1, 3)
+                box = self.proj_conv(F.softmax(box, dim=1))[:, 0]
+                box = box.reshape([bs, 4, h, w])
             # ------------------------------
 
             cls = self.cv3[i](x[i])
@@ -584,8 +591,10 @@ def forward(self, x):
         }
 
         dbox = self._get_decode_boxes(preds)
-        y = torch.cat((dbox, preds["scores"].sigmoid()), 1)  # (bs, 4+nc, num_anchors)
-        y = y.permute(0, 2, 1)  # (bs, num_anchors, 4+nc)
+        cls_scores = preds["scores"].sigmoid()  # (bs, nc, num_anchors)
+        conf, _ = cls_scores.max(1, keepdim=True)  # ReduceMax: (bs, 1, num_anchors)
+        y = torch.cat((dbox, conf, cls_scores), 1)  # (bs, 4+1+nc, num_anchors)
+        y = y.permute(0, 2, 1)  # (bs, num_anchors, 5+nc)
         return y
 
     def _get_decode_boxes(self, preds):
@@ -642,11 +651,12 @@ def dist2bbox(distance, anchor_points, xywh=True, dim=-1):
 class SegmentV26(DetectV26):
     """YOLOv26 Segment head for end-to-end NMS-free instance segmentation models.
 
-    Outputs decoded boxes, class scores, mask coefficients (separate), and prototype masks.
+    Outputs decoded boxes, confidence, class scores, mask coefficients (separate), and prototype masks.
 
     Output format:
-        - detections: (batch, num_anchors, 4 + nc)
+        - detections: (batch, num_anchors, 5 + nc)
             - 4: decoded bbox coordinates (x1, y1, x2, y2) in pixel space
+            - 1: confidence score (ReduceMax over class scores)
             - nc: class scores (sigmoided)
         - mask_coeffs: (batch, num_anchors, nm)
             - nm: mask coefficients (raw, to be used with protos)
@@ -681,7 +691,7 @@ def forward(self, x):
 
         Returns:
             Tuple of:
-                - detections: (batch, num_anchors, 4 + nc)
+                - detections: (batch, num_anchors, 5 + nc)
                 - mask_coeffs: (batch, num_anchors, nm)
                 - protos: (batch, nm, proto_h, proto_w)
         """
@@ -712,9 +722,11 @@ def forward(self, x):
         # Decode boxes to pixel coordinates
         dbox = self._get_decode_boxes(preds)
 
-        # Detection output: boxes (4) + class scores (nc)
-        y = torch.cat((dbox, preds["scores"].sigmoid()), 1)  # (bs, 4+nc, num_anchors)
-        y = y.permute(0, 2, 1)  # (bs, num_anchors, 4+nc)
+        # Detection output: boxes (4) + confidence (1) + class scores (nc)
+        cls_scores = preds["scores"].sigmoid()  # (bs, nc, num_anchors)
+        conf, _ = cls_scores.max(1, keepdim=True)  # ReduceMax: (bs, 1, num_anchors)
+        y = torch.cat((dbox, conf, cls_scores), 1)  # (bs, 4+1+nc, num_anchors)
+        y = y.permute(0, 2, 1)  # (bs, num_anchors, 5+nc)
 
         # Mask coefficients output (separate)
         mask_coeffs_cat = torch.cat(mask_coeffs, dim=2)  # (bs, nm, num_anchors)
@@ -738,11 +750,12 @@ def _get_proto(self, x):
 class PoseV26(DetectV26):
     """YOLOv26 Pose head for end-to-end NMS-free pose estimation models.
 
-    Outputs decoded boxes, class scores, and decoded keypoints (separate).
+    Outputs decoded boxes, confidence, class scores, and decoded keypoints (separate).
 
     Output format:
-        - detections: (batch, num_anchors, 4 + nc)
+        - detections: (batch, num_anchors, 5 + nc)
             - 4: decoded bbox coordinates (x1, y1, x2, y2) in pixel space
+            - 1: confidence score (ReduceMax over class scores)
             - nc: class scores (sigmoided)
         - keypoints: (batch, num_anchors, nk)
             - nk: keypoint values (x, y, [visibility]) for each keypoint
@@ -774,7 +787,7 @@ def forward(self, x):
 
         Returns:
             Tuple of:
-                - detections: (batch, num_anchors, 4 + nc)
+                - detections: (batch, num_anchors, 5 + nc)
                 - keypoints: (batch, num_anchors, nk)
         """
         bs = x[0].shape[0]  # batch size
@@ -806,9 +819,11 @@ def forward(self, x):
         # from the parent DetectV26
         dbox = self._get_decode_boxes(preds)
 
-        # Detection output: boxes (4) + class scores (nc)
-        y = torch.cat((dbox, preds["scores"].sigmoid()), 1)  # (bs, 4+nc, num_anchors)
-        y = y.permute(0, 2, 1)  # (bs, num_anchors, 4+nc)
+        # Detection output: boxes (4) + confidence (1) + class scores (nc)
+        cls_scores = preds["scores"].sigmoid()  # (bs, nc, num_anchors)
+        conf, _ = cls_scores.max(1, keepdim=True)  # ReduceMax: (bs, 1, num_anchors)
+        y = torch.cat((dbox, conf, cls_scores), 1)  # (bs, 4+1+nc, num_anchors)
+        y = y.permute(0, 2, 1)  # (bs, num_anchors, 5+nc)
 
         # Decode and concatenate keypoints
         # Note: After _get_decode_boxes, self.anchors is (2, A) and self.strides is (1, A)
diff --git a/tools/version_detection/__init__.py b/tools/version_detection/__init__.py
index 0d097e4..81d7129 100644
--- a/tools/version_detection/__init__.py
+++ b/tools/version_detection/__init__.py
@@ -13,6 +13,7 @@
     YOLOV11_CONVERSION,
     YOLOV12_CONVERSION,
     YOLOV26_CONVERSION,
+    YOLOV26_NMS_CONVERSION,
     detect_version,
 )
 
@@ -30,6 +31,7 @@
     "YOLOV11_CONVERSION",
     "YOLOV12_CONVERSION",
     "YOLOV26_CONVERSION",
+    "YOLOV26_NMS_CONVERSION",
     "GOLD_YOLO_CONVERSION",
     "UNRECOGNIZED",
 ]
diff --git a/tools/version_detection/version_detection.py b/tools/version_detection/version_detection.py
index 5ad5d82..710df49 100644
--- a/tools/version_detection/version_detection.py
+++ b/tools/version_detection/version_detection.py
@@ -18,6 +18,7 @@
 YOLOV11_CONVERSION = "yolov11"
 YOLOV12_CONVERSION = "yolov12"
 YOLOV26_CONVERSION = "yolov26"
+YOLOV26_NMS_CONVERSION = "yolov26_nms"
 GOLD_YOLO_CONVERSION = "goldyolo"
 UNRECOGNIZED = "none"
 
diff --git a/tools/yolo/yolo26_exporter.py b/tools/yolo/yolo26_exporter.py
index 0284697..8b5267f 100644
--- a/tools/yolo/yolo26_exporter.py
+++ b/tools/yolo/yolo26_exporter.py
@@ -12,7 +12,8 @@
 
 current_dir = os.path.dirname(os.path.abspath(__file__))
 yolo_path = os.path.join(current_dir, "ultralytics")
-sys.path.append(yolo_path)
+if yolo_path not in sys.path:
+    sys.path.insert(0, yolo_path)
 
 from ultralytics.nn.modules import (  # noqa: E402
     Detect,
@@ -30,19 +31,19 @@
 
 def get_output_names(mode: int):
     if mode == DETECT_MODE:
-        return ["output"]
+        return ["output_yolo26"]
     elif mode == SEGMENT_MODE:
-        return ["output", "mask_output", "protos_output"]
+        return ["output_yolo26", "output_masks", "protos_output"]
     elif mode == POSE_MODE:
-        return ["output", "kpt_output"]
+        return ["output_yolo26", "kpt_output"]
     else:
         logger.warning("Unsupported task type for YOLO26, conversion may fail")
-        return ["output"]
+        return ["output_yolo26"]
 
 
 def get_yolo_output_names(mode: int = 0):
     # For now, yolo output names doesn't differ based on mode because we no longer extract 3 outputs from FPN
-    return ["output"]
+    return ["output_yolo26"]
 
 
 class Yolo26Exporter(Exporter):
@@ -52,7 +53,7 @@ def __init__(self, model_path: str, imgsz: Tuple[int, int], use_rvc2: bool):
             imgsz,
             use_rvc2,
             subtype="yolo26",
-            output_names=["output"],
+            output_names=["output_yolo26"],
         )
         self.load_model()
 
@@ -128,11 +129,11 @@ def export_nn_archive(
             self.make_nn_archive(
                 class_list=names,
                 n_classes=self.model.model[-1].nc,
-                parser="YOLOExtendedParser",
                 n_prototypes=self.model.model[-1].nm,
+                parser="YOLOExtendedParser",
                 is_softmax=False,  # E2E outputs are already sigmoided
                 output_kwargs={
-                    "mask_outputs": ["mask_output"],
+                    "mask_outputs": ["output_masks"],
                     "protos_outputs": "protos_output",
                 },
                 encoding=encoding,
diff --git a/tools/yolo/yolov8_exporter.py b/tools/yolo/yolov8_exporter.py
index 29db152..7048cc4 100644
--- a/tools/yolo/yolov8_exporter.py
+++ b/tools/yolo/yolov8_exporter.py
@@ -119,9 +119,16 @@ def __init__(
     def load_model(self):
         # load the model
         model, _ = load_checkpoint(
-            self.model_path, device="cpu", inplace=True, fuse=True
+            self.model_path, device="cpu", inplace=True, fuse=False
         )
 
+        # for yolo26 end2end has to be disabled before fusing
+        # otherwise cv2/cv3 are removed in the fuse process
+        head = model.model[-1]
+        if getattr(head, "end2end", False):
+            head.end2end = False
+        model.fuse()
+
         self.mode = -1
         if isinstance(model.model[-1], (Segment)) or isinstance(
             model.model[-1], (YOLOESegment)
@@ -241,7 +248,13 @@ def export_nn_archive(
                 n_classes=self.model.model[-1].nc,
                 parser="YOLOExtendedParser",
                 n_keypoints=self.model.model[-1].kpt_shape[0],
-                output_kwargs={"keypoints_outputs": ["kpt_output"]},
+                output_kwargs={
+                    "keypoints_outputs": [
+                        "kpt_output1",
+                        "kpt_output2",
+                        "kpt_output3",
+                    ]
+                },
                 encoding=encoding,
             )
         elif self.mode == CLASSIFY_MODE:
@@ -259,6 +272,8 @@ def make_cls_nn_archive(
             n_classes (int): Number of classes
             encoding (Encoding): Color encoding used in the input model. Defaults to RGB.
         """
+        output_specs = self.get_output_specs()
+
         archive = ArchiveGenerator(
             archive_name=self.model_name,
             save_path=str(self.output_folder),
@@ -286,6 +301,8 @@ def make_cls_nn_archive(
                         {
                             "name": output,
                             "dtype": DataType.FLOAT32,
+                            "shape": output_specs.get(output, {}).get("shape"),
+                            "layout": output_specs.get(output, {}).get("layout"),
                         }
                         for output in self.all_output_names
                     ],