diff --git a/tests/constants.py b/tests/constants.py
index 82f0ebb..76138e3 100644
--- a/tests/constants.py
+++ b/tests/constants.py
@@ -76,6 +76,7 @@
     {"name": "yolo26x", "version": "v26"},
     {"name": "yolo26n-seg", "version": "v26"},
     {"name": "yolo26n-pose", "version": "v26"},
+    {"name": "yolo26n", "version": "v26_nms", "cli_version": "yolov26_nms"},
     {"name": "yolov8n-cls", "version": "v8"},
     {"name": "yolov8n-seg", "version": "v8"},
     {"name": "yolov8n-pose", "version": "v8"},
diff --git a/tests/helper_functions.py b/tests/helper_functions.py
index 3ef5950..8b931ad 100644
--- a/tests/helper_functions.py
+++ b/tests/helper_functions.py
@@ -127,3 +127,33 @@ def nn_archive_checker(extra_keys_to_check: list = []):  # noqa: B006
                 assert temp_cfg[keys[-1]] == target, (
                     f"Value `{temp_cfg[keys[-1]]}` at key `{keys}` doesn't match expected value `{target}`"
                 )
+
+
+def load_latest_nn_archive_config() -> dict:
+    """Load `config.json` from the most recently modified export folder.
+
+    Expects one `.tar.xz` there holding one file named exactly `config.json`.
+    """
+    output_dir = "shared_with_container/outputs"
+    candidates = (os.path.join(output_dir, d) for d in os.listdir(output_dir))
+    subdirs = [path for path in candidates if os.path.isdir(path)]
+    assert subdirs, f"No folders found in `{output_dir}`"
+    # The newest export is the folder with the latest modification time.
+    model_output_path = max(subdirs, key=os.path.getmtime)
+
+    archive_files = [f for f in os.listdir(model_output_path) if f.endswith(".tar.xz")]
+    assert len(archive_files) == 1, (
+        f"Expected 1 .tar.xz file, found {len(archive_files)}: {archive_files}"
+    )
+    archive_path = os.path.join(model_output_path, archive_files[0])
+
+    with tarfile.open(archive_path, "r:xz") as tar:
+        # Compare the exact file name so e.g. `extra_config.json` is not counted.
+        members = [m for m in tar.getmembers() if m.isfile()]
+        configs = [m for m in members if os.path.basename(m.name) == "config.json"]
+        assert len(configs) == 1, (
+            f"Expected 1 config.json file, found {len(configs)}: {[m.name for m in configs]}"
+        )
+        config_file = tar.extractfile(configs[0])
+        assert config_file is not None, "Failed to extract config.json"
+        return json.load(config_file)
diff --git a/tests/nnarchive_output_checks.py b/tests/nnarchive_output_checks.py
new file mode 100644
index 0000000..129ac5f
--- /dev/null
+++ b/tests/nnarchive_output_checks.py
@@ -0,0 +1,101 @@
+from __future__ import annotations
+
+from copy import deepcopy
+
+V8_DETECTION_CHECK = {  # output names a yolov8n detection archive must contain
+    "name": "yolov8n",  # test id and weights file stem (`<name>.pt`)
+    "version": "v8",  # matched against the selected `yolo_version`
+    "model_outputs": ["output1_yolov6r2", "output2_yolov6r2", "output3_yolov6r2"],
+    "head_outputs": ["output1_yolov6r2", "output2_yolov6r2", "output3_yolov6r2"],
+    "yolo_outputs": ["output1_yolov6r2", "output2_yolov6r2", "output3_yolov6r2"],
+}
+
+V8_SEG_CHECK = {  # detection names plus mask and prototype outputs
+    "name": "yolov8n-seg",
+    "version": "v8",
+    "model_outputs": [  # checked against the names in cfg["model"]["outputs"]
+        "output1_yolov8",
+        "output2_yolov8",
+        "output3_yolov8",
+        "output1_masks",
+        "output2_masks",
+        "output3_masks",
+        "protos_output",
+    ],
+    "head_outputs": [  # checked against the first head's "outputs"
+        "output1_yolov8",
+        "output2_yolov8",
+        "output3_yolov8",
+        "output1_masks",
+        "output2_masks",
+        "output3_masks",
+        "protos_output",
+    ],
+    "yolo_outputs": ["output1_yolov8", "output2_yolov8", "output3_yolov8"],
+    "mask_outputs": ["output1_masks", "output2_masks", "output3_masks"],
+}
+
+V8_POSE_CHECK = {  # detection names plus three keypoint outputs
+    "name": "yolov8n-pose",
+    "version": "v8",
+    "model_outputs": [  # checked against the names in cfg["model"]["outputs"]
+        "output1_yolov8",
+        "output2_yolov8",
+        "output3_yolov8",
+        "kpt_output1",
+        "kpt_output2",
+        "kpt_output3",
+    ],
+    "head_outputs": [  # checked against the first head's "outputs"
+        "output1_yolov8",
+        "output2_yolov8",
+        "output3_yolov8",
+        "kpt_output1",
+        "kpt_output2",
+        "kpt_output3",
+    ],
+    "yolo_outputs": ["output1_yolov8", "output2_yolov8", "output3_yolov8"],
+    "keypoints_outputs": ["kpt_output1", "kpt_output2", "kpt_output3"],
+}
+
+
+def _clone_check(base_case: dict, *, name: str, version: str) -> dict:
+    """Return a deep copy of `base_case` with `name` and `version` replaced."""
+    clone = deepcopy(base_case)
+    clone.update(name=name, version=version)
+    return clone
+
+
+N_VARIANT_OUTPUT_NAME_CHECKS = [  # parametrizes test_n_variant_nnarchive_outputs
+    V8_DETECTION_CHECK,
+    V8_SEG_CHECK,
+    V8_POSE_CHECK,
+    _clone_check(V8_DETECTION_CHECK, name="yolov9t", version="v9"),
+    _clone_check(V8_DETECTION_CHECK, name="yolov11n", version="v11"),
+    _clone_check(V8_SEG_CHECK, name="yolov11n-seg", version="v11"),
+    _clone_check(V8_POSE_CHECK, name="yolov11n-pose", version="v11"),
+    _clone_check(V8_DETECTION_CHECK, name="yolov12n", version="v12"),
+    {  # yolo26 detection: one `output_yolo26` output is expected
+        "name": "yolo26n",
+        "version": "v26",
+        "model_outputs": ["output_yolo26"],
+        "head_outputs": ["output_yolo26"],
+        "yolo_outputs": ["output_yolo26"],
+    },
+    {  # yolo26 segmentation: adds one mask output and the prototype output
+        "name": "yolo26n-seg",
+        "version": "v26",
+        "model_outputs": ["output_yolo26", "output_masks", "protos_output"],
+        "head_outputs": ["output_yolo26", "output_masks", "protos_output"],
+        "yolo_outputs": ["output_yolo26"],
+        "mask_outputs": ["output_masks"],
+    },
+    {  # yolo26 pose: adds one keypoint output
+        "name": "yolo26n-pose",
+        "version": "v26",
+        "model_outputs": ["output_yolo26", "kpt_output"],
+        "head_outputs": ["output_yolo26", "kpt_output"],
+        "yolo_outputs": ["output_yolo26"],
+        "keypoints_outputs": ["kpt_output"],
+    },
+]
diff --git a/tests/test_end2end.py b/tests/test_end2end.py
index 283fc84..308d051 100644
--- a/tests/test_end2end.py
+++ b/tests/test_end2end.py
@@ -6,7 +6,13 @@
 
 import pytest
 from constants import PRIVATE_TEST_MODELS, SAVE_FOLDER, TEST_MODELS
-from helper_functions import download_model, download_private_model, nn_archive_checker
+from helper_functions import (
+    download_model,
+    download_private_model,
+    load_latest_nn_archive_config,
+    nn_archive_checker,
+)
+from nnarchive_output_checks import N_VARIANT_OUTPUT_NAME_CHECKS
 
 logger = logging.getLogger()
 logger.setLevel(logging.INFO)
@@ -15,7 +21,12 @@
 @pytest.mark.parametrize(
     "model",
     TEST_MODELS,
-    ids=[model["name"] for model in TEST_MODELS],
+    ids=[
+        model.get("cli_version", model["name"])
+        if model.get("cli_version")
+        else model["name"]
+        for model in TEST_MODELS
+    ],
 )
 def test_cli_conversion(model: dict, test_config: dict, subtests):
     """Tests the whole CLI conversion flow with no extra params specified."""
@@ -50,6 +61,8 @@ def test_cli_conversion(model: dict, test_config: dict, subtests):
             pytest.skip("Weights not present and `download_weights` not set")
 
     command = ["tools", model_path]
+    if model.get("cli_version"):
+        command += ["--version", model.get("cli_version")]
     if model.get("size"):  # edge case when stride=64 is needed
         command += ["--imgsz", model.get("size")]
 
@@ -79,6 +92,65 @@ def test_cli_conversion(model: dict, test_config: dict, subtests):
             nn_archive_checker(extra_keys_to_check=extra_keys_to_check)
 
 
+@pytest.mark.parametrize(
+    "model_case",
+    N_VARIANT_OUTPUT_NAME_CHECKS,
+    ids=[model_case["name"] for model_case in N_VARIANT_OUTPUT_NAME_CHECKS],
+)
+def test_n_variant_nnarchive_outputs(model_case: dict, test_config: dict):
+    """Convert one model via the CLI and check the output names in its NNArchive.
+
+    Names listed in `model_case` must appear in the matching `config.json` field;
+    extra names are tolerated and absent/null metadata fields count as empty.
+    """
+    if (
+        test_config["test_case"] is not None
+        and model_case["name"] != test_config["test_case"]
+    ):
+        pytest.skip(
+            f"Test case ({model_case['name']}) doesn't match selected test case ({test_config['test_case']})"
+        )
+
+    if (
+        test_config["yolo_version"] is not None
+        and model_case["version"] != test_config["yolo_version"]
+    ):
+        pytest.skip(
+            f"Model version ({model_case['version']}) doesn't match selected version ({test_config['yolo_version']})."
+        )
+
+    model_path = os.path.join(SAVE_FOLDER, f"{model_case['name']}.pt")
+    if not os.path.exists(model_path):
+        if test_config["download_weights"]:
+            model_path = download_model(model_case["name"], SAVE_FOLDER)
+        else:
+            pytest.skip("Weights missing and `download_weights` not set")
+
+    command = ["tools", model_path]
+    result = subprocess.run(
+        command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
+    )
+    if result.returncode != 0:
+        pytest.fail(f"Exit code: {result.returncode}, Output: {result.stdout}")
+
+    cfg = load_latest_nn_archive_config()
+    head = cfg["model"]["heads"][0]
+    metadata = head["metadata"]
+    actual_by_key = {
+        "model_outputs": [output["name"] for output in cfg["model"]["outputs"]],
+        "head_outputs": head["outputs"],
+        "yolo_outputs": metadata.get("yolo_outputs") or [],
+        "mask_outputs": metadata.get("mask_outputs") or [],
+        "keypoints_outputs": metadata.get("keypoints_outputs") or [],
+    }
+
+    for key, actual in actual_by_key.items():
+        for expected_name in model_case.get(key, []):
+            assert expected_name in actual, (
+                f"{key}: expected `{expected_name}` for {model_case['name']}, got {actual}"
+            )
+
+
 @pytest.mark.parametrize(
     "model",
     PRIVATE_TEST_MODELS,
diff --git a/tools/main.py b/tools/main.py
index a8f9717..3cf3f59 100644
--- a/tools/main.py
+++ b/tools/main.py
@@ -28,6 +28,7 @@
     YOLOV11_CONVERSION,
     YOLOV12_CONVERSION,
     YOLOV26_CONVERSION,
+    YOLOV26_NMS_CONVERSION,
     detect_version,
 )
 
@@ -50,6 +51,7 @@
     YOLOV11_CONVERSION,
     YOLOV12_CONVERSION,
     YOLOV26_CONVERSION,
+    YOLOV26_NMS_CONVERSION,
 ]
 
 
@@ -176,6 +178,7 @@ def convert(
             YOLOV9_CONVERSION,
             YOLOV11_CONVERSION,
             YOLOV12_CONVERSION,
+            YOLOV26_NMS_CONVERSION,
         ]:
             from tools.yolo.yolov8_exporter import YoloV8Exporter
 
diff --git a/tools/modules/exporter.py b/tools/modules/exporter.py
index c2825bc..5bf9c63 100644
--- a/tools/modules/exporter.py
+++ b/tools/modules/exporter.py
@@ -2,7 +2,7 @@
 
 import os
 from datetime import datetime
-from typing import List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple
 
 import onnx
 import onnxsim
@@ -101,6 +101,44 @@ def export_onnx(self) -> os.PathLike:
 
         return self.f_onnx
 
+    @staticmethod
+    def _infer_layout_from_shape(shape: List[Any]) -> Optional[str]:
+        """Return the layout string for the rank of `shape` (`None` if not 1-4)."""
+        # Only the number of dimensions matters; their values are not inspected.
+        layouts_by_rank = {
+            1: "C",
+            2: "NC",
+            3: "NCD",
+            4: "NCHW",
+        }
+
+        return layouts_by_rank.get(len(shape))
+
+    def get_output_specs(self) -> Dict[str, Dict[str, Any]]:
+        """Read each ONNX graph output's shape and inferred layout, keyed by name.
+
+        Fixed dimensions become ints, named ones keep their name, others are None.
+        Raises RuntimeError when `self.f_onnx` is None.
+        """
+        if self.f_onnx is None:
+            raise RuntimeError("ONNX must be exported before reading output specs.")
+
+        def read_dim(dim) -> Any:
+            if dim.HasField("dim_value"):
+                return int(dim.dim_value)
+            if dim.HasField("dim_param") and dim.dim_param:
+                return dim.dim_param
+            return None
+
+        specs: Dict[str, Dict[str, Any]] = {}
+        for graph_output in onnx.load(self.f_onnx).graph.output:
+            dims = [read_dim(d) for d in graph_output.type.tensor_type.shape.dim]
+            specs[graph_output.name] = {
+                "shape": dims,
+                "layout": self._infer_layout_from_shape(dims),
+            }
+        return specs
+
     def make_nn_archive(
         self,
         class_list: List[str],
@@ -144,6 +182,7 @@ def make_nn_archive(
 
         if output_kwargs is None:
             output_kwargs = {}
+        output_specs = self.get_output_specs()
 
         archive = ArchiveGenerator(
             archive_name=self.model_name,
@@ -172,6 +211,8 @@ def make_nn_archive(
                         {
                             "name": output,
                             "dtype": DataType.FLOAT32,
+                            "shape": output_specs.get(output, {}).get("shape"),
+                            "layout": output_specs.get(output, {}).get("layout"),
                         }
                         for output in self.all_output_names
                     ],
diff --git a/tools/modules/heads.py b/tools/modules/heads.py
index fb5e058..5c407c9 100644
--- a/tools/modules/heads.py
+++ b/tools/modules/heads.py
@@ -366,11 +366,17 @@ def __init__(self, old_detect, use_rvc2: bool):
 
         self.use_rvc2 = use_rvc2
 
-        self.proj_conv = nn.Conv2d(old_detect.dfl.c1, 1, 1, bias=False).requires_grad_(
-            False
-        )
-        x = torch.arange(old_detect.dfl.c1, dtype=torch.float)
-        self.proj_conv.weight.data[:] = nn.Parameter(x.view(1, old_detect.dfl.c1, 1, 1))
+        # yolo26: dfl will be nn.Identity(), we set proj_conv = None and skip the DFL block in forward
+        if hasattr(old_detect.dfl, "c1"):
+            self.proj_conv = nn.Conv2d(
+                old_detect.dfl.c1, 1, 1, bias=False
+            ).requires_grad_(False)
+            x = torch.arange(old_detect.dfl.c1, dtype=torch.float)
+            self.proj_conv.weight.data[:] = nn.Parameter(
+                x.view(1, old_detect.dfl.c1, 1, 1)
+            )
+        else:
+            self.proj_conv = None
 
     def forward(self, x):
         bs = x[0].shape[0]  # batch size
@@ -382,9 +388,10 @@ def forward(self, x):
 
             # ------------------------------
             # DFL PART
-            box = box.view(bs, 4, self.reg_max, h * w).permute(0, 2, 1, 3)
-            box = self.proj_conv(F.softmax(box, dim=1))[:, 0]
-            box = box.reshape([bs, 4, h, w])
+            if self.proj_conv is not None:
+                box = box.view(bs, 4, self.reg_max, h * w).permute(0, 2, 1, 3)
+                box = self.proj_conv(F.softmax(box, dim=1))[:, 0]
+                box = box.reshape([bs, 4, h, w])
             # ------------------------------
 
             cls = self.cv3[i](x[i])
@@ -584,8 +591,10 @@ def forward(self, x):
         }
 
         dbox = self._get_decode_boxes(preds)
-        y = torch.cat((dbox, preds["scores"].sigmoid()), 1)  # (bs, 4+nc, num_anchors)
-        y = y.permute(0, 2, 1)  # (bs, num_anchors, 4+nc)
+        cls_scores = preds["scores"].sigmoid()  # (bs, nc, num_anchors)
+        conf, _ = cls_scores.max(1, keepdim=True)  # ReduceMax: (bs, 1, num_anchors)
+        y = torch.cat((dbox, conf, cls_scores), 1)  # (bs, 4+1+nc, num_anchors)
+        y = y.permute(0, 2, 1)  # (bs, num_anchors, 5+nc)
         return y
 
     def _get_decode_boxes(self, preds):
@@ -642,11 +651,12 @@ def dist2bbox(distance, anchor_points, xywh=True, dim=-1):
 class SegmentV26(DetectV26):
     """YOLOv26 Segment head for end-to-end NMS-free instance segmentation models.
 
-    Outputs decoded boxes, class scores, mask coefficients (separate), and prototype masks.
+    Outputs decoded boxes, confidence, class scores, mask coefficients (separate), and prototype masks.
 
     Output format:
-        - detections: (batch, num_anchors, 4 + nc)
+        - detections: (batch, num_anchors, 5 + nc)
             - 4: decoded bbox coordinates (x1, y1, x2, y2) in pixel space
+            - 1: confidence score (ReduceMax over class scores)
             - nc: class scores (sigmoided)
         - mask_coeffs: (batch, num_anchors, nm)
             - nm: mask coefficients (raw, to be used with protos)
@@ -681,7 +691,7 @@ def forward(self, x):
 
         Returns:
             Tuple of:
-                - detections: (batch, num_anchors, 4 + nc)
+                - detections: (batch, num_anchors, 5 + nc)
                 - mask_coeffs: (batch, num_anchors, nm)
                 - protos: (batch, nm, proto_h, proto_w)
         """
@@ -712,9 +722,11 @@ def forward(self, x):
         # Decode boxes to pixel coordinates
         dbox = self._get_decode_boxes(preds)
 
-        # Detection output: boxes (4) + class scores (nc)
-        y = torch.cat((dbox, preds["scores"].sigmoid()), 1)  # (bs, 4+nc, num_anchors)
-        y = y.permute(0, 2, 1)  # (bs, num_anchors, 4+nc)
+        # Detection output: boxes (4) + confidence (1) + class scores (nc)
+        cls_scores = preds["scores"].sigmoid()  # (bs, nc, num_anchors)
+        conf, _ = cls_scores.max(1, keepdim=True)  # ReduceMax: (bs, 1, num_anchors)
+        y = torch.cat((dbox, conf, cls_scores), 1)  # (bs, 4+1+nc, num_anchors)
+        y = y.permute(0, 2, 1)  # (bs, num_anchors, 5+nc)
 
         # Mask coefficients output (separate)
         mask_coeffs_cat = torch.cat(mask_coeffs, dim=2)  # (bs, nm, num_anchors)
@@ -738,11 +750,12 @@ def _get_proto(self, x):
 class PoseV26(DetectV26):
     """YOLOv26 Pose head for end-to-end NMS-free pose estimation models.
 
-    Outputs decoded boxes, class scores, and decoded keypoints (separate).
+    Outputs decoded boxes, confidence, class scores, and decoded keypoints (separate).
 
     Output format:
-        - detections: (batch, num_anchors, 4 + nc)
+        - detections: (batch, num_anchors, 5 + nc)
             - 4: decoded bbox coordinates (x1, y1, x2, y2) in pixel space
+            - 1: confidence score (ReduceMax over class scores)
             - nc: class scores (sigmoided)
         - keypoints: (batch, num_anchors, nk)
             - nk: keypoint values (x, y, [visibility]) for each keypoint
@@ -774,7 +787,7 @@ def forward(self, x):
 
         Returns:
             Tuple of:
-                - detections: (batch, num_anchors, 4 + nc)
+                - detections: (batch, num_anchors, 5 + nc)
                 - keypoints: (batch, num_anchors, nk)
         """
         bs = x[0].shape[0]  # batch size
@@ -806,9 +819,11 @@ def forward(self, x):
         # from the parent DetectV26
         dbox = self._get_decode_boxes(preds)
 
-        # Detection output: boxes (4) + class scores (nc)
-        y = torch.cat((dbox, preds["scores"].sigmoid()), 1)  # (bs, 4+nc, num_anchors)
-        y = y.permute(0, 2, 1)  # (bs, num_anchors, 4+nc)
+        # Detection output: boxes (4) + confidence (1) + class scores (nc)
+        cls_scores = preds["scores"].sigmoid()  # (bs, nc, num_anchors)
+        conf, _ = cls_scores.max(1, keepdim=True)  # ReduceMax: (bs, 1, num_anchors)
+        y = torch.cat((dbox, conf, cls_scores), 1)  # (bs, 4+1+nc, num_anchors)
+        y = y.permute(0, 2, 1)  # (bs, num_anchors, 5+nc)
 
         # Decode and concatenate keypoints
         # Note: After _get_decode_boxes, self.anchors is (2, A) and self.strides is (1, A)
diff --git a/tools/version_detection/__init__.py b/tools/version_detection/__init__.py
index 0d097e4..81d7129 100644
--- a/tools/version_detection/__init__.py
+++ b/tools/version_detection/__init__.py
@@ -13,6 +13,7 @@
     YOLOV11_CONVERSION,
     YOLOV12_CONVERSION,
     YOLOV26_CONVERSION,
+    YOLOV26_NMS_CONVERSION,
     detect_version,
 )
 
@@ -30,6 +31,7 @@
     "YOLOV11_CONVERSION",
     "YOLOV12_CONVERSION",
     "YOLOV26_CONVERSION",
+    "YOLOV26_NMS_CONVERSION",
     "GOLD_YOLO_CONVERSION",
     "UNRECOGNIZED",
 ]
diff --git a/tools/version_detection/version_detection.py b/tools/version_detection/version_detection.py
index 5ad5d82..710df49 100644
--- a/tools/version_detection/version_detection.py
+++ b/tools/version_detection/version_detection.py
@@ -18,6 +18,7 @@
 YOLOV11_CONVERSION = "yolov11"
 YOLOV12_CONVERSION = "yolov12"
 YOLOV26_CONVERSION = "yolov26"
+YOLOV26_NMS_CONVERSION = "yolov26_nms"
 GOLD_YOLO_CONVERSION = "goldyolo"
 UNRECOGNIZED = "none"
 
diff --git a/tools/yolo/yolo26_exporter.py b/tools/yolo/yolo26_exporter.py
index 0284697..8b5267f 100644
--- a/tools/yolo/yolo26_exporter.py
+++ b/tools/yolo/yolo26_exporter.py
@@ -12,7 +12,8 @@
 
 current_dir = os.path.dirname(os.path.abspath(__file__))
 yolo_path = os.path.join(current_dir, "ultralytics")
-sys.path.append(yolo_path)
+if yolo_path not in sys.path:
+    sys.path.insert(0, yolo_path)
 
 from ultralytics.nn.modules import (  # noqa: E402
     Detect,
@@ -30,11 +31,11 @@
 
 def get_output_names(mode: int):
     if mode == DETECT_MODE:
-        return ["output"]
+        return ["output_yolo26"]
     elif mode == SEGMENT_MODE:
-        return ["output", "mask_output", "protos_output"]
+        return ["output_yolo26", "output_masks", "protos_output"]
     elif mode == POSE_MODE:
-        return ["output", "kpt_output"]
+        return ["output_yolo26", "kpt_output"]
     else:
         logger.warning("Unsupported task type for YOLO26, conversion may fail")
         return ["output"]
@@ -42,7 +43,7 @@ def get_output_names(mode: int):
 
 def get_yolo_output_names(mode: int = 0):
     # For now, yolo output names doesn't differ based on mode because we no longer extract 3 outputs from FPN
-    return ["output"]
+    return ["output_yolo26"]
 
 
 class Yolo26Exporter(Exporter):
@@ -52,7 +53,7 @@ def __init__(self, model_path: str, imgsz: Tuple[int, int], use_rvc2: bool):
             imgsz,
             use_rvc2,
             subtype="yolo26",
-            output_names=["output"],
+            output_names=["output_yolo26"],
         )
         self.load_model()
 
@@ -128,11 +129,11 @@ def export_nn_archive(
             self.make_nn_archive(
                 class_list=names,
                 n_classes=self.model.model[-1].nc,
-                parser="YOLOExtendedParser",
                 n_prototypes=self.model.model[-1].nm,
+                parser="YOLOExtendedParser",
                 is_softmax=False,  # E2E outputs are already sigmoided
                 output_kwargs={
-                    "mask_outputs": ["mask_output"],
+                    "mask_outputs": ["output_masks"],
                     "protos_outputs": "protos_output",
                 },
                 encoding=encoding,
diff --git a/tools/yolo/yolov8_exporter.py b/tools/yolo/yolov8_exporter.py
index 29db152..7048cc4 100644
--- a/tools/yolo/yolov8_exporter.py
+++ b/tools/yolo/yolov8_exporter.py
@@ -119,9 +119,16 @@ def __init__(
     def load_model(self):
         # load the model
         model, _ = load_checkpoint(
-            self.model_path, device="cpu", inplace=True, fuse=True
+            self.model_path, device="cpu", inplace=True, fuse=False
         )
 
+        # for yolo26 end2end has to be disabled before fusing
+        # otherwise cv2/cv3 are removed in the fuse process
+        head = model.model[-1]
+        if getattr(head, "end2end", False):
+            head.end2end = False
+        model.fuse()
+
         self.mode = -1
         if isinstance(model.model[-1], (Segment)) or isinstance(
             model.model[-1], (YOLOESegment)
@@ -241,7 +248,13 @@ def export_nn_archive(
                 n_classes=self.model.model[-1].nc,
                 parser="YOLOExtendedParser",
                 n_keypoints=self.model.model[-1].kpt_shape[0],
-                output_kwargs={"keypoints_outputs": ["kpt_output"]},
+                output_kwargs={
+                    "keypoints_outputs": [
+                        "kpt_output1",
+                        "kpt_output2",
+                        "kpt_output3",
+                    ]
+                },
                 encoding=encoding,
             )
         elif self.mode == CLASSIFY_MODE:
@@ -259,6 +272,8 @@ def make_cls_nn_archive(
             n_classes (int): Number of classes
             encoding (Encoding): Color encoding used in the input model. Defaults to RGB.
         """
+        output_specs = self.get_output_specs()
+
         archive = ArchiveGenerator(
             archive_name=self.model_name,
             save_path=str(self.output_folder),
@@ -286,6 +301,8 @@ def make_cls_nn_archive(
                         {
                             "name": output,
                             "dtype": DataType.FLOAT32,
+                            "shape": output_specs.get(output, {}).get("shape"),
+                            "layout": output_specs.get(output, {}).get("layout"),
                         }
                         for output in self.all_output_names
                     ],