From fd0f2eb04c1fd4a4d9f79583cca93fae31528664 Mon Sep 17 00:00:00 2001
From: Eduardo Pacheco <eduardo@argmaxinc.com>
Date: Mon, 16 Feb 2026 12:22:15 -0300
Subject: [PATCH 1/8] feat: Argmax Sortformer diarization pipelines

---
 .../pipeline/diarization/speakerkit.py        | 117 ++++++++++++++----
 src/openbench/pipeline/pipeline_aliases.py    |  17 ++-
 2 files changed, 106 insertions(+), 28 deletions(-)

diff --git a/src/openbench/pipeline/diarization/speakerkit.py b/src/openbench/pipeline/diarization/speakerkit.py
index 3718cb3..24404c4 100644
--- a/src/openbench/pipeline/diarization/speakerkit.py
+++ b/src/openbench/pipeline/diarization/speakerkit.py
@@ -8,7 +8,7 @@
 from typing import Callable, Literal, TypedDict
 
 from argmaxtools.utils import get_logger
-from pydantic import Field
+from pydantic import Field, model_validator
 
 from ...dataset import DiarizationSample
 from ...pipeline_prediction import DiarizationAnnotation
@@ -23,48 +23,113 @@
 TEMP_AUDIO_DIR = Path("audio_temp")
 
 
-class SpeakerKitPipelineConfig(DiarizationPipelineConfig):
-    cli_path: str = Field(..., description="The absolute path to the SpeakerKit CLI")
-    clusterer_version: Literal["pyannote3", "pyannote4"] = Field(
-        "pyannote4", description="The version of the clusterer to use"
-    )
-    model_path: str | None = Field(None, description="The absolute path to the SpeakerKit model")
-
-
 class SpeakerKitInput(TypedDict):
     audio_path: Path
     output_path: Path
     num_speakers: int | None
 
 
-class SpeakerKitCli:
-    def __init__(self, config: SpeakerKitPipelineConfig):
-        self.cli_path = config.cli_path
-        self.model_path = config.model_path
-        self.clusterer_version = config.clusterer_version
-
-    def __call__(self, speakerkit_input: SpeakerKitInput) -> tuple[Path, float]:
+class SpeakerKitPipelineConfig(DiarizationPipelineConfig):
+    cli_path: str = Field(..., description="The absolute path to the SpeakerKit CLI")
+    model_path: str | None = Field(None, description="The absolute path to the SpeakerKit model directory")
+    clusterer_version: Literal["pyannote3", "pyannote4"] | None = Field(
+        None, description="The version of the clusterer to use"
+    )
+    sortformer_model_name: str | None = Field(None, description="The name of the Sortformer model to use")
+    sortformer_model_variant: str | None = Field(None, description="The variant of the Sortformer model to use")
+
+    @model_validator(mode="after")
+    def validate_sortformer_model(self) -> "SpeakerKitPipelineConfig":
+        if self.sortformer_model_name is not None and self.sortformer_model_variant is None:
+            raise ValueError(
+                "If `sortformer_model_name` is provided, `sortformer_model_variant` must also be provided"
+            )
+
+        if self.sortformer_model_name is None and self.sortformer_model_variant is not None:
+            raise ValueError(
+                "If `sortformer_model_variant` is provided, `sortformer_model_name` must also be provided"
+            )
+
+        return self
+
+    @model_validator(mode="after")
+    def validate_model_options(self) -> "SpeakerKitPipelineConfig":
+        if (
+            self.clusterer_version is None
+            and self.sortformer_model_name is None
+            and self.sortformer_model_variant is None
+        ):
+            raise ValueError(
+                "At least one of `clusterer_version`, `sortformer_model_name`, or `sortformer_model_variant` must be provided"
+            )
+
+        if (
+            self.clusterer_version is not None
+            and self.sortformer_model_name is not None
+            and self.sortformer_model_variant is not None
+        ):
+            raise ValueError(
+                "Only one of `clusterer_version`, `sortformer_model_name`, or `sortformer_model_variant` can be provided"
+            )
+
+        return self
+
+    @property
+    def is_sortformer(self) -> bool:
+        return self.clusterer_version is None
+
+    def generate_cli_args(self, inputs: SpeakerKitInput) -> list[str]:
         cmd = [
             self.cli_path,
             "diarize",
             "--audio-path",
-            str(speakerkit_input["audio_path"]),
+            str(inputs["audio_path"]),
             "--rttm-path",
-            str(speakerkit_input["output_path"]),
-            "--clusterer-version",
-            self.clusterer_version,
+            str(inputs["output_path"]),
             "--verbose",
         ]
 
-        if self.model_path:
+        if self.clusterer_version is not None:
+            cmd.extend(["--clusterer-version", self.clusterer_version])
+        elif self.sortformer_model_name is not None and self.sortformer_model_variant is not None:
+            cmd.extend(
+                [
+                    "--sortformer-model-name",
+                    self.sortformer_model_name,
+                    "--sortformer-model-variant",
+                    self.sortformer_model_variant,
+                ]
+            )
+
+        if self.model_path is not None:
             cmd.extend(["--model-path", self.model_path])
 
-        if speakerkit_input["num_speakers"] is not None:
-            cmd.extend(["--num-speakers", str(speakerkit_input["num_speakers"])])
+        if inputs["num_speakers"] is not None:
+            if self.is_sortformer:
+                logger.warning("`num_speakers` is not supported for Sortformer. Ignoring...")
+            else:
+                cmd.extend(["--num-speakers", str(inputs["num_speakers"])])
 
         if "SPEAKERKIT_API_KEY" in os.environ:
             cmd.extend(["--api-key", os.environ["SPEAKERKIT_API_KEY"]])
 
+        return cmd
+
+
+def parse_stdout(stdout: str) -> float:
+    pattern = r"Model Load Time:\s+\d+\.\d+\s+ms\nTotal Time:\s+(\d+\.\d+)\s+ms"
+    matches = re.search(pattern, stdout)
+    total_time = float(matches.group(1))
+    return total_time / 1000
+
+
+class SpeakerKitCli:
+    def __init__(self, config: SpeakerKitPipelineConfig):
+        self.config = config
+
+    def __call__(self, speakerkit_input: SpeakerKitInput) -> tuple[Path, float]:
+        cmd = self.config.generate_cli_args(speakerkit_input)
+
         try:
             result = subprocess.run(cmd, check=True, capture_output=True, text=True)
             logger.debug(f"Diarization CLI stdout:\n{result.stdout}")
@@ -81,11 +146,9 @@ def __call__(self, speakerkit_input: SpeakerKitInput) -> tuple[Path, float]:
         speakerkit_input["audio_path"].unlink()
 
         # Parse stdout and take the total time it took to diarize
-        pattern = r"Model Load Time:\s+\d+\.\d+\s+ms\nTotal Time:\s+(\d+\.\d+)\s+ms"
-        matches = re.search(pattern, result.stdout)
-        total_time = float(matches.group(1))
+        total_time = parse_stdout(result.stdout)
 
-        return speakerkit_input["output_path"], total_time / 1000
+        return speakerkit_input["output_path"], total_time
 
 
 @register_pipeline
diff --git a/src/openbench/pipeline/pipeline_aliases.py b/src/openbench/pipeline/pipeline_aliases.py
index 97287b0..83a77e8 100644
--- a/src/openbench/pipeline/pipeline_aliases.py
+++ b/src/openbench/pipeline/pipeline_aliases.py
@@ -114,7 +114,22 @@ def register_pipeline_aliases() -> None:
             "cli_path": os.getenv("SPEAKERKIT_CLI_PATH"),
             "clusterer_version": "pyannote4",
         },
-        description="SpeakerKit speaker diarization pipeline. Requires CLI installation and API key. Set `SPEAKERKIT_CLI_PATH` and `SPEAKERKIT_API_KEY` env vars. For access to the CLI binary contact speakerkitpro@argmaxinc.com",
+        description="SpeakerKit speaker diarization pipeline using community-1 model from pyannote. Requires CLI installation and API key. Set `SPEAKERKIT_CLI_PATH` and `SPEAKERKIT_API_KEY` env vars. For access to the CLI binary contact speakerkitpro@argmaxinc.com",
+    )
+
+    PipelineRegistry.register_alias(
+        "speakerkit-sortformer-compressed",
+        SpeakerKitPipeline,
+        default_config={
+            "out_dir": "./speakerkit-sortformer-report",
+            "cli_path": os.getenv("SPEAKERKIT_CLI_PATH"),
+            "sortformer_model_name": "v2-1",
+            "sortformer_model_variant": "384_94MB",
+        },
+        description=(
+            "SpeakerKit speaker diarization pipeline using Sortformer model compressed to 94MB. Requires CLI installation and API key. "
+            "Set `SPEAKERKIT_CLI_PATH` and `SPEAKERKIT_API_KEY` env vars. For access to the CLI binary contact speakerkitpro@argmaxinc.com."
+        ),
     )
 
     PipelineRegistry.register_alias(

From f7b86345fa8ca98f585fb0cc4a99a852779adeb6 Mon Sep 17 00:00:00 2001
From: Eduardo Pacheco <eduardo@argmaxinc.com>
Date: Mon, 16 Feb 2026 13:43:26 -0300
Subject: [PATCH 2/8] refactor: sortformer orchestration

---
 src/openbench/engine/whisperkitpro_engine.py  | 11 +++-
 .../orchestration_whisperkitpro.py            | 17 +++--
 src/openbench/pipeline/pipeline_aliases.py    | 66 ++++++++++++++-----
 3 files changed, 69 insertions(+), 25 deletions(-)

diff --git a/src/openbench/engine/whisperkitpro_engine.py b/src/openbench/engine/whisperkitpro_engine.py
index 0103320..b8bb0d7 100644
--- a/src/openbench/engine/whisperkitpro_engine.py
+++ b/src/openbench/engine/whisperkitpro_engine.py
@@ -90,7 +90,11 @@ class WhisperKitProConfig(BaseModel):
         False,
         description="Whether to perform diarization",
     )
-    orchestration_strategy: Literal["word", "segment"] = Field(
+    diarization_mode: Literal["realtime", "prerecorded"] = Field(
+        "prerecorded",
+        description="Sortformer streaming mode: `realtime` (1.04s latency) or `prerecorded` (9.84s latency). This is only applicable when `clusterer_version` is `sortformer`.",
+    )
+    orchestration_strategy: Literal["word", "segment", "subsegment"] = Field(
         "segment",
         description="The orchestration strategy to use either `word` or `segment`",
     )
@@ -98,9 +102,9 @@ class WhisperKitProConfig(BaseModel):
         None,
         description="The path to the speaker models directory",
     )
-    clusterer_version: Literal["pyannote3", "pyannote4"] = Field(
+    clusterer_version: Literal["pyannote3", "pyannote4", "sortformer"] = Field(
         "pyannote4",
-        description="The version of the clusterer to use",
+        description="The version of the clusterer to use. If `sortformer` is the diarization model used is Sortformer, otherwise it is pyannote.",
     )
     use_exclusive_reconciliation: bool = Field(
         False,
@@ -174,6 +178,7 @@ def generate_cli_args(self, model_path: Path | None = None) -> list[str]:
             # Add rttm path
             args.extend(["--rttm-path", self.rttm_path])
             args.extend(["--clusterer-version", self.clusterer_version])
+            args.extend(["--diarization-mode", self.diarization_mode])
             # If speaker models path is provided use it
             if self.speaker_models_path:
                 args.extend(["--speaker-models-path", self.speaker_models_path])
diff --git a/src/openbench/pipeline/orchestration/orchestration_whisperkitpro.py b/src/openbench/pipeline/orchestration/orchestration_whisperkitpro.py
index 5e88083..6680438 100644
--- a/src/openbench/pipeline/orchestration/orchestration_whisperkitpro.py
+++ b/src/openbench/pipeline/orchestration/orchestration_whisperkitpro.py
@@ -69,13 +69,17 @@ class WhisperKitProOrchestrationConfig(OrchestrationConfig):
         ComputeUnit.CPU_AND_NE,
         description="The compute units to use for the text decoder. Default is CPU_AND_NE.",
     )
-    orchestration_strategy: Literal["word", "segment"] = Field(
-        "segment",
-        description="The orchestration strategy to use either `word` or `segment`",
+    orchestration_strategy: Literal["word", "segment", "subsegment"] = Field(
+        "subsegment",
+        description="The orchestration strategy to use either `word`, `segment` or `subsegment`",
     )
-    clusterer_version: Literal["pyannote3", "pyannote4"] = Field(
+    clusterer_version: Literal["pyannote3", "pyannote4", "sortformer"] = Field(
         "pyannote4",
-        description="The version of the clusterer to use",
+        description="The version of the clusterer to use. If `sortformer` is the diarization model used is Sortformer, otherwise it is pyannote.",
+    )
+    diarization_mode: Literal["realtime", "prerecorded"] = Field(
+        "prerecorded",
+        description="Sortformer streaming mode: `realtime` (1.04s latency) or `prerecorded` (9.84s latency). This is only applicable when `clusterer_version` is `sortformer`.",
     )
     use_exclusive_reconciliation: bool = Field(
         False,
@@ -107,7 +111,8 @@ def build_pipeline(self) -> WhisperKitPro:
             chunking_strategy="vad",
             diarization=True,
             orchestration_strategy=self.config.orchestration_strategy,
-            clusterer_version_string=self.config.clusterer_version,
+            clusterer_version=self.config.clusterer_version,
+            diarization_mode=self.config.diarization_mode,
             use_exclusive_reconciliation=self.config.use_exclusive_reconciliation,
             fast_load=self.config.fast_load,
         )
diff --git a/src/openbench/pipeline/pipeline_aliases.py b/src/openbench/pipeline/pipeline_aliases.py
index 83a77e8..171aee2 100644
--- a/src/openbench/pipeline/pipeline_aliases.py
+++ b/src/openbench/pipeline/pipeline_aliases.py
@@ -218,8 +218,8 @@ def register_pipeline_aliases() -> None:
             "repo_id": "argmaxinc/whisperkit-pro",
             "model_variant": "openai_whisper-tiny",
             "cli_path": os.getenv("WHISPERKITPRO_CLI_PATH"),
-            "orchestration_strategy": "segment",
-            "clusterer_version_string": "pyannote4",
+            "orchestration_strategy": "subsegment",
+            "clusterer_version": "pyannote4",
             "use_exclusive_reconciliation": True,
         },
         description="WhisperKitPro orchestration pipeline using the tiny version of the model. Requires `WHISPERKITPRO_CLI_PATH` env var and depending on your permissions also `WHISPERKITPRO_API_KEY` env var.",
@@ -232,8 +232,8 @@ def register_pipeline_aliases() -> None:
             "repo_id": "argmaxinc/whisperkit-pro",
             "model_variant": "openai_whisper-large-v3",
             "cli_path": os.getenv("WHISPERKITPRO_CLI_PATH"),
-            "orchestration_strategy": "segment",
-            "clusterer_version_string": "pyannote4",
+            "orchestration_strategy": "subsegment",
+            "clusterer_version": "pyannote4",
             "use_exclusive_reconciliation": True,
         },
         description="WhisperKitPro orchestration pipeline using the large-v3 version of the model. Requires `WHISPERKITPRO_CLI_PATH` env var and depending on your permissions also `WHISPERKITPRO_API_KEY` env var.",
@@ -246,8 +246,8 @@ def register_pipeline_aliases() -> None:
             "repo_id": "argmaxinc/whisperkit-pro",
             "model_variant": "openai_whisper-large-v3-v20240930",
             "cli_path": os.getenv("WHISPERKITPRO_CLI_PATH"),
-            "orchestration_strategy": "segment",
-            "clusterer_version_string": "pyannote4",
+            "orchestration_strategy": "subsegment",
+            "clusterer_version": "pyannote4",
             "use_exclusive_reconciliation": True,
         },
         description="WhisperKitPro orchestration pipeline using the large-v3-v20240930 version of the model (which is the same as large-v3-turbo from OpenAI). Requires `WHISPERKITPRO_CLI_PATH` env var and depending on your permissions also `WHISPERKITPRO_API_KEY` env var.",
@@ -260,8 +260,8 @@ def register_pipeline_aliases() -> None:
             "repo_id": "argmaxinc/whisperkit-pro",
             "model_variant": "openai_whisper-large-v3-v20240930_626MB",
             "cli_path": os.getenv("WHISPERKITPRO_CLI_PATH"),
-            "orchestration_strategy": "segment",
-            "clusterer_version_string": "pyannote4",
+            "orchestration_strategy": "subsegment",
+            "clusterer_version": "pyannote4",
             "use_exclusive_reconciliation": True,
         },
         description="WhisperKitPro orchestration pipeline using the large-v3-v20240930 version of the model compressed to 626MB (which is the same as large-v3-turbo from OpenAI). Requires `WHISPERKITPRO_CLI_PATH` env var and depending on your permissions also `WHISPERKITPRO_API_KEY` env var.",
@@ -274,8 +274,8 @@ def register_pipeline_aliases() -> None:
             "repo_id": "argmaxinc/parakeetkit-pro",
             "model_variant": "nvidia_parakeet-v2",
             "cli_path": os.getenv("WHISPERKITPRO_CLI_PATH"),
-            "orchestration_strategy": "segment",
-            "clusterer_version_string": "pyannote4",
+            "orchestration_strategy": "subsegment",
+            "clusterer_version": "pyannote4",
             "use_exclusive_reconciliation": True,
         },
         description="WhisperKitPro orchestration pipeline using the parakeet-v2 version of the model. Requires `WHISPERKITPRO_CLI_PATH` env var and depending on your permissions also `WHISPERKITPRO_API_KEY` env var.",
@@ -288,13 +288,30 @@ def register_pipeline_aliases() -> None:
             "repo_id": "argmaxinc/parakeetkit-pro",
             "model_variant": "nvidia_parakeet-v2_476MB",
             "cli_path": os.getenv("WHISPERKITPRO_CLI_PATH"),
-            "orchestration_strategy": "segment",
-            "clusterer_version_string": "pyannote4",
+            "orchestration_strategy": "subsegment",
+            "clusterer_version": "pyannote4",
             "use_exclusive_reconciliation": True,
         },
         description="WhisperKitPro orchestration pipeline using the parakeet-v2 version of the model compressed to 476MB. Requires `WHISPERKITPRO_CLI_PATH` env var and depending on your permissions also `WHISPERKITPRO_API_KEY` env var.",
     )
 
+    PipelineRegistry.register_alias(
+        "whisperkitpro-orchestration-parakeet-v2-compressed-sortformer-compressed",
+        WhisperKitProOrchestrationPipeline,
+        default_config={
+            "repo_id": "argmaxinc/parakeetkit-pro",
+            "model_variant": "nvidia_parakeet-v2_476MB",
+            "cli_path": os.getenv("WHISPERKITPRO_CLI_PATH"),
+            "orchestration_strategy": "subsegment",
+            "clusterer_version": "sortformer",
+            "diarization_mode": "prerecorded",
+        },
+        description=(
+            "WhisperKitPro orchestration pipeline using the parakeet-v2 version of the model compressed to 476MB and using Sortformer for diarization. "
+            "Requires `WHISPERKITPRO_CLI_PATH` env var and depending on your permissions also `WHISPERKITPRO_API_KEY` env var."
+        ),
+    )
+
     PipelineRegistry.register_alias(
         "whisperkitpro-orchestration-parakeet-v3",
         WhisperKitProOrchestrationPipeline,
@@ -302,8 +319,8 @@ def register_pipeline_aliases() -> None:
             "repo_id": "argmaxinc/parakeetkit-pro",
             "model_variant": "nvidia_parakeet-v3",
             "cli_path": os.getenv("WHISPERKITPRO_CLI_PATH"),
-            "orchestration_strategy": "segment",
-            "clusterer_version_string": "pyannote4",
+            "orchestration_strategy": "subsegment",
+            "clusterer_version": "pyannote4",
             "use_exclusive_reconciliation": True,
         },
         description="WhisperKitPro orchestration pipeline using the parakeet-v3 version of the model. Requires `WHISPERKITPRO_CLI_PATH` env var and depending on your permissions also `WHISPERKITPRO_API_KEY` env var.",
@@ -316,13 +333,30 @@ def register_pipeline_aliases() -> None:
             "repo_id": "argmaxinc/parakeetkit-pro",
             "model_variant": "nvidia_parakeet-v3_494MB",
             "cli_path": os.getenv("WHISPERKITPRO_CLI_PATH"),
-            "orchestration_strategy": "segment",
-            "clusterer_version_string": "pyannote4",
+            "orchestration_strategy": "subsegment",
+            "clusterer_version": "pyannote4",
             "use_exclusive_reconciliation": True,
         },
         description="WhisperKitPro orchestration pipeline using the parakeet-v3 version of the model compressed to 494MB. Requires `WHISPERKITPRO_CLI_PATH` env var and depending on your permissions also `WHISPERKITPRO_API_KEY` env var.",
     )
 
+    PipelineRegistry.register_alias(
+        "whisperkitpro-orchestration-parakeet-v3-compressed-sortformer-compressed",
+        WhisperKitProOrchestrationPipeline,
+        default_config={
+            "repo_id": "argmaxinc/parakeetkit-pro",
+            "model_variant": "nvidia_parakeet-v3_494MB",
+            "cli_path": os.getenv("WHISPERKITPRO_CLI_PATH"),
+            "orchestration_strategy": "subsegment",
+            "clusterer_version": "sortformer",
+            "diarization_mode": "prerecorded",
+        },
+        description=(
+            "WhisperKitPro orchestration pipeline using the parakeet-v3 version of the model compressed to 494MB and using Sortformer for diarization. "
+            "Requires `WHISPERKITPRO_CLI_PATH` env var and depending on your permissions also `WHISPERKITPRO_API_KEY` env var."
+        ),
+    )
+
     PipelineRegistry.register_alias(
         "openai-orchestration",
         OpenAIOrchestrationPipeline,

From a6dc656f6120f77e87efc4b6de834b5eb69c1ae4 Mon Sep 17 00:00:00 2001
From: Eduardo Pacheco <eduardo@argmaxinc.com>
Date: Mon, 16 Feb 2026 16:02:34 -0300
Subject: [PATCH 3/8] fix: sortformer usage with speakerkit

---
 .../pipeline/diarization/speakerkit.py        | 55 +++++++------------
 src/openbench/pipeline/pipeline_aliases.py    |  3 +-
 2 files changed, 23 insertions(+), 35 deletions(-)

diff --git a/src/openbench/pipeline/diarization/speakerkit.py b/src/openbench/pipeline/diarization/speakerkit.py
index 24404c4..805e9c4 100644
--- a/src/openbench/pipeline/diarization/speakerkit.py
+++ b/src/openbench/pipeline/diarization/speakerkit.py
@@ -32,8 +32,8 @@ class SpeakerKitInput(TypedDict):
 class SpeakerKitPipelineConfig(DiarizationPipelineConfig):
     cli_path: str = Field(..., description="The absolute path to the SpeakerKit CLI")
     model_path: str | None = Field(None, description="The absolute path to the SpeakerKit model directory")
-    clusterer_version: Literal["pyannote3", "pyannote4"] | None = Field(
-        None, description="The version of the clusterer to use"
+    clusterer_version: Literal["pyannote3", "pyannote4", "sortformer"] = Field(
+        "pyannote4", description="The version of the clusterer to use"
     )
     sortformer_model_name: str | None = Field(None, description="The name of the Sortformer model to use")
     sortformer_model_variant: str | None = Field(None, description="The variant of the Sortformer model to use")
@@ -52,31 +52,9 @@ def validate_sortformer_model(self) -> "SpeakerKitPipelineConfig":
 
         return self
 
-    @model_validator(mode="after")
-    def validate_model_options(self) -> "SpeakerKitPipelineConfig":
-        if (
-            self.clusterer_version is None
-            and self.sortformer_model_name is None
-            and self.sortformer_model_variant is None
-        ):
-            raise ValueError(
-                "At least one of `clusterer_version`, `sortformer_model_name`, or `sortformer_model_variant` must be provided"
-            )
-
-        if (
-            self.clusterer_version is not None
-            and self.sortformer_model_name is not None
-            and self.sortformer_model_variant is not None
-        ):
-            raise ValueError(
-                "Only one of `clusterer_version`, `sortformer_model_name`, or `sortformer_model_variant` can be provided"
-            )
-
-        return self
-
     @property
     def is_sortformer(self) -> bool:
-        return self.clusterer_version is None
+        return self.clusterer_version == "sortformer"
 
     def generate_cli_args(self, inputs: SpeakerKitInput) -> list[str]:
         cmd = [
@@ -86,12 +64,13 @@ def generate_cli_args(self, inputs: SpeakerKitInput) -> list[str]:
             str(inputs["audio_path"]),
             "--rttm-path",
             str(inputs["output_path"]),
+            "--clusterer-version",
+            self.clusterer_version,
             "--verbose",
         ]
 
-        if self.clusterer_version is not None:
-            cmd.extend(["--clusterer-version", self.clusterer_version])
-        elif self.sortformer_model_name is not None and self.sortformer_model_variant is not None:
+        # Only check variant as we already checked both should be provided
+        if self.sortformer_model_variant is not None:
             cmd.extend(
                 [
                     "--sortformer-model-name",
@@ -115,12 +94,20 @@ def generate_cli_args(self, inputs: SpeakerKitInput) -> list[str]:
 
         return cmd
 
+    def parse_stdout(self, stdout: str) -> float:
+        # Default pattern for pyannote models
+        pattern = r"Model Load Time:\s+\d+\.\d+\s+ms\nTotal Time:\s+(\d+\.\d+)\s+ms"
+        divisor = 1000.0
+
+        # if model is sortfomer we override the pattern and divisor
+        if self.is_sortformer:
+            pattern = r"Prediction time:\s+(\d+\.\d+)\s+seconds"
+            divisor = 1.0
 
-def parse_stdout(stdout: str) -> float:
-    pattern = r"Model Load Time:\s+\d+\.\d+\s+ms\nTotal Time:\s+(\d+\.\d+)\s+ms"
-    matches = re.search(pattern, stdout)
-    total_time = float(matches.group(1))
-    return total_time / 1000
+        matches = re.search(pattern, stdout)
+        if matches is None:
+            raise ValueError(f"Could not parse prediction time from stdout: {stdout!r}")
+        return float(matches.group(1)) / divisor
 
 
 class SpeakerKitCli:
@@ -146,7 +133,7 @@ def __call__(self, speakerkit_input: SpeakerKitInput) -> tuple[Path, float]:
         speakerkit_input["audio_path"].unlink()
 
         # Parse stdout and take the total time it took to diarize
-        total_time = parse_stdout(result.stdout)
+        total_time = self.config.parse_stdout(result.stdout)
 
         return speakerkit_input["output_path"], total_time
 
diff --git a/src/openbench/pipeline/pipeline_aliases.py b/src/openbench/pipeline/pipeline_aliases.py
index 171aee2..a8193aa 100644
--- a/src/openbench/pipeline/pipeline_aliases.py
+++ b/src/openbench/pipeline/pipeline_aliases.py
@@ -123,8 +123,9 @@ def register_pipeline_aliases() -> None:
         default_config={
             "out_dir": "./speakerkit-sortformer-report",
             "cli_path": os.getenv("SPEAKERKIT_CLI_PATH"),
-            "sortformer_model_name": "v2-1",
             "sortformer_model_variant": "384_94MB",
+            "sortformer_model_name": "sortformer",
+            "clusterer_version": "sortformer",
         },
         description=(
             "SpeakerKit speaker diarization pipeline using Sortformer model compressed to 94MB. Requires CLI installation and API key. "

From abe70cb9610feb3fcad6469066a136cf5ca9223a Mon Sep 17 00:00:00 2001
From: Eduardo Pacheco <eduardo@argmaxinc.com>
Date: Mon, 16 Feb 2026 16:10:19 -0300
Subject: [PATCH 4/8] refactor: add verbose to whisperkitpro

---
 src/openbench/engine/whisperkitpro_engine.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/openbench/engine/whisperkitpro_engine.py b/src/openbench/engine/whisperkitpro_engine.py
index b8bb0d7..e1b689e 100644
--- a/src/openbench/engine/whisperkitpro_engine.py
+++ b/src/openbench/engine/whisperkitpro_engine.py
@@ -162,6 +162,7 @@ def generate_cli_args(self, model_path: Path | None = None) -> list[str]:
                 COMPUTE_UNITS_MAPPER[self.text_decoder_compute_units],
                 "--fast-load",
                 str(self.fast_load).lower(),
+                "--verbose",
             ]
         )
 

From 2685d017308f1d001b7dcd53df1c7ad44f548648 Mon Sep 17 00:00:00 2001
From: Eduardo Pacheco <eduardo@argmaxinc.com>
Date: Fri, 20 Feb 2026 15:09:06 -0300
Subject: [PATCH 5/8] refactor: only add --diarization-mode for sortformer

---
 src/openbench/engine/whisperkitpro_engine.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/openbench/engine/whisperkitpro_engine.py b/src/openbench/engine/whisperkitpro_engine.py
index e1b689e..0b7a238 100644
--- a/src/openbench/engine/whisperkitpro_engine.py
+++ b/src/openbench/engine/whisperkitpro_engine.py
@@ -176,10 +176,15 @@ def generate_cli_args(self, model_path: Path | None = None) -> list[str]:
         if self.diarization:
             args.extend(["--diarization"])
             args.extend(["--orchestration-strategy", self.orchestration_strategy])
+
             # Add rttm path
             args.extend(["--rttm-path", self.rttm_path])
             args.extend(["--clusterer-version", self.clusterer_version])
-            args.extend(["--diarization-mode", self.diarization_mode])
+
+            # Only add diarization mode if using Sortformer
+            if self.clusterer_version == "sortformer":
+                args.extend(["--diarization-mode", self.diarization_mode])
+
             # If speaker models path is provided use it
             if self.speaker_models_path:
                 args.extend(["--speaker-models-path", self.speaker_models_path])

From edd30ad206c12c6aa45ba7ea6e949bb2aa542cb4 Mon Sep 17 00:00:00 2001
From: Eduardo Pacheco <eduardo@argmaxinc.com>
Date: Tue, 10 Mar 2026 21:38:37 -0300
Subject: [PATCH 6/8] refactor: CLI args

---
 src/openbench/engine/whisperkitpro_engine.py  | 20 ++++-----
 .../pipeline/diarization/speakerkit.py        | 41 ++-----------------
 .../orchestration_whisperkitpro.py            | 14 +++----
 src/openbench/pipeline/pipeline_aliases.py    | 26 ++++++------
 4 files changed, 33 insertions(+), 68 deletions(-)

diff --git a/src/openbench/engine/whisperkitpro_engine.py b/src/openbench/engine/whisperkitpro_engine.py
index 0b7a238..69a1d00 100644
--- a/src/openbench/engine/whisperkitpro_engine.py
+++ b/src/openbench/engine/whisperkitpro_engine.py
@@ -83,7 +83,7 @@ class WhisperKitProConfig(BaseModel):
         description="The compute units to use for the audio encoder. Default is CPU_AND_NE.",
     )
     text_decoder_compute_units: ct.ComputeUnit = Field(
-        ct.ComputeUnit.CPU_AND_GPU,
+        ct.ComputeUnit.CPU_AND_NE,
-        description="The compute units to use for the text decoder. Default is CPU_AND_GPU.",
+        description="The compute units to use for the text decoder. Default is CPU_AND_NE.",
     )
     diarization: bool = Field(
@@ -92,19 +92,19 @@ class WhisperKitProConfig(BaseModel):
     )
     diarization_mode: Literal["realtime", "prerecorded"] = Field(
         "prerecorded",
-        description="Sortformer streaming mode: `realtime` (1.04s latency) or `prerecorded` (9.84s latency). This is only applicable when `clusterer_version` is `sortformer`.",
+        description="Sortformer streaming mode: `realtime` (1.04s latency) or `prerecorded` (9.84s latency). This is only applicable when `engine` is `sortformer`.",
     )
-    orchestration_strategy: Literal["word", "segment", "subsegment"] = Field(
-        "segment",
-        description="The orchestration strategy to use either `word` or `segment`",
+    orchestration_strategy: Literal["segment", "subsegment"] = Field(
+        "subsegment",
+        description="The orchestration strategy to use either `segment` or `subsegment`",
     )
     speaker_models_path: str | None = Field(
         None,
         description="The path to the speaker models directory",
     )
-    clusterer_version: Literal["pyannote3", "pyannote4", "sortformer"] = Field(
-        "pyannote4",
-        description="The version of the clusterer to use. If `sortformer` is the diarization model used is Sortformer, otherwise it is pyannote.",
+    engine: Literal["pyannote", "sortformer"] = Field(
+        "pyannote",
+        description="The engine to use. If `sortformer` the diarization model used is Sortformer, otherwise it is pyannote.",
     )
     use_exclusive_reconciliation: bool = Field(
         False,
@@ -179,10 +179,10 @@ def generate_cli_args(self, model_path: Path | None = None) -> list[str]:
 
             # Add rttm path
             args.extend(["--rttm-path", self.rttm_path])
-            args.extend(["--clusterer-version", self.clusterer_version])
+            args.extend(["--engine", self.engine])
 
             # Only add diarization mode if using Sortformer
-            if self.clusterer_version == "sortformer":
+            if self.engine == "sortformer":
                 args.extend(["--diarization-mode", self.diarization_mode])
 
             # If speaker models path is provided use it
diff --git a/src/openbench/pipeline/diarization/speakerkit.py b/src/openbench/pipeline/diarization/speakerkit.py
index 805e9c4..7c7bc8a 100644
--- a/src/openbench/pipeline/diarization/speakerkit.py
+++ b/src/openbench/pipeline/diarization/speakerkit.py
@@ -8,7 +8,7 @@
 from typing import Callable, Literal, TypedDict
 
 from argmaxtools.utils import get_logger
-from pydantic import Field, model_validator
+from pydantic import Field
 
 from ...dataset import DiarizationSample
 from ...pipeline_prediction import DiarizationAnnotation
@@ -32,29 +32,7 @@ class SpeakerKitInput(TypedDict):
 class SpeakerKitPipelineConfig(DiarizationPipelineConfig):
     cli_path: str = Field(..., description="The absolute path to the SpeakerKit CLI")
     model_path: str | None = Field(None, description="The absolute path to the SpeakerKit model directory")
-    clusterer_version: Literal["pyannote3", "pyannote4", "sortformer"] = Field(
-        "pyannote4", description="The version of the clusterer to use"
-    )
-    sortformer_model_name: str | None = Field(None, description="The name of the Sortformer model to use")
-    sortformer_model_variant: str | None = Field(None, description="The variant of the Sortformer model to use")
-
-    @model_validator(mode="after")
-    def validate_sortformer_model(self) -> "SpeakerKitPipelineConfig":
-        if self.sortformer_model_name is not None and self.sortformer_model_variant is None:
-            raise ValueError(
-                "If `sortformer_model_name` is provided, `sortformer_model_variant` must also be provided"
-            )
-
-        if self.sortformer_model_name is None and self.sortformer_model_variant is not None:
-            raise ValueError(
-                "If `sortformer_model_variant` is provided, `sortformer_model_name` must also be provided"
-            )
-
-        return self
-
-    @property
-    def is_sortformer(self) -> bool:
-        return self.clusterer_version == "sortformer"
+    engine: Literal["pyannote", "sortformer"] = Field("pyannote", description="The engine to use")
 
     def generate_cli_args(self, inputs: SpeakerKitInput) -> list[str]:
         cmd = [
@@ -64,22 +42,11 @@ def generate_cli_args(self, inputs: SpeakerKitInput) -> list[str]:
             str(inputs["audio_path"]),
             "--rttm-path",
             str(inputs["output_path"]),
-            "--clusterer-version",
-            self.clusterer_version,
+            "--engine",
+            self.engine,
             "--verbose",
         ]
 
-        # Only check variant as we already checked both should be provided
-        if self.sortformer_model_variant is not None:
-            cmd.extend(
-                [
-                    "--sortformer-model-name",
-                    self.sortformer_model_name,
-                    "--sortformer-model-variant",
-                    self.sortformer_model_variant,
-                ]
-            )
-
         if self.model_path is not None:
             cmd.extend(["--model-path", self.model_path])
 
diff --git a/src/openbench/pipeline/orchestration/orchestration_whisperkitpro.py b/src/openbench/pipeline/orchestration/orchestration_whisperkitpro.py
index 6680438..9609e73 100644
--- a/src/openbench/pipeline/orchestration/orchestration_whisperkitpro.py
+++ b/src/openbench/pipeline/orchestration/orchestration_whisperkitpro.py
@@ -69,17 +69,17 @@ class WhisperKitProOrchestrationConfig(OrchestrationConfig):
         ComputeUnit.CPU_AND_NE,
         description="The compute units to use for the text decoder. Default is CPU_AND_NE.",
     )
-    orchestration_strategy: Literal["word", "segment", "subsegment"] = Field(
+    orchestration_strategy: Literal["segment", "subsegment"] = Field(
         "subsegment",
-        description="The orchestration strategy to use either `word`, `segment` or `subsegment`",
+        description="The orchestration strategy to use either `segment` or `subsegment`",
     )
-    clusterer_version: Literal["pyannote3", "pyannote4", "sortformer"] = Field(
-        "pyannote4",
-        description="The version of the clusterer to use. If `sortformer` is the diarization model used is Sortformer, otherwise it is pyannote.",
+    engine: Literal["pyannote", "sortformer"] = Field(
+        "pyannote",
+        description="The engine to use. If `sortformer` the diarization model used is Sortformer, otherwise it is pyannote.",
     )
     diarization_mode: Literal["realtime", "prerecorded"] = Field(
         "prerecorded",
-        description="Sortformer streaming mode: `realtime` (1.04s latency) or `prerecorded` (9.84s latency). This is only applicable when `clusterer_version` is `sortformer`.",
+        description="Sortformer streaming mode: `realtime` (1.04s latency) or `prerecorded` (9.84s latency). This is only applicable when `engine` is `sortformer`.",
     )
     use_exclusive_reconciliation: bool = Field(
         False,
@@ -111,7 +111,7 @@ def build_pipeline(self) -> WhisperKitPro:
             chunking_strategy="vad",
             diarization=True,
             orchestration_strategy=self.config.orchestration_strategy,
-            clusterer_version=self.config.clusterer_version,
+            engine=self.config.engine,
             diarization_mode=self.config.diarization_mode,
             use_exclusive_reconciliation=self.config.use_exclusive_reconciliation,
             fast_load=self.config.fast_load,
diff --git a/src/openbench/pipeline/pipeline_aliases.py b/src/openbench/pipeline/pipeline_aliases.py
index a8193aa..ce9792b 100644
--- a/src/openbench/pipeline/pipeline_aliases.py
+++ b/src/openbench/pipeline/pipeline_aliases.py
@@ -112,7 +112,7 @@ def register_pipeline_aliases() -> None:
         default_config={
             "out_dir": "./speakerkit-report",
             "cli_path": os.getenv("SPEAKERKIT_CLI_PATH"),
-            "clusterer_version": "pyannote4",
+            "engine": "pyannote",
         },
         description="SpeakerKit speaker diarization pipeline using community-1 model from pyannote. Requires CLI installation and API key. Set `SPEAKERKIT_CLI_PATH` and `SPEAKERKIT_API_KEY` env vars. For access to the CLI binary contact speakerkitpro@argmaxinc.com",
     )
@@ -123,9 +123,7 @@ def register_pipeline_aliases() -> None:
         default_config={
             "out_dir": "./speakerkit-sortformer-report",
             "cli_path": os.getenv("SPEAKERKIT_CLI_PATH"),
-            "sortformer_model_variant": "384_94MB",
-            "sortformer_model_name": "sortformer",
-            "clusterer_version": "sortformer",
+            "engine": "sortformer",
         },
         description=(
             "SpeakerKit speaker diarization pipeline using Sortformer model compressed to 94MB. Requires CLI installation and API key. "
@@ -220,7 +218,7 @@ def register_pipeline_aliases() -> None:
             "model_variant": "openai_whisper-tiny",
             "cli_path": os.getenv("WHISPERKITPRO_CLI_PATH"),
             "orchestration_strategy": "subsegment",
-            "clusterer_version": "pyannote4",
+            "engine": "pyannote",
             "use_exclusive_reconciliation": True,
         },
         description="WhisperKitPro orchestration pipeline using the tiny version of the model. Requires `WHISPERKITPRO_CLI_PATH` env var and depending on your permissions also `WHISPERKITPRO_API_KEY` env var.",
@@ -234,7 +232,7 @@ def register_pipeline_aliases() -> None:
             "model_variant": "openai_whisper-large-v3",
             "cli_path": os.getenv("WHISPERKITPRO_CLI_PATH"),
             "orchestration_strategy": "subsegment",
-            "clusterer_version": "pyannote4",
+            "engine": "pyannote",
             "use_exclusive_reconciliation": True,
         },
         description="WhisperKitPro orchestration pipeline using the large-v3 version of the model. Requires `WHISPERKITPRO_CLI_PATH` env var and depending on your permissions also `WHISPERKITPRO_API_KEY` env var.",
@@ -248,7 +246,7 @@ def register_pipeline_aliases() -> None:
             "model_variant": "openai_whisper-large-v3-v20240930",
             "cli_path": os.getenv("WHISPERKITPRO_CLI_PATH"),
             "orchestration_strategy": "subsegment",
-            "clusterer_version": "pyannote4",
+            "engine": "pyannote",
             "use_exclusive_reconciliation": True,
         },
         description="WhisperKitPro orchestration pipeline using the large-v3-v20240930 version of the model (which is the same as large-v3-turbo from OpenAI). Requires `WHISPERKITPRO_CLI_PATH` env var and depending on your permissions also `WHISPERKITPRO_API_KEY` env var.",
@@ -262,7 +260,7 @@ def register_pipeline_aliases() -> None:
             "model_variant": "openai_whisper-large-v3-v20240930_626MB",
             "cli_path": os.getenv("WHISPERKITPRO_CLI_PATH"),
             "orchestration_strategy": "subsegment",
-            "clusterer_version": "pyannote4",
+            "engine": "pyannote",
             "use_exclusive_reconciliation": True,
         },
         description="WhisperKitPro orchestration pipeline using the large-v3-v20240930 version of the model compressed to 626MB (which is the same as large-v3-turbo from OpenAI). Requires `WHISPERKITPRO_CLI_PATH` env var and depending on your permissions also `WHISPERKITPRO_API_KEY` env var.",
@@ -276,7 +274,7 @@ def register_pipeline_aliases() -> None:
             "model_variant": "nvidia_parakeet-v2",
             "cli_path": os.getenv("WHISPERKITPRO_CLI_PATH"),
             "orchestration_strategy": "subsegment",
-            "clusterer_version": "pyannote4",
+            "engine": "pyannote",
             "use_exclusive_reconciliation": True,
         },
         description="WhisperKitPro orchestration pipeline using the parakeet-v2 version of the model. Requires `WHISPERKITPRO_CLI_PATH` env var and depending on your permissions also `WHISPERKITPRO_API_KEY` env var.",
@@ -290,7 +288,7 @@ def register_pipeline_aliases() -> None:
             "model_variant": "nvidia_parakeet-v2_476MB",
             "cli_path": os.getenv("WHISPERKITPRO_CLI_PATH"),
             "orchestration_strategy": "subsegment",
-            "clusterer_version": "pyannote4",
+            "engine": "pyannote",
             "use_exclusive_reconciliation": True,
         },
         description="WhisperKitPro orchestration pipeline using the parakeet-v2 version of the model compressed to 476MB. Requires `WHISPERKITPRO_CLI_PATH` env var and depending on your permissions also `WHISPERKITPRO_API_KEY` env var.",
@@ -304,7 +302,7 @@ def register_pipeline_aliases() -> None:
             "model_variant": "nvidia_parakeet-v2_476MB",
             "cli_path": os.getenv("WHISPERKITPRO_CLI_PATH"),
             "orchestration_strategy": "subsegment",
-            "clusterer_version": "sortformer",
+            "engine": "sortformer",
             "diarization_mode": "prerecorded",
         },
         description=(
@@ -321,7 +319,7 @@ def register_pipeline_aliases() -> None:
             "model_variant": "nvidia_parakeet-v3",
             "cli_path": os.getenv("WHISPERKITPRO_CLI_PATH"),
             "orchestration_strategy": "subsegment",
-            "clusterer_version": "pyannote4",
+            "engine": "pyannote",
             "use_exclusive_reconciliation": True,
         },
         description="WhisperKitPro orchestration pipeline using the parakeet-v3 version of the model. Requires `WHISPERKITPRO_CLI_PATH` env var and depending on your permissions also `WHISPERKITPRO_API_KEY` env var.",
@@ -335,7 +333,7 @@ def register_pipeline_aliases() -> None:
             "model_variant": "nvidia_parakeet-v3_494MB",
             "cli_path": os.getenv("WHISPERKITPRO_CLI_PATH"),
             "orchestration_strategy": "subsegment",
-            "clusterer_version": "pyannote4",
+            "engine": "pyannote",
             "use_exclusive_reconciliation": True,
         },
         description="WhisperKitPro orchestration pipeline using the parakeet-v3 version of the model compressed to 494MB. Requires `WHISPERKITPRO_CLI_PATH` env var and depending on your permissions also `WHISPERKITPRO_API_KEY` env var.",
@@ -349,7 +347,7 @@ def register_pipeline_aliases() -> None:
             "model_variant": "nvidia_parakeet-v3_494MB",
             "cli_path": os.getenv("WHISPERKITPRO_CLI_PATH"),
             "orchestration_strategy": "subsegment",
-            "clusterer_version": "sortformer",
+            "engine": "sortformer",
             "diarization_mode": "prerecorded",
         },
         description=(

From 20344b809c0ae6c05497e86f1b8eb334f9522b54 Mon Sep 17 00:00:00 2001
From: Eduardo Pacheco <eduardo@argmaxinc.com>
Date: Wed, 11 Mar 2026 15:52:59 -0300
Subject: [PATCH 7/8] fix: missing is_sortformer

---
 src/openbench/pipeline/diarization/speakerkit.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/openbench/pipeline/diarization/speakerkit.py b/src/openbench/pipeline/diarization/speakerkit.py
index 7c7bc8a..6dfa908 100644
--- a/src/openbench/pipeline/diarization/speakerkit.py
+++ b/src/openbench/pipeline/diarization/speakerkit.py
@@ -34,6 +34,10 @@ class SpeakerKitPipelineConfig(DiarizationPipelineConfig):
     model_path: str | None = Field(None, description="The absolute path to the SpeakerKit model directory")
     engine: Literal["pyannote", "sortformer"] = Field("pyannote", description="The engine to use")
 
+    @property
+    def is_sortformer(self) -> bool:
+        return self.engine == "sortformer"
+
     def generate_cli_args(self, inputs: SpeakerKitInput) -> list[str]:
         cmd = [
             self.cli_path,
@@ -51,10 +55,7 @@ def generate_cli_args(self, inputs: SpeakerKitInput) -> list[str]:
             cmd.extend(["--model-path", self.model_path])
 
         if inputs["num_speakers"] is not None:
-            if self.is_sortformer:
-                logger.warning("`num_speakers` is not supported for Sortformer. Ignoring...")
-            else:
-                cmd.extend(["--num-speakers", str(inputs["num_speakers"])])
+            cmd.extend(["--num-speakers", str(inputs["num_speakers"])])
 
         if "SPEAKERKIT_API_KEY" in os.environ:
             cmd.extend(["--api-key", os.environ["SPEAKERKIT_API_KEY"]])

From fd8483d0c3a2aba04cf1a206d75b4780fc54056b Mon Sep 17 00:00:00 2001
From: Eduardo Pacheco <eduardo@argmaxinc.com>
Date: Thu, 12 Mar 2026 11:41:15 -0300
Subject: [PATCH 8/8] chore: ignore .sh

---
 .gitignore | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index ddfc86c..10c049c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -45,4 +45,6 @@ inference_outputs/
 miscellaneous/
 
 # Default openbench-cli output directory
-downloaded_datasets/
\ No newline at end of file
+downloaded_datasets/
+
+*.sh
\ No newline at end of file