Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/openbench/dataset/dataset_aliases.py
Original file line number Diff line number Diff line change
Expand Up @@ -660,6 +660,7 @@ def register_dataset_aliases() -> None:
PipelineType.DIARIZATION,
PipelineType.STREAMING_TRANSCRIPTION,
PipelineType.ORCHESTRATION,
PipelineType.SPEECH_GENERATION,
},
description="Local dataset for testing. To use this dataset you need to set the `LOCAL_DATASET_PATH` and `LOCAL_DATASET_SPLIT` environment variables.",
)
Expand Down
3 changes: 3 additions & 0 deletions src/openbench/dataset/dataset_speech_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ class SpeechGenerationExtraInfo(TypedDict, total=False):
"""Extra info for speech generation samples."""

language: str
dialogue: list[dict]


class SpeechGenerationRow(TypedDict):
Expand Down Expand Up @@ -95,5 +96,7 @@ def prepare_sample(self, row: SpeechGenerationRow) -> tuple[Transcript, SpeechGe
extra_info: SpeechGenerationExtraInfo = {}
if "language" in row:
extra_info["language"] = row["language"]
if "dialogue" in row and row["dialogue"]:
extra_info["dialogue"] = row["dialogue"]

return reference, extra_info
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ def compute_keyword_stats(
) -> dict[str, Any]:
"""Compute keyword statistics between reference and hypothesis."""

if not dictionary:
return {"true_positives": 0, "ground_truth": 0, "false_positives": 0, "keyword_stats": {}}

# Convert transcripts to text
ref_text = reference.get_transcript_string()
hyp_text = hypothesis.get_transcript_string()
Expand Down
26 changes: 26 additions & 0 deletions src/openbench/pipeline/pipeline_aliases.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
)
from .pipeline_registry import PipelineRegistry
from .speech_generation import (
ElevenLabsDialogueGenerationPipeline,
WhisperKitSpeechGenerationPipeline,
)
from .streaming_transcription import (
Expand Down Expand Up @@ -668,6 +669,31 @@ def register_pipeline_aliases() -> None:
"Requires `WHISPERKIT_CLI_PATH` env var pointing to the whisperkit-cli binary.",
)

# Register the "elevenlabs-dialogue-generation" alias so that it resolves to
# ElevenLabsDialogueGenerationPipeline with the default configuration below.
PipelineRegistry.register_alias(
"elevenlabs-dialogue-generation",
ElevenLabsDialogueGenerationPipeline,
default_config={
"out_dir": "./speech_generation_results",
"model_id": "eleven_v3",
# Speaker label -> voice ID. NOTE(review): presumably ElevenLabs voice IDs
# keyed by the speaker names found in the dataset's dialogue turns — confirm.
"speaker_voice_map": {
"doctor": "9BWtsMINqrJLrRacOk9x",
"patient": "IKne3meq5aSn9XLyUdCD",
"assistant": "pFZP5JQG7iQjIQuC4Bku",
},
# Same voice ID as the "doctor" entry above. NOTE(review): presumably the
# fallback for speakers missing from `speaker_voice_map` — confirm in the pipeline.
"default_voice_id": "9BWtsMINqrJLrRacOk9x",
# NOTE(review): units are not visible here; presumably a per-request character
# limit and seconds of silence inserted between chunks — confirm.
"max_chars_per_chunk": 4500,
"chunk_silence_duration": 0.75,
# Read from the environment at the moment this registration call executes;
# `os.getenv` yields None when WHISPERKITPRO_CLI_PATH is unset.
"transcription_cli_path": os.getenv("WHISPERKITPRO_CLI_PATH"),
"transcription_repo_id": "argmaxinc/parakeetkit-pro",
"transcription_model_variant": "nvidia_parakeet-v2_476MB",
"keep_generated_audio": False,
},
# Adjacent string literals are concatenated into a single description string.
description="ElevenLabs dialogue generation pipeline. Generates multi-speaker conversational audio "
"from dialogue turns using ElevenLabs text_to_dialogue API, then transcribes the generated "
"audio to compute WER against the original dialogue text. "
"Requires `ELEVENLABS_API_KEY` and `WHISPERKITPRO_CLI_PATH` env vars.",
)

################# STREAMING TRANSCRIPTION PIPELINES #################

PipelineRegistry.register_alias(
Expand Down
6 changes: 6 additions & 0 deletions src/openbench/pipeline/speech_generation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,16 @@
# Copyright (C) 2025 Argmax, Inc. All Rights Reserved.

from .common import SpeechGenerationConfig, SpeechGenerationOutput
from .speech_generation_elevenlabs_dialogue import (
ElevenLabsDialogueGenerationConfig,
ElevenLabsDialogueGenerationPipeline,
)
from .speech_generation_wkp import WhisperKitSpeechGenerationPipeline


__all__ = [
"ElevenLabsDialogueGenerationConfig",
"ElevenLabsDialogueGenerationPipeline",
"SpeechGenerationConfig",
"SpeechGenerationOutput",
"WhisperKitSpeechGenerationPipeline",
Expand Down
Loading