Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions src/openbench/pipeline/pipeline_aliases.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
)
from .pipeline_registry import PipelineRegistry
from .speech_generation import (
GeminiSpeechGenerationPipeline,
WhisperKitSpeechGenerationPipeline,
)
from .streaming_transcription import (
Expand Down Expand Up @@ -668,6 +669,25 @@ def register_pipeline_aliases() -> None:
"Requires `WHISPERKIT_CLI_PATH` env var pointing to the whisperkit-cli binary.",
)

# Register the "gemini-speech-generation" alias so that name resolves to
# GeminiSpeechGenerationPipeline with the default configuration below.
PipelineRegistry.register_alias(
"gemini-speech-generation",
GeminiSpeechGenerationPipeline,
default_config={
"out_dir": "./speech_generation_results",
# NOTE(review): voice/language/model/encoding are presumably forwarded to the
# Google TTS request as-is — confirm against GeminiSpeechGenerationConfig.
"voice_name": "Charon",
"language_code": "en-US",
"model_name": "gemini-2.5-pro-tts",
"audio_encoding": "MP3",
# Evaluated once, when this registration call runs. os.getenv returns None if
# WHISPERKITPRO_CLI_PATH is unset, so this default can be None.
"transcription_cli_path": os.getenv("WHISPERKITPRO_CLI_PATH"),
# NOTE(review): presumably the repo/variant of the model used to transcribe the
# generated audio for the WER computation described below — confirm.
"transcription_repo_id": "argmaxinc/parakeetkit-pro",
"transcription_model_variant": "nvidia_parakeet-v2_476MB",
# NOTE(review): looks like generated audio files are discarded by default — confirm.
"keep_generated_audio": False,
},
# Adjacent string literals are concatenated into one description string.
description="Google Gemini speech generation pipeline. Generates audio from text prompts using Google Cloud TTS, "
"then transcribes the generated audio to compute WER against the original prompt. "
"Requires Google Cloud credentials and `WHISPERKITPRO_CLI_PATH` env var.",
)

################# STREAMING TRANSCRIPTION PIPELINES #################

PipelineRegistry.register_alias(
Expand Down
6 changes: 6 additions & 0 deletions src/openbench/pipeline/speech_generation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,16 @@
# Copyright (C) 2025 Argmax, Inc. All Rights Reserved.

from .common import SpeechGenerationConfig, SpeechGenerationOutput
from .speech_generation_gemini import (
GeminiSpeechGenerationConfig,
GeminiSpeechGenerationPipeline,
)
from .speech_generation_wkp import WhisperKitSpeechGenerationPipeline


__all__ = [
"GeminiSpeechGenerationConfig",
"GeminiSpeechGenerationPipeline",
"SpeechGenerationConfig",
"SpeechGenerationOutput",
"WhisperKitSpeechGenerationPipeline",
Expand Down
Loading