dbrkn · dbrkn · Mar 10, 2026
diff --git a/src/openbench/pipeline/pipeline_aliases.py b/src/openbench/pipeline/pipeline_aliases.py
@@ -26,6 +26,7 @@
 )
 from .pipeline_registry import PipelineRegistry
 from .speech_generation import (
+    GeminiSpeechGenerationPipeline,
     WhisperKitSpeechGenerationPipeline,
 )
 from .streaming_transcription import (
@@ -668,6 +669,25 @@ def register_pipeline_aliases() -> None:
         "Requires `WHISPERKIT_CLI_PATH` env var pointing to the whisperkit-cli binary.",
     )
 
+    PipelineRegistry.register_alias(
+        "gemini-speech-generation",
+        GeminiSpeechGenerationPipeline,
+        default_config={
+            "out_dir": "./speech_generation_results",
+            "voice_name": "Charon",
+            "language_code": "en-US",
+            "model_name": "gemini-2.5-pro-tts",
+            "audio_encoding": "MP3",
+            "transcription_cli_path": os.getenv("WHISPERKITPRO_CLI_PATH"),
+            "transcription_repo_id": "argmaxinc/parakeetkit-pro",
+            "transcription_model_variant": "nvidia_parakeet-v2_476MB",
+            "keep_generated_audio": False,
+        },
+        description="Google Gemini speech generation pipeline. Generates audio from text prompts using Google Cloud TTS, "
+        "then transcribes the generated audio to compute WER against the original prompt. "
+        "Requires Google Cloud credentials and `WHISPERKITPRO_CLI_PATH` env var.",
+    )
+
     ################# STREAMING TRANSCRIPTION PIPELINES #################
 
     PipelineRegistry.register_alias(

diff --git a/src/openbench/pipeline/speech_generation/__init__.py b/src/openbench/pipeline/speech_generation/__init__.py
@@ -2,10 +2,16 @@
 # Copyright (C) 2025 Argmax, Inc. All Rights Reserved.
 
 from .common import SpeechGenerationConfig, SpeechGenerationOutput
+from .speech_generation_gemini import (
+    GeminiSpeechGenerationConfig,
+    GeminiSpeechGenerationPipeline,
+)
 from .speech_generation_wkp import WhisperKitSpeechGenerationPipeline
 
 
 __all__ = [
+    "GeminiSpeechGenerationConfig",
+    "GeminiSpeechGenerationPipeline",
     "SpeechGenerationConfig",
     "SpeechGenerationOutput",
     "WhisperKitSpeechGenerationPipeline",