diff --git a/docs/integrations-and-sdks/index.mdx b/docs/integrations-and-sdks/index.mdx index 6f3e97be..6096a57c 100644 --- a/docs/integrations-and-sdks/index.mdx +++ b/docs/integrations-and-sdks/index.mdx @@ -39,7 +39,7 @@ Choose an integration to build accurate, low-latency voice agents rapidly with t title="Pipecat" description="Open-source framework with full control of the voice pipeline in code. Complex agents. Best for: power builders." icon={Pipecat logo} - href="/integrations-and-sdks/pipecat" + href="/integrations-and-sdks/pipecat/" /> diff --git a/docs/integrations-and-sdks/pipecat.mdx b/docs/integrations-and-sdks/pipecat.mdx deleted file mode 100644 index bf9935cd..00000000 --- a/docs/integrations-and-sdks/pipecat.mdx +++ /dev/null @@ -1,72 +0,0 @@ ---- -id: pipecat -description: Learn how to integrate Speechmatics STT with Pipecat. ---- - -import CodeBlock from '@theme/CodeBlock'; - -# Pipecat integration - -Pipecat is an open-source framework for building voice agents. When Speechmatics STT is integrated with Pipecat, you can build real-time voice and multimodal conversational agent specifically tailored to your needs. - -Pipecat is perfect for: - -- **Voice AI**: Voice assistants, chatbots, and IVR systems -- **Transcription**: Realtime transcription of live events or media -- **Accessibility applications**: Screen readers and assistive technologies -- **Content creation**: Podcasts, dubbing, audiobooks, and voice-overs -- **Media production**: News broadcasts and automated announcements - -## Features - -- **Realtime transcription**: low-latency speech-to-text for responsive agents -- **Speaker diarization**: track who’s speaking in multi-participant sessions -- **Turn detection**: capture natural speech boundaries automatically -- **Noise robustness**: maintain accuracy in challenging environments -- **Custom vocabularies**: boost recognition for domain-specific terms -- **Flexible deployment**: use on-device, cloud, or hybrid Pipecat setups -## Quickstart - -### Requirements -- Python 3.10 or later -- uv package manager installed -- Pipecat >= 1.2 -- Speechmatics account. You can create one [here](https://portal.speechmatics.com). -- Speechmatics API key. You can generate one in the [Portal](https://portal.speechmatics.com/settings/api-keys). - -#### Installation -```python -pip install "pipecat-ai[speechmatics]" -``` -### Usage -Set the environment variable SPEECHMATICS_API_KEY to your Speechmatics API key. -```bash -export SPEECHMATICS_API_KEY=your_api_key -``` - -```python -import asyncio -import os - -from pipecat.services.speechmatics import SpeechmaticsSTTService - - -async def main(): - stt = SpeechmaticsSTTService( - api_key=os.environ["SPEECHMATICS_API_KEY"], - ) - - async def audio_stream(): - # Replace with your real audio source. - yield from [b"fake_audio_chunk_1", b"fake_audio_chunk_2"] - - async for result in stt.transcribe(audio_stream()): - speaker = f"Speaker {result.speaker}" if result.speaker else "Unknown" - print(f"{speaker}: {result.text}") - - -if __name__ == "__main__": - asyncio.run(main()) -``` - -For detailed examples, please see the [Speechmatics Academy](https://github.com/speechmatics/speechmatics-academy). 
diff --git a/docs/integrations-and-sdks/pipecat/assets/.gitkeep b/docs/integrations-and-sdks/pipecat/assets/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/docs/integrations-and-sdks/pipecat/assets/main.py b/docs/integrations-and-sdks/pipecat/assets/main.py new file mode 100644 index 00000000..3d837e90 --- /dev/null +++ b/docs/integrations-and-sdks/pipecat/assets/main.py @@ -0,0 +1,126 @@ +import os + +import aiohttp +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.audio.vad.vad_analyzer import VADParams +from pipecat.frames.frames import LLMRunFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.services.speechmatics.stt import SpeechmaticsSTTService +from pipecat.services.speechmatics.tts import SpeechmaticsTTSService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.turns.user_stop.turn_analyzer_user_turn_stop_strategy import ( + TurnAnalyzerUserTurnStopStrategy, +) +from pipecat.turns.user_turn_strategies import UserTurnStrategies + +load_dotenv(override=True) + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info("Starting bot") + + async with aiohttp.ClientSession() as session: + stt = SpeechmaticsSTTService( + api_key=os.getenv("SPEECHMATICS_API_KEY"), + params=SpeechmaticsSTTService.InputParams( + turn_detection_mode=SpeechmaticsSTTService.TurnDetectionMode.EXTERNAL, + ), + ) + + llm = OpenAILLMService( + api_key=os.getenv("OPENAI_API_KEY"), + model="gpt-4o-mini", + ) + + tts = SpeechmaticsTTSService( + api_key=os.getenv("SPEECHMATICS_API_KEY"), + voice_id="sarah", + aiohttp_session=session, + ) + + messages = [ + { + "role": "system", + "content": "You are a helpful voice assistant. 
Be concise and friendly.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams( + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), + user_turn_strategies=UserTurnStrategies( + stop=[ + TurnAnalyzerUserTurnStopStrategy( + turn_analyzer=LocalSmartTurnAnalyzerV3() + ) + ] + ), + ), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info("Client connected") + await task.queue_frames([LLMRunFrame()]) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info("Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + transport_params = { + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + } + + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/docs/integrations-and-sdks/pipecat/assets/stt-full-example.py b/docs/integrations-and-sdks/pipecat/assets/stt-full-example.py new file mode 100644 index 00000000..58039be7 --- /dev/null +++ b/docs/integrations-and-sdks/pipecat/assets/stt-full-example.py @@ -0,0 +1,35 @@ +from pipecat.services.speechmatics.stt import SpeechmaticsSTTService + +stt = SpeechmaticsSTTService( + params=SpeechmaticsSTTService.InputParams( + # Service options + language="en", + operating_point=SpeechmaticsSTTService.OperatingPoint.ENHANCED, + + # Turn detection + turn_detection_mode=SpeechmaticsSTTService.TurnDetectionMode.EXTERNAL, + max_delay=1.5, + include_partials=True, + + # Diarization + enable_diarization=True, + speaker_sensitivity=0.6, + max_speakers=4, + prefer_current_speaker=True, + + # Speaker focus + focus_speakers=["S1", "S2"], + focus_mode=SpeechmaticsSTTService.SpeakerFocusMode.RETAIN, + ignore_speakers=[], + + # Output formatting + speaker_active_format="[{speaker_id}]: {text}", + speaker_passive_format="[{speaker_id} (background)]: {text}", + + # Custom vocabulary + additional_vocab=[ + SpeechmaticsSTTService.AdditionalVocabEntry(content="Speechmatics"), + SpeechmaticsSTTService.AdditionalVocabEntry(content="Pipecat", sounds_like=["pipe cat"]), + ], + ), +) diff --git a/docs/integrations-and-sdks/pipecat/index.mdx b/docs/integrations-and-sdks/pipecat/index.mdx new file mode 100644 index 00000000..3d658074 --- /dev/null +++ b/docs/integrations-and-sdks/pipecat/index.mdx @@ -0,0 +1,88 @@ +--- + description: Build a local voice bot with Speechmatics STT and TTS using Pipecat. +--- + +import CodeBlock from '@theme/CodeBlock' +import pipecatQuickstartMainPy from "./assets/main.py?raw" + +# Pipecat quickstart + +Build a local voice bot with Speechmatics and Pipecat in minutes. + +[Pipecat](https://docs.pipecat.ai/) is a framework for building real-time voice bots using a pipeline architecture. In this quickstart, you’ll run a local WebRTC server and connect to your bot from your browser. 
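+
+Under the hood, the bot is a single Pipecat `Pipeline`: a chain of processors that audio and text frames flow through in order. Step 4 below builds the full, runnable version; the sketch here shows just the shape, using the same names as `main.py`:
+
+```python
+# Sketch only — see the full, runnable main.py in step 4 below.
+pipeline = Pipeline(
+    [
+        transport.input(),     # audio in from the browser (WebRTC)
+        stt,                   # Speechmatics speech to text
+        user_aggregator,       # collect the user's words into LLM context
+        llm,                   # generate a response
+        tts,                   # Speechmatics text to speech
+        transport.output(),    # audio out to the browser
+        assistant_aggregator,  # record the bot's reply in context
+    ]
+)
+```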
+
+## Features
+
+- **Real-time transcription** — Low-latency speech-to-text as users speak
+- **Natural text to speech** — Give your bot a clear, natural voice
+- **Local web client** — Test your bot in a browser at `http://localhost:7860/client`
+- **No infrastructure** — No cloud deployment or room setup required
+
+## Prerequisites
+
+- Python 3.10+
+- [Speechmatics API key](https://portal.speechmatics.com)
+- [OpenAI API key](https://platform.openai.com) (for the LLM)
+
+## Setup
+
+### 1. Create project
+
+```bash
+mkdir voice-agent && cd voice-agent
+```
+
+### 2. Install dependencies
+
+Create a `requirements.txt` file:
+
+```text title="requirements.txt"
+pipecat-ai[local-smart-turn-v3,silero,speechmatics,webrtc,openai,runner]
+pipecat-ai-small-webrtc-prebuilt
+python-dotenv
+loguru
+```
+
+Install with [uv](https://docs.astral.sh/uv/):
+
+```bash
+uv venv
+source .venv/bin/activate
+uv pip install -r requirements.txt
+```
+
+### 3. Configure environment
+
+Create a `.env` file:
+
+```text title=".env"
+SPEECHMATICS_API_KEY=your_speechmatics_key
+OPENAI_API_KEY=your_openai_key
+```
+
+### 4. Create your bot
+
+Create a `main.py` file:
+
+<CodeBlock language="python">
+  {pipecatQuickstartMainPy}
+</CodeBlock>
+
+### 5. Run your bot
+
+```bash
+python main.py
+```
+
+Open `http://localhost:7860/client` in your browser and allow microphone access.
+
+:::note
+The first run can take a little longer while dependencies and models load.
+:::
+
+## Next steps
+
+- [Speech to text](/integrations-and-sdks/pipecat/stt) — Configure diarization, turn detection, and more
+- [Text to speech](/integrations-and-sdks/pipecat/tts) — Choose voices and adjust settings
+- [Speechmatics Academy](https://github.com/speechmatics/speechmatics-academy/tree/main/integrations/pipecat) — Full working examples
+- [Pipecat quickstart](https://docs.pipecat.ai/getting-started/quickstart) — Learn more patterns and deployment options
diff --git a/docs/integrations-and-sdks/pipecat/sidebar.ts b/docs/integrations-and-sdks/pipecat/sidebar.ts
new file mode 100644
index 00000000..13d626dd
--- /dev/null
+++ b/docs/integrations-and-sdks/pipecat/sidebar.ts
@@ -0,0 +1,23 @@
+export default {
+  type: "category",
+  label: "Pipecat",
+  collapsible: true,
+  collapsed: true,
+  items: [
+    {
+      type: "doc",
+      id: "integrations-and-sdks/pipecat/index",
+      label: "Quickstart",
+    },
+    {
+      type: "doc",
+      id: "integrations-and-sdks/pipecat/stt",
+      label: "STT",
+    },
+    {
+      type: "doc",
+      id: "integrations-and-sdks/pipecat/tts",
+      label: "TTS",
+    },
+  ],
+} as const;
diff --git a/docs/integrations-and-sdks/pipecat/stt.mdx b/docs/integrations-and-sdks/pipecat/stt.mdx
new file mode 100644
index 00000000..0edcdda9
--- /dev/null
+++ b/docs/integrations-and-sdks/pipecat/stt.mdx
@@ -0,0 +1,277 @@
+---
+description: Transcribe live audio in your Pipecat voice bots with Speechmatics STT.
+---
+
+import CodeBlock from '@theme/CodeBlock'
+import sttFullExample from "./assets/stt-full-example.py?raw"
+
+# Pipecat speech to text
+
+Use the Speechmatics STT service to transcribe live audio in your Pipecat voice bots.
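+
+The service reads your API key from the `SPEECHMATICS_API_KEY` environment variable by default, so a minimal setup is a one-liner; pass `api_key` explicitly if you manage credentials yourself. A minimal sketch:
+
+```python
+import os
+
+from pipecat.services.speechmatics.stt import SpeechmaticsSTTService
+
+# Uses the SPEECHMATICS_API_KEY environment variable by default.
+stt = SpeechmaticsSTTService()
+
+# Or pass the key explicitly:
+stt = SpeechmaticsSTTService(api_key=os.getenv("SPEECHMATICS_API_KEY"))
+```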
+ +## Features + +- **Real-time transcription** — Low-latency streaming with partial (interim) results +- **Turn detection** — Adaptive, fixed, ML-based, or external control modes +- **Speaker diarization** — Identify and attribute speech to different speakers +- **Speaker filtering** — Focus on specific speakers or ignore others (like the assistant) +- **Custom vocabulary** — Boost recognition for domain-specific terms and proper nouns +- **Output formatting** — Configurable templates for multi-speaker transcripts + +## Installation + +```bash +pip install "pipecat-ai[speechmatics]" +``` + +## Basic configuration + +### Authentication + +By default, the service reads your API key from the `SPEECHMATICS_API_KEY` environment variable. + +### Service options + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `api_key` | string | env var | Speechmatics API key (defaults to `SPEECHMATICS_API_KEY`) | +| `base_url` | string | env var | Realtime base URL (defaults to `SPEECHMATICS_RT_URL`, or `wss://eu2.rt.speechmatics.com/v2`) | +| `sample_rate` | number | pipeline default | Audio sample rate in Hz | +| `should_interrupt` | boolean | `true` | Enable interruption on detected speech | + +### Input parameters + +These are passed via `params=SpeechmaticsSTTService.InputParams(...)`: + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `language` | Language \| string | `Language.EN` | Language code for transcription | +| `domain` | string \| null | `null` | Domain-specific model (for example `"finance"`) | +| `operating_point` | OperatingPoint \| null | `null` | Transcription accuracy. Use `OperatingPoint.ENHANCED` (higher accuracy) or `OperatingPoint.STANDARD` (lower latency) | +| `audio_encoding` | AudioEncoding | `PCM_S16LE` | Audio encoding format: `AudioEncoding.PCM_S16LE`, `AudioEncoding.PCM_F32LE`, or `AudioEncoding.MULAW` | +| `punctuation_overrides` | object \| null | `null` | Custom punctuation rules | +| `extra_params` | object \| null | `null` | Additional parameters to pass to the API | + +#### Example + +```python +from pipecat.services.speechmatics.stt import SpeechmaticsSTTService + +stt = SpeechmaticsSTTService( + params=SpeechmaticsSTTService.InputParams( + language="en", + operating_point=SpeechmaticsSTTService.OperatingPoint.ENHANCED, + ), +) +``` + +## Advanced configuration + +### Turn detection + +Turn detection determines when a user has finished their complete thought, while the Realtime API's `EndOfUtterance` message indicates a pause in speech. The service handles this distinction automatically. + +#### Modes + +Set `turn_detection_mode` to control how end of speech is detected: + +| Mode | When to use | +|------|-------------| +| `TurnDetectionMode.EXTERNAL` | Default and recommended. Delegates turn detection to Pipecat's pipeline (VAD, Smart Turn, etc.). Try this first | +| `TurnDetectionMode.ADAPTIVE` | Speechmatics analyzes speech content and acoustic patterns for end-of-turn detection | +| `TurnDetectionMode.FIXED` | Fixed silence threshold using `end_of_utterance_silence_trigger` | +| `TurnDetectionMode.SMART_TURN` | Speechmatics Smart Turn for ML-based turn detection | + +:::tip +Start with `EXTERNAL` mode. This lets you use Pipecat's turn detection features (like `LocalSmartTurnAnalyzerV3`) which are well-integrated with the pipeline. Only switch to other modes if you need Speechmatics to handle turn detection directly. 
+::: + +```python +from pipecat.services.speechmatics.stt import SpeechmaticsSTTService + +# External mode (default, recommended) - use Pipecat's turn detection +stt = SpeechmaticsSTTService( + params=SpeechmaticsSTTService.InputParams( + turn_detection_mode=SpeechmaticsSTTService.TurnDetectionMode.EXTERNAL, + ), +) + +# Adaptive mode - Speechmatics determines end-of-turn +stt = SpeechmaticsSTTService( + params=SpeechmaticsSTTService.InputParams( + turn_detection_mode=SpeechmaticsSTTService.TurnDetectionMode.ADAPTIVE, + ), +) + +# Fixed mode - consistent silence threshold +stt = SpeechmaticsSTTService( + params=SpeechmaticsSTTService.InputParams( + turn_detection_mode=SpeechmaticsSTTService.TurnDetectionMode.FIXED, + end_of_utterance_silence_trigger=0.8, # 800ms of silence + end_of_utterance_max_delay=5.0, # Force end after 5s + ), +) + +# Smart turn mode - Speechmatics ML-based turn detection +stt = SpeechmaticsSTTService( + params=SpeechmaticsSTTService.InputParams( + turn_detection_mode=SpeechmaticsSTTService.TurnDetectionMode.SMART_TURN, + ), +) +``` + +:::note +When using `ADAPTIVE` or `SMART_TURN` modes, remove any competing VAD or turn-detection features from your pipeline to avoid conflicts. +::: + +#### Configuration + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `end_of_utterance_silence_trigger` | number \| null | `null` | Silence duration (seconds) that triggers end of utterance. Used primarily in `FIXED` mode. Valid range: >0 to <2 seconds (exclusive) | +| `end_of_utterance_max_delay` | number \| null | `null` | Maximum delay (seconds) before forcing an end of utterance. Must be greater than `end_of_utterance_silence_trigger` | +| `max_delay` | number \| null | `null` | Maximum transcription delay (seconds). Lower values reduce latency at the cost of accuracy. Valid range: 0.7–4.0 seconds | +| `include_partials` | boolean \| null | `null` | Enable partial (interim) transcription results | +| `split_sentences` | boolean \| null | `null` | Split transcription into sentences | + +### Advanced diarization + +The service can attribute words to speakers and lets you decide which speakers are treated as **active** (foreground) vs **passive** (background). + +#### Configuration + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `enable_diarization` | boolean \| null | `null` | Enable speaker diarization | +| `speaker_sensitivity` | number \| null | `null` | Speaker detection sensitivity. Valid range: >0.0 to <1.0 (exclusive) | +| `max_speakers` | number \| null | `null` | Maximum number of speakers to detect. 
Valid range: 2–100 | +| `prefer_current_speaker` | boolean \| null | `null` | Reduce speaker switching for similar voices | +| `known_speakers` | array \| null | `null` | Pre-define speaker identifiers with labels (`SpeakerIdentifier` objects) | +| `additional_vocab` | array \| null | `null` | Custom vocabulary entries (`AdditionalVocabEntry` objects) for improved recognition | + +```python +from pipecat.services.speechmatics.stt import SpeechmaticsSTTService + +stt = SpeechmaticsSTTService( + params=SpeechmaticsSTTService.InputParams( + enable_diarization=True, + speaker_sensitivity=0.7, + max_speakers=3, + prefer_current_speaker=True, + additional_vocab=[ + SpeechmaticsSTTService.AdditionalVocabEntry(content="Speechmatics"), + SpeechmaticsSTTService.AdditionalVocabEntry(content="API", sounds_like=["A P I"]), + ], + ), +) +``` + +#### Known speakers + +Use `known_speakers` to attribute words to specific speakers across sessions. This is useful when you want consistent speaker identification for known participants. + +```python +from pipecat.services.speechmatics.stt import SpeechmaticsSTTService + +stt = SpeechmaticsSTTService( + params=SpeechmaticsSTTService.InputParams( + enable_diarization=True, + known_speakers=[ + SpeechmaticsSTTService.SpeakerIdentifier(label="Alice", speaker_identifiers=["speaker_abc123"]), + SpeechmaticsSTTService.SpeakerIdentifier(label="Bob", speaker_identifiers=["speaker_def456"]), + ], + ), +) +``` + +Speaker identifiers are unique to each Speechmatics account and can be obtained from a previous transcription session. + +#### Speaker focus + +Control which speakers are treated as **active** (foreground) vs **passive** (background): + +- **Active speakers** are the speakers you care about in your application. They generate `FINAL_TRANSCRIPT` events. +- **Passive speakers** are still transcribed, but their words are buffered and only included in the output alongside new words from active speakers. + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `focus_speakers` | array | `[]` | Speaker IDs to treat as active | +| `ignore_speakers` | array | `[]` | Speaker IDs to exclude entirely | +| `focus_mode` | SpeakerFocusMode | `RETAIN` | How to handle non-focused speakers | + +##### Focus modes + +- `SpeakerFocusMode.RETAIN` keeps non-focused speakers as passive. +- `SpeakerFocusMode.IGNORE` discards non-focused speaker words entirely. + +`ignore_speakers` always excludes those speakers from transcription and their speech will not trigger VAD or end of utterance detection. + +```python +from pipecat.services.speechmatics.stt import SpeechmaticsSTTService + +stt = SpeechmaticsSTTService( + params=SpeechmaticsSTTService.InputParams( + focus_speakers=["S1"], + focus_mode=SpeechmaticsSTTService.SpeakerFocusMode.RETAIN, + ignore_speakers=["S3"], + ), +) +``` + +#### Speaker formatting + +Use `speaker_active_format` and `speaker_passive_format` to format transcripts for your LLM. +The templates support `{speaker_id}`, `{text}`, `{ts}`, `{start_time}`, `{end_time}`, and `{lang}`. 
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `speaker_active_format` | string \| null | `null` | Format template for active speaker output |
+| `speaker_passive_format` | string \| null | `null` | Format template for passive speaker output |
+
+```python
+from pipecat.services.speechmatics.stt import SpeechmaticsSTTService
+
+stt = SpeechmaticsSTTService(
+    params=SpeechmaticsSTTService.InputParams(
+        speaker_active_format="<{speaker_id}>{text}",
+        speaker_passive_format="<{speaker_id} background>{text}",
+    ),
+)
+```
+
+When you use a custom format, include it in your bot's system prompt so the LLM can interpret speaker tags consistently.
+
+#### Updating speakers during transcription
+
+You can dynamically change which speakers to focus on or ignore during an active transcription session using the `update_params()` method.
+
+```python
+from pipecat.services.speechmatics.stt import SpeechmaticsSTTService
+
+stt = SpeechmaticsSTTService(
+    params=SpeechmaticsSTTService.InputParams(enable_diarization=True),
+)
+
+# Later, during transcription:
+stt.update_params(
+    SpeechmaticsSTTService.UpdateParams(
+        focus_speakers=["S1", "S2"],
+        ignore_speakers=["S3"],
+        focus_mode=SpeechmaticsSTTService.SpeakerFocusMode.RETAIN,
+    )
+)
+```
+
+This is useful when you need to adjust speaker filtering based on runtime conditions, such as when a new participant joins or leaves a conversation.
+
+#### Example
+
+<CodeBlock language="python">
+  {sttFullExample}
+</CodeBlock>
+
+## Next steps
+
+- [Quickstart](/integrations-and-sdks/pipecat) — Build a complete voice bot
+- [Text to speech](/integrations-and-sdks/pipecat/tts) — Use Speechmatics voices in your bot
+- [Pipecat documentation](https://docs.pipecat.ai/server/services/stt/speechmatics) — Full Speechmatics STT reference
diff --git a/docs/integrations-and-sdks/pipecat/tts.mdx b/docs/integrations-and-sdks/pipecat/tts.mdx
new file mode 100644
index 00000000..dde396d7
--- /dev/null
+++ b/docs/integrations-and-sdks/pipecat/tts.mdx
@@ -0,0 +1,47 @@
+---
+description: Use Speechmatics text to speech voices in your Pipecat voice bots.
+---
+
+# Pipecat text to speech
+
+Use Speechmatics TTS to give your Pipecat voice bot a clear, natural voice.
+
+## Installation
+
+```bash
+pip install "pipecat-ai[speechmatics]"
+```
+
+## Usage
+
+Create the service from your bot's async setup code so the `aiohttp` session can be shared with the rest of your bot:
+
+```python
+import aiohttp
+
+from pipecat.services.speechmatics.tts import SpeechmaticsTTSService
+
+
+async def main():
+    # `async with` is only valid inside a coroutine, so create the
+    # session (and the service) from async code such as your bot runner.
+    async with aiohttp.ClientSession() as session:
+        tts = SpeechmaticsTTSService(
+            api_key="YOUR_API_KEY",
+            voice_id="sarah",
+            aiohttp_session=session,
+        )
+        # ... build and run your pipeline with `tts` here ...
+```
+
+## Configuration
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `api_key` | string | env var | Speechmatics API key (defaults to `SPEECHMATICS_API_KEY`) |
+| `voice_id` | string | `"sarah"` | Voice to use |
+| `base_url` | string | service default | Base URL for the Speechmatics TTS endpoint |
+| `aiohttp_session` | `aiohttp.ClientSession` | none | Reuse a session for connection pooling and lower latency |
+
+For available voices and detailed TTS options, see the [Text to speech quickstart](/text-to-speech/quickstart).
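+
+In a pipeline, the TTS service sits between the LLM and your transport output, converting text frames from the LLM into audio. A sketch of that placement, where the processors other than `tts` stand in for your own bot's components (see the [quickstart](/integrations-and-sdks/pipecat) for a runnable example):
+
+```python
+from pipecat.pipeline.pipeline import Pipeline
+
+# Sketch: `transport`, `stt`, `user_aggregator`, `llm`, `tts`, and
+# `assistant_aggregator` come from your own bot setup.
+pipeline = Pipeline(
+    [
+        transport.input(),
+        stt,
+        user_aggregator,
+        llm,
+        tts,                 # text frames from the LLM become audio here
+        transport.output(),  # audio frames are streamed back to the user
+        assistant_aggregator,
+    ]
+)
+```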
+ + + +## Next steps + +- [Quickstart](/integrations-and-sdks/pipecat) — Build a complete voice bot +- [Speech to text](/integrations-and-sdks/pipecat/stt) — Configure STT options +- [Pipecat documentation](https://docs.pipecat.ai/server/services/tts/speechmatics) — Full Speechmatics TTS reference diff --git a/docs/integrations-and-sdks/sidebar.ts b/docs/integrations-and-sdks/sidebar.ts index 05b66e38..4678ed59 100644 --- a/docs/integrations-and-sdks/sidebar.ts +++ b/docs/integrations-and-sdks/sidebar.ts @@ -1,3 +1,4 @@ +import pipecatSidebar from "./pipecat/sidebar"; import livekitSidebar from "./livekit/sidebar"; export default { @@ -11,21 +12,17 @@ export default { id: "integrations-and-sdks/index", label: "Overview", }, + livekitSidebar, + pipecatSidebar, { type: "doc", id: "integrations-and-sdks/vapi", label: "Vapi", }, - livekitSidebar, - { - type: "doc", - id: "integrations-and-sdks/pipecat", - label: "Pipecat", - }, { type: "doc", id: "integrations-and-sdks/sdks", label: "SDKs", }, - ] -}; \ No newline at end of file + ], +} as const;