Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/integrations-and-sdks/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ Choose an integration to build accurate, low-latency voice agents rapidly with t
title="Pipecat"
description="Open-source framework with full control of the voice pipeline in code. Complex agents. Best for: power builders."
icon={<img src="/img/integration-logos/pipecat.png" alt="Pipecat logo" width="28px" height="28px" />}
href="/integrations-and-sdks/pipecat"
href="/integrations-and-sdks/pipecat/"
/>

</Grid>
Expand Down
72 changes: 0 additions & 72 deletions docs/integrations-and-sdks/pipecat.mdx

This file was deleted.

Empty file.
126 changes: 126 additions & 0 deletions docs/integrations-and-sdks/pipecat/assets/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import os

import aiohttp
from dotenv import load_dotenv
from loguru import logger

from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.audio.vad.vad_analyzer import VADParams
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.processors.aggregators.llm_response_universal import (
LLMContextAggregatorPair,
LLMUserAggregatorParams,
)
from pipecat.runner.types import RunnerArguments
from pipecat.runner.utils import create_transport
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.services.speechmatics.stt import SpeechmaticsSTTService
from pipecat.services.speechmatics.tts import SpeechmaticsTTSService
from pipecat.transports.base_transport import BaseTransport, TransportParams
from pipecat.turns.user_stop.turn_analyzer_user_turn_stop_strategy import (
TurnAnalyzerUserTurnStopStrategy,
)
from pipecat.turns.user_turn_strategies import UserTurnStrategies

# Load settings from a local .env file into the process environment so the
# os.getenv() lookups in run_bot can find the API keys.
# override=True lets values from .env replace variables that are already set
# in the environment.
load_dotenv(override=True)


async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Assemble the voice pipeline and run it until the task finishes.

    Frames flow through the pipeline in this order: transport input ->
    Speechmatics STT -> user context aggregator -> OpenAI LLM ->
    Speechmatics TTS -> transport output -> assistant context aggregator.

    Args:
        transport: Transport that provides the input/output processors and
            emits the client connected/disconnected events.
        runner_args: Runner arguments; only ``handle_sigint`` is read here.
    """
    logger.info("Starting bot")

    # The TTS service is given this HTTP session, so the whole bot lifetime
    # is kept inside the session's context manager.
    async with aiohttp.ClientSession() as http_session:
        # --- Speech to text -------------------------------------------------
        stt_api_key = os.getenv("SPEECHMATICS_API_KEY")
        # NOTE(review): EXTERNAL presumably leaves end-of-turn decisions to the
        # VAD / smart-turn analyzer configured further down — confirm.
        stt_params = SpeechmaticsSTTService.InputParams(
            turn_detection_mode=SpeechmaticsSTTService.TurnDetectionMode.EXTERNAL,
        )
        speech_to_text = SpeechmaticsSTTService(api_key=stt_api_key, params=stt_params)

        # --- Language model -------------------------------------------------
        language_model = OpenAILLMService(
            api_key=os.getenv("OPENAI_API_KEY"),
            model="gpt-4o-mini",
        )

        # --- Text to speech -------------------------------------------------
        text_to_speech = SpeechmaticsTTSService(
            api_key=os.getenv("SPEECHMATICS_API_KEY"),
            voice_id="sarah",
            aiohttp_session=http_session,
        )

        # --- Conversation context and aggregators ---------------------------
        system_message = {
            "role": "system",
            "content": "You are a helpful voice assistant. Be concise and friendly.",
        }
        context = LLMContext([system_message])

        vad_analyzer = SileroVADAnalyzer(params=VADParams(stop_secs=0.2))
        smart_turn_stop = TurnAnalyzerUserTurnStopStrategy(
            turn_analyzer=LocalSmartTurnAnalyzerV3()
        )
        user_params = LLMUserAggregatorParams(
            vad_analyzer=vad_analyzer,
            user_turn_strategies=UserTurnStrategies(stop=[smart_turn_stop]),
        )
        aggregator_pair = LLMContextAggregatorPair(context, user_params=user_params)
        user_aggregator, assistant_aggregator = aggregator_pair

        # --- Pipeline and task ----------------------------------------------
        stages = [
            transport.input(),
            speech_to_text,
            user_aggregator,
            language_model,
            text_to_speech,
            transport.output(),
            assistant_aggregator,
        ]
        pipeline = Pipeline(stages)

        task_params = PipelineParams(enable_metrics=True, enable_usage_metrics=True)
        task = PipelineTask(pipeline, params=task_params)

        # --- Transport events -----------------------------------------------
        @transport.event_handler("on_client_connected")
        async def on_client_connected(transport, client):
            logger.info("Client connected")
            # NOTE(review): queuing an LLMRunFrame presumably triggers an LLM
            # run on the initial context so the bot speaks first — confirm.
            await task.queue_frames([LLMRunFrame()])

        @transport.event_handler("on_client_disconnected")
        async def on_client_disconnected(transport, client):
            logger.info("Client disconnected")
            # Stop the pipeline task once the client has gone away.
            await task.cancel()

        # --- Run ------------------------------------------------------------
        pipeline_runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)
        await pipeline_runner.run(task)


async def bot(runner_args: RunnerArguments):
    """Create the transport for this session and hand it to ``run_bot``.

    Only a ``"webrtc"`` transport is configured; its parameters enable both
    audio input and audio output.

    Args:
        runner_args: Runner arguments, passed to ``create_transport`` and then
            on to ``run_bot``.
    """

    def webrtc_params():
        # Factory rather than an instance: the params object is only built
        # when this callable is invoked.
        return TransportParams(
            audio_in_enabled=True,
            audio_out_enabled=True,
        )

    transport = await create_transport(runner_args, {"webrtc": webrtc_params})
    await run_bot(transport, runner_args)


# Script entry point: when this file is executed directly, delegate to the
# pipecat runner's main(). The import lives inside the guard so it only
# happens when the file is run as a script.
# NOTE(review): the runner presumably discovers and calls the `bot` coroutine
# defined in this module — confirm against the pipecat runner docs.
if __name__ == "__main__":
    from pipecat.runner.run import main

    main()
35 changes: 35 additions & 0 deletions docs/integrations-and-sdks/pipecat/assets/stt-full-example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from pipecat.services.speechmatics.stt import SpeechmaticsSTTService

# Example: a SpeechmaticsSTTService configured through InputParams, with the
# options grouped by purpose.
# NOTE(review): no api_key is passed here — presumably the service falls back
# to the SPEECHMATICS_API_KEY environment variable; confirm.
stt = SpeechmaticsSTTService(
    params=SpeechmaticsSTTService.InputParams(
        # Service options
        language="en",
        operating_point=SpeechmaticsSTTService.OperatingPoint.ENHANCED,

        # Turn detection
        # NOTE(review): EXTERNAL presumably means end-of-turn is decided
        # outside the STT service (e.g. by a VAD / turn analyzer) — confirm.
        turn_detection_mode=SpeechmaticsSTTService.TurnDetectionMode.EXTERNAL,
        max_delay=1.5,
        include_partials=True,

        # Diarization
        enable_diarization=True,
        speaker_sensitivity=0.6,
        max_speakers=4,
        prefer_current_speaker=True,

        # Speaker focus
        # NOTE(review): "S1"/"S2" look like diarization speaker labels — verify.
        focus_speakers=["S1", "S2"],
        focus_mode=SpeechmaticsSTTService.SpeakerFocusMode.RETAIN,
        ignore_speakers=[],

        # Output formatting
        # Templates with {speaker_id} and {text} placeholders; a separate
        # template is given for active and for passive speakers.
        speaker_active_format="[{speaker_id}]: {text}",
        speaker_passive_format="[{speaker_id} (background)]: {text}",

        # Custom vocabulary
        # Each entry supplies the written form; sounds_like optionally lists
        # alternative pronunciations.
        additional_vocab=[
            SpeechmaticsSTTService.AdditionalVocabEntry(content="Speechmatics"),
            SpeechmaticsSTTService.AdditionalVocabEntry(content="Pipecat", sounds_like=["pipe cat"]),
        ],
    ),
)
88 changes: 88 additions & 0 deletions docs/integrations-and-sdks/pipecat/index.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
---
description: Build a local voice bot with Speechmatics STT and TTS using Pipecat.
---

import CodeBlock from '@theme/CodeBlock'
import pipecatQuickstartMainPy from "./assets/main.py?raw"

# Pipecat quickstart

Build a local voice bot with Speechmatics and Pipecat in minutes.

[Pipecat](https://docs.pipecat.ai/) is a framework for building real-time voice bots using a pipeline architecture. In this quickstart, you’ll run a local WebRTC server and connect to your bot from your browser.

## Features

- **Real-time transcription** — Low-latency speech-to-text as users speak
- **Natural text-to-speech** — Give your bot a clear, natural voice
- **Local web client** — Test your bot in a browser at `http://localhost:7860/client`
- **No infrastructure** — No cloud deployment or room setup required

## Prerequisites

- Python 3.10+
- [Speechmatics API key](https://portal.speechmatics.com)
- [OpenAI API key](https://platform.openai.com) (for the LLM)

## Setup

### 1. Create project

```bash
mkdir voice-agent && cd voice-agent
```

### 2. Install dependencies

Create a `requirements.txt` file:

```text title="requirements.txt"
pipecat-ai[local-smart-turn-v3,silero,speechmatics,webrtc,openai,runner]
pipecat-ai-small-webrtc-prebuilt
python-dotenv
loguru
```

Install with [uv](https://docs.astral.sh/uv/):

```bash
uv venv
source .venv/bin/activate
uv pip install -r requirements.txt
```

### 3. Configure environment

Create a `.env` file:

```text title=".env"
SPEECHMATICS_API_KEY=your_speechmatics_key
OPENAI_API_KEY=your_openai_key
```

### 4. Create your bot

Create a `main.py` file:

<CodeBlock language="python" title="main.py">
{pipecatQuickstartMainPy}
</CodeBlock>

### 5. Run your bot

```bash
python main.py
```

Open `http://localhost:7860/client` in your browser and allow microphone access.

:::note
The first run can take a little longer while dependencies and models load.
:::

## Next steps

- [Speech to text](/integrations-and-sdks/pipecat/stt) — Configure diarization, turn detection, and more
- [Text to speech](/integrations-and-sdks/pipecat/tts) — Choose voices and adjust settings
- [Speechmatics Academy](https://github.com/speechmatics/speechmatics-academy/tree/main/integrations/pipecat) — Full working examples
- [Pipecat quickstart](https://docs.pipecat.ai/getting-started/quickstart) — Learn more patterns and deployment options
23 changes: 23 additions & 0 deletions docs/integrations-and-sdks/pipecat/sidebar.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Sidebar category for the Pipecat integration docs.
// Groups three doc pages (Quickstart, STT, TTS) under a collapsible
// "Pipecat" entry that starts collapsed.
export default {
  type: "category",
  label: "Pipecat",
  collapsible: true,
  collapsed: true,
  items: [
    {
      // The category's index page, shown as "Quickstart".
      type: "doc",
      id: "integrations-and-sdks/pipecat/index",
      label: "Quickstart",
    },
    {
      type: "doc",
      id: "integrations-and-sdks/pipecat/stt",
      label: "STT",
    },
    {
      type: "doc",
      id: "integrations-and-sdks/pipecat/tts",
      label: "TTS",
    },
  ],
} as const;
Loading