Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions docs/voice-agents/assets/additional-vocab.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from speechmatics.voice import AdditionalVocabEntry, VoiceAgentConfig

# Boost recognition of product names and acronyms with custom vocabulary.
# "sounds_like" lists common mishearings that should map to the content.
speechmatics_entry = AdditionalVocabEntry(
    content="Speechmatics",
    sounds_like=["speech matters", "speech matics"],
)
api_entry = AdditionalVocabEntry(content="API")

config = VoiceAgentConfig(
    language="en",
    additional_vocab=[speechmatics_entry, api_entry],
)
22 changes: 22 additions & 0 deletions docs/voice-agents/assets/advanced-config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from speechmatics.voice import (
EndOfUtteranceMode,
SpeakerFocusConfig,
SpeakerFocusMode,
SpeakerIdentifier,
VoiceAgentConfig,
VoiceAgentConfigPreset,
)

# Focus on speaker S1 while retaining other speakers' segments, and seed
# diarization with a previously enrolled speaker identifier.
focus = SpeakerFocusConfig(
    focus_speakers=["S1"],
    focus_mode=SpeakerFocusMode.RETAIN,
)
alice = SpeakerIdentifier(label="Alice", speaker_identifiers=["XX...XX"])

overrides = VoiceAgentConfig(
    end_of_utterance_mode=EndOfUtteranceMode.ADAPTIVE,
    enable_diarization=True,
    speaker_config=focus,
    known_speakers=[alice],
)

# Apply the overrides on top of the ADAPTIVE preset defaults.
config = VoiceAgentConfigPreset.ADAPTIVE(overrides)
36 changes: 36 additions & 0 deletions docs/voice-agents/assets/basic-config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from speechmatics.voice import (
AdditionalVocabEntry,
AudioEncoding,
OperatingPoint,
VoiceAgentConfig,
VoiceAgentConfigPreset,
)

# Custom vocabulary: boost recognition of product names and acronyms.
vocab = [
    AdditionalVocabEntry(
        content="Speechmatics",
        sounds_like=["speech matters", "speech matics"],
    ),
    AdditionalVocabEntry(content="API"),
]

# Overrides applied on top of the ADAPTIVE preset defaults (below).
overrides = VoiceAgentConfig(
    # Language and locale
    language="en",        # e.g. "en", "es", "fr"
    output_locale=None,   # e.g. "en-GB", "en-US"

    # Model selection
    operating_point=OperatingPoint.ENHANCED,  # STANDARD or ENHANCED
    domain=None,                              # e.g. "finance", "medical"

    # Vocabulary
    additional_vocab=vocab,
    punctuation_overrides=None,

    # Audio input format
    sample_rate=16000,
    audio_encoding=AudioEncoding.PCM_S16LE,

    # Speaker labelling
    enable_diarization=True,
)

config = VoiceAgentConfigPreset.ADAPTIVE(overrides)
1 change: 1 addition & 0 deletions docs/voice-agents/assets/custom-config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
from speechmatics.voice import VoiceAgentClient, VoiceAgentConfig, EndOfUtteranceMode

config = VoiceAgentConfig(
Expand Down
9 changes: 9 additions & 0 deletions docs/voice-agents/assets/event-subscription.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Print each finalised transcript segment with its speaker label.
@client.on(AgentServerMessageType.ADD_SEGMENT)
def on_final_segment(message):
    for seg in message["segments"]:
        speaker, text = seg["speaker_id"], seg["text"]
        print(f"[FINAL] {speaker}: {text}")

# Print in-progress (partial) segments; these may be revised before final.
@client.on(AgentServerMessageType.ADD_PARTIAL_SEGMENT)
def on_partial_segment(message):
    for seg in message["segments"]:
        speaker, text = seg["speaker_id"], seg["text"]
        print(f"[PARTIAL] {speaker}: {text}")
9 changes: 9 additions & 0 deletions docs/voice-agents/assets/known-speakers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from speechmatics.voice import SpeakerIdentifier, VoiceAgentConfig

# Enrol known speaker identifiers so diarization emits stable, human-readable
# labels instead of anonymous S1/S2 tags.
alice = SpeakerIdentifier(label="Alice", speaker_identifiers=["XX...XX"])
bob = SpeakerIdentifier(label="Bob", speaker_identifiers=["YY...YY"])

config = VoiceAgentConfig(
    enable_diarization=True,
    known_speakers=[alice, bob],
)
50 changes: 50 additions & 0 deletions docs/voice-agents/assets/quickstart.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import asyncio
import os
from speechmatics.rt import Microphone
from speechmatics.voice import VoiceAgentClient, AgentServerMessageType

async def main():
    """Stream microphone audio to Speechmatics Voice Agent using 'scribe' preset.

    Prints each finalised transcript segment with its speaker label until the
    user interrupts with Ctrl+C or the microphone stops producing audio.
    """

    # Audio configuration
    SAMPLE_RATE = 16000  # Hz
    CHUNK_SIZE = 160     # Samples per read
    PRESET = "scribe"    # Configuration preset

    # Fail fast with a clear message instead of passing api_key=None through
    # to the client and getting an opaque auth error later.
    api_key = os.getenv("SPEECHMATICS_API_KEY")
    if not api_key:
        print("Error: SPEECHMATICS_API_KEY environment variable is not set")
        return

    # Create client with preset
    client = VoiceAgentClient(api_key=api_key, preset=PRESET)

    # Print finalised segments of speech with speaker ID
    @client.on(AgentServerMessageType.ADD_SEGMENT)
    def on_segment(message):
        for segment in message["segments"]:
            speaker = segment["speaker_id"]
            text = segment["text"]
            print(f"{speaker}: {text}")

    # Setup microphone
    mic = Microphone(SAMPLE_RATE, CHUNK_SIZE)
    if not mic.start():
        print("Error: Microphone not available")
        return

    # Connect to the Voice Agent
    await client.connect()

    # Stream microphone audio (interruptible using keyboard)
    try:
        while True:
            audio_chunk = await mic.read(CHUNK_SIZE)
            if not audio_chunk:
                break  # Microphone stopped producing data
            await client.send_audio(audio_chunk)
    except KeyboardInterrupt:
        pass
    finally:
        # Release the audio device as well as the connection; the original
        # left the microphone stream running on exit.
        # NOTE(review): assumes Microphone exposes stop() — confirm against
        # the speechmatics.rt API.
        mic.stop()
        await client.disconnect()

if __name__ == "__main__":
    asyncio.run(main())
15 changes: 15 additions & 0 deletions docs/voice-agents/assets/smart-turn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from speechmatics.voice import (
EndOfUtteranceMode,
SmartTurnConfig,
VoiceAgentConfig,
VoiceAgentConfigPreset,
)

# ADAPTIVE mode + ML-enhanced turn detection
config = VoiceAgentConfig(
end_of_utterance_mode=EndOfUtteranceMode.ADAPTIVE,
smart_turn_config=SmartTurnConfig(enabled=True),
)

# Or use the SMART_TURN preset which bundles this configuration
config = VoiceAgentConfigPreset.SMART_TURN()
7 changes: 7 additions & 0 deletions docs/voice-agents/assets/speaker-focus-handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Route each segment by focus state: focused (active) speakers get full
# processing, everyone else is handled as passive context.
@client.on(AgentServerMessageType.ADD_SEGMENT)
def on_segment(message):
    for seg in message["segments"]:
        if not seg["is_active"]:
            process_passive_speaker(seg["speaker_id"], seg["text"])
        else:
            process_focused_speaker(seg["text"])
27 changes: 27 additions & 0 deletions docs/voice-agents/assets/speaker-focus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from speechmatics.voice import SpeakerFocusConfig, SpeakerFocusMode, VoiceAgentConfig

# Focus on specific speakers, keep others as passive
# Option 1: focus on specific speakers, keep everyone else as passive context.
focus_retain = SpeakerFocusConfig(
    focus_speakers=["S1", "S2"],
    focus_mode=SpeakerFocusMode.RETAIN,
)
config = VoiceAgentConfig(enable_diarization=True, speaker_config=focus_retain)

# Option 2: focus on specific speakers, exclude everyone else entirely.
focus_ignore = SpeakerFocusConfig(
    focus_speakers=["S1", "S2"],
    focus_mode=SpeakerFocusMode.IGNORE,
)
config = VoiceAgentConfig(enable_diarization=True, speaker_config=focus_ignore)

# Option 3: blacklist specific speakers (exclude them from all processing).
blacklist = SpeakerFocusConfig(ignore_speakers=["S3"])
config = VoiceAgentConfig(enable_diarization=True, speaker_config=blacklist)
15 changes: 7 additions & 8 deletions docs/voice-agents/overview.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,23 @@ import { Grid } from "@radix-ui/themes";

# Voice agents overview

Our Voice SDK provides features optimized for conversational AI, which we use to build our integrations.
Our integration partners are the quickest way to get a production voice agent up and running.
There are two ways to build voice agents using Speechmatics:

- Integration partners (LiveKit, Pipecat and VAPI): the fastest path to a production voice agent.
- Voice SDK: direct access for custom pipelines or working outside of supported integration platforms.


## Features

Speechmatics provides building blocks you can use through integrations and the Voice SDK.

It includes:
Key features include:

- **Turn detection**: detect when a speaker has finished talking.
- **Intelligent segmentation**: group partial transcripts into clean, speaker-attributed segments.
- **Diarization**: identify and label different speakers.
- **Speaker focus**: focus on or ignore specific speakers in multi-speaker scenarios.
- **Preset configurations**: start quickly with ready-to-use settings.
- **Structured events**: work with clean segments instead of raw word-level events.

## Integrations

Expand Down Expand Up @@ -51,7 +53,4 @@ Use an integration to handle audio transport and wiring, so you can focus on you

Use the Voice SDK to handle turn detection, group transcripts into clean segments, and apply diarization for LLM workflows.

See [Voice SDK](/voice-agents/voice-sdk) for getting started, presets, and configuration.

If you’re building an integration and want to work with us, [contact support](https://support.speechmatics.com).

See [Voice SDK](/voice-agents/voice-sdk) for information on getting started, presets, and configuration.
Loading