From 5ab85a2b6796216873f419268df313f60f9718a4 Mon Sep 17 00:00:00 2001 From: AssemblyAI Date: Tue, 23 Jun 2026 09:56:55 -0600 Subject: [PATCH] Project import generated by Copybara. GitOrigin-RevId: f82b46aca1b2586e24b8d7edb69b5d954f9b536a --- README.md | 16 ++++++++-------- assemblyai/streaming/v3/extras.py | 8 ++++---- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index d8c6d91..ba44178 100644 --- a/README.md +++ b/README.md @@ -710,11 +710,11 @@ for result in transcript.auto_highlights.results: ### **Streaming Examples** -Real-time speech-to-text via WebSocket against the `u3-rt-pro` model. The SDK ships two clients with identical option/event/handler surfaces — `StreamingClient` (threaded) and `AsyncStreamingClient` (asyncio). Pick whichever fits your codebase. +Real-time speech-to-text via WebSocket against the `universal-3-5-pro` model. The SDK ships two clients with identical option/event/handler surfaces — `StreamingClient` (threaded) and `AsyncStreamingClient` (asyncio). Pick whichever fits your codebase. **Handler contract**: every handler is called as `handler(client, event)`. Plain functions and `async def` functions both work; `AsyncStreamingClient` awaits async handlers inline on the read task, so don't block — use `asyncio.create_task(...)` if you need concurrent work. -[Read more about the streaming service.](https://www.assemblyai.com/docs/streaming/universal-3-pro) +[Read more about the streaming service.](https://www.assemblyai.com/docs/streaming/getting-started/transcribe-streaming-audio)
Stream a local file (sync) @@ -745,7 +745,7 @@ client.on(StreamingEvents.Termination, on_terminated) client.on(StreamingEvents.Error, on_error) client.connect(StreamingParameters( - sample_rate=16000, speech_model="u3-rt-pro", format_turns=True, + sample_rate=16000, speech_model="universal-3-5-pro", format_turns=True, )) try: client.stream(aai.extras.stream_file(filepath="audio.wav", sample_rate=16000)) @@ -775,7 +775,7 @@ def on_turn(client, event): client = StreamingClient(StreamingClientOptions(api_key="")) client.on(StreamingEvents.Turn, on_turn) -client.connect(StreamingParameters(sample_rate=16000, speech_model="u3-rt-pro")) +client.connect(StreamingParameters(sample_rate=16000, speech_model="universal-3-5-pro")) try: client.stream(aai.extras.MicrophoneStream(sample_rate=16000)) @@ -790,7 +790,7 @@ finally: For note-taker apps that capture two live sources (microphone **and** system/speaker output) but want them handled as **one** streaming session — while still knowing which source each word came from — wrap the client in a `ChannelStreamer`. -You declare named channels and feed each channel's PCM separately. The SDK runs per-channel energy VAD, mixes the channels into a single mono stream over one websocket, and — for handlers registered on the coordinator — delivers an enriched `DualChannelTurnEvent` whose words/turn carry their originating channel (`turn.channel` and per-word `word.channel`). The base `Word` / `TurnEvent` stay unchanged, so single-stream payloads aren't affected. Attribution is fully client-side and model-agnostic, so it composes with `speaker_labels`, multilingual, and `u3-rt-pro`. It is a **separate dimension from diarization** — `word.channel` (physical source) is independent of `word.speaker` (voice): two people on the same `system` channel get distinct speaker labels, while one person heard on two channels keeps a single speaker label. +You declare named channels and feed each channel's PCM separately. The SDK runs per-channel energy VAD, mixes the channels into a single mono stream over one websocket, and — for handlers registered on the coordinator — delivers an enriched `DualChannelTurnEvent` whose words/turn carry their originating channel (`turn.channel` and per-word `word.channel`). The base `Word` / `TurnEvent` stay unchanged, so single-stream payloads aren't affected. Attribution is fully client-side and model-agnostic, so it composes with `speaker_labels`, multilingual, and `universal-3-5-pro`. It is a **separate dimension from diarization** — `word.channel` (physical source) is independent of `word.speaker` (voice): two people on the same `system` channel get distinct speaker labels, while one person heard on two channels keeps a single speaker label. Unlike a browser sample, the SDK does not capture audio — you supply 16-bit PCM for each channel (from `sounddevice`, `pyaudio`, a loopback device, files, …). @@ -813,7 +813,7 @@ mixer = ChannelStreamer(client, channels=["mic", "system"], sample_rate=16000) # other events (Begin/Error/…) are forwarded to the client. mixer.on(StreamingEvents.Turn, on_turn) client.connect(StreamingParameters( - sample_rate=16000, speech_model="u3-rt-pro", speaker_labels=True, + sample_rate=16000, speech_model="universal-3-5-pro", speaker_labels=True, )) # Feed each source separately — e.g. from two capture callbacks. Send @@ -874,7 +874,7 @@ async def main(): async with AsyncStreamingClient(StreamingClientOptions(api_key="")) as client: client.on(StreamingEvents.Turn, on_turn) await client.connect(StreamingParameters( - sample_rate=16000, speech_model="u3-rt-pro", format_turns=True, + sample_rate=16000, speech_model="universal-3-5-pro", format_turns=True, )) await client.stream(stream_file_async("audio.wav", 16000)) @@ -954,7 +954,7 @@ async def streaming_token(): ```python client = StreamingClient(StreamingClientOptions(token="")) -client.connect(StreamingParameters(sample_rate=16000, speech_model="u3-rt-pro")) +client.connect(StreamingParameters(sample_rate=16000, speech_model="universal-3-5-pro")) ```
diff --git a/assemblyai/streaming/v3/extras.py b/assemblyai/streaming/v3/extras.py index f29006c..a340a59 100644 --- a/assemblyai/streaming/v3/extras.py +++ b/assemblyai/streaming/v3/extras.py @@ -363,10 +363,10 @@ def resolve_unknown_channels_by_speaker_history( for w in turn.words: if w.channel != UNKNOWN_CHANNEL or not w.speaker: continue - entry_or_none = speaker_history.get(w.speaker) - if not entry_or_none or sum(entry_or_none.values()) < min_rms_evidence: + entry = speaker_history.get(w.speaker) + if not entry or sum(entry.values()) < min_rms_evidence: continue - winner = _top_by_ratio(entry_or_none, dominance_ratio) + winner = _top_by_ratio(entry, dominance_ratio) if winner is not None: w.channel = winner w.channel_resolved = True @@ -709,7 +709,7 @@ def __init__( on_vad: Optional[Callable[[VadFrame], None]] = None, ): super().__init__(channels, sample_rate, attribution, on_vad) - self._client: "AsyncStreamingClient" = client + self._client = client client.on(StreamingEvents.Turn, self._handle_turn) async def _handle_turn(self, client: object, base_turn: TurnEvent) -> None: