From 2bd51008c8f0149c6c660aebf64566106e09f318 Mon Sep 17 00:00:00 2001 From: jean-malo Date: Thu, 26 Feb 2026 11:20:26 +0100 Subject: [PATCH 1/2] feat(realtime): add support for target streaming delay in realtime transcription This commit introduces the ability to specify a target streaming delay in milliseconds for realtime transcription sessions. The changes include: 1. Adding a new parameter `target_streaming_delay_ms` to the realtime transcription API 2. Updating the connection management to handle the new delay parameter 3. Adding support for audio flushing to ensure proper handling of streaming delays 4. Creating a new example demonstrating dual-delay transcription with two parallel streams 5. Improving error handling for microphone access and PyAudio loading The changes allow for more precise control over the latency/accuracy tradeoff in realtime transcription, enabling use cases that require different streaming delays for different purposes. --- ...ime_transcription_dual_delay_microphone.py | 473 ++++++++++++++++++ ...async_realtime_transcription_microphone.py | 32 +- .../async_realtime_transcription_stream.py | 7 + examples/mistral/audio/pyaudio_utils.py | 38 ++ src/mistralai/extra/realtime/connection.py | 61 ++- src/mistralai/extra/realtime/transcription.py | 11 +- 6 files changed, 599 insertions(+), 23 deletions(-) create mode 100644 examples/mistral/audio/async_realtime_transcription_dual_delay_microphone.py create mode 100644 examples/mistral/audio/pyaudio_utils.py diff --git a/examples/mistral/audio/async_realtime_transcription_dual_delay_microphone.py b/examples/mistral/audio/async_realtime_transcription_dual_delay_microphone.py new file mode 100644 index 00000000..7653b0ed --- /dev/null +++ b/examples/mistral/audio/async_realtime_transcription_dual_delay_microphone.py @@ -0,0 +1,473 @@ +#!/usr/bin/env python +# /// script +# requires-python = ">=3.9" +# dependencies = [ +# "mistralai[realtime]", +# "pyaudio", +# "rich", +# ] +# [tool.uv.sources] +# mistralai = { path = "../../..", editable = true } +# /// + +import argparse +import asyncio +import difflib +import os +import sys +from dataclasses import dataclass +from typing import AsyncIterator, Sequence + +from rich.align import Align +from rich.console import Console +from rich.layout import Layout +from rich.live import Live +from rich.panel import Panel +from rich.text import Text + +from mistralai.client import Mistral +from mistralai.extra.realtime import UnknownRealtimeEvent +from mistralai.client.models import ( + AudioFormat, + RealtimeTranscriptionError, + RealtimeTranscriptionSessionCreated, + TranscriptionStreamDone, + TranscriptionStreamTextDelta, +) + +from pyaudio_utils import load_pyaudio + +console = Console() + + +@dataclass +class DualTranscriptState: + """Tracks transcript state for dual-delay transcription.""" + + fast_full_text: str = "" + slow_full_text: str = "" + fast_status: str = "๐Ÿ”Œ Connecting..." + slow_status: str = "๐Ÿ”Œ Connecting..." + error: str | None = None + fast_done: bool = False + slow_done: bool = False + + def set_error(self, message: str) -> None: + self.error = message + self.fast_status = "โŒ Error" + self.slow_status = "โŒ Error" + + +class DualTranscriptDisplay: + """Renders a live dual-delay transcription UI.""" + + def __init__( + self, + *, + model: str, + fast_delay_ms: int, + slow_delay_ms: int, + state: DualTranscriptState, + ) -> None: + self.model = model + self.fast_delay_ms = fast_delay_ms + self.slow_delay_ms = slow_delay_ms + self.state = state + + @staticmethod + def _normalize_word(word: str) -> str: + return word.strip(".,!?;:\"'()[]{}").lower() + + def _compute_display_texts(self) -> tuple[str, str]: + slow_words = self.state.slow_full_text.split() + fast_words = self.state.fast_full_text.split() + + if not slow_words: + partial_text = f" {self.state.fast_full_text}".rstrip() + return "", partial_text + + slow_norm = [self._normalize_word(word) for word in slow_words] + fast_norm = [self._normalize_word(word) for word in fast_words] + + matcher = difflib.SequenceMatcher(None, slow_norm, fast_norm) + last_fast_index = 0 + slow_progress = 0 + for block in matcher.get_matching_blocks(): + if block.size == 0: + continue + slow_end = block.a + block.size + if slow_end > slow_progress: + slow_progress = slow_end + last_fast_index = block.b + block.size + + if last_fast_index < len(fast_words): + ahead_words = fast_words[last_fast_index:] + partial_text = " " + " ".join(ahead_words) if ahead_words else "" + else: + partial_text = "" + + return self.state.slow_full_text, partial_text + + @staticmethod + def _status_style(status: str) -> str: + if "Listening" in status: + return "green" + if "Connecting" in status: + return "yellow dim" + if "Done" in status or "Stopped" in status: + return "dim" + return "red" + + def render(self) -> Layout: + layout = Layout() + + header_text = Text() + header_text.append("โ”‚ ", style="dim") + header_text.append(self.model, style="dim") + header_text.append(" โ”‚ ", style="dim") + header_text.append( + f"fast {self.fast_delay_ms}ms", style="bright_yellow" + ) + header_text.append( + f" {self.state.fast_status}", + style=self._status_style(self.state.fast_status), + ) + header_text.append(" โ”‚ ", style="dim") + header_text.append(f"slow {self.slow_delay_ms}ms", style="white") + header_text.append( + f" {self.state.slow_status}", + style=self._status_style(self.state.slow_status), + ) + + header = Align.left(header_text, vertical="middle", pad=False) + + final_text, partial_text = self._compute_display_texts() + transcript_text = Text() + if final_text or partial_text: + transcript_text.append(final_text, style="white") + transcript_text.append(partial_text, style="bright_yellow") + else: + transcript_text.append("...", style="dim") + + transcript = Panel( + Align.left(transcript_text, vertical="top"), + border_style="dim", + padding=(1, 2), + ) + + footer_text = Text() + footer_text.append("ctrl+c", style="dim") + footer_text.append(" quit", style="dim italic") + footer = Align.left(footer_text, vertical="middle", pad=False) + + if self.state.error: + layout.split_column( + Layout(header, name="header", size=1), + Layout(transcript, name="body"), + Layout( + Panel(Text(self.state.error, style="red"), border_style="red"), + name="error", + size=4, + ), + Layout(footer, name="footer", size=1), + ) + else: + layout.split_column( + Layout(header, name="header", size=1), + Layout(transcript, name="body"), + Layout(footer, name="footer", size=1), + ) + + return layout + + +async def iter_microphone( + *, + sample_rate: int, + chunk_duration_ms: int, +) -> AsyncIterator[bytes]: + """ + Yield microphone PCM chunks using PyAudio (16-bit mono). + Encoding is always pcm_s16le. + """ + pyaudio = load_pyaudio() + + p = pyaudio.PyAudio() + chunk_samples = int(sample_rate * chunk_duration_ms / 1000) + + stream = p.open( + format=pyaudio.paInt16, + channels=1, + rate=sample_rate, + input=True, + frames_per_buffer=chunk_samples, + ) + + loop = asyncio.get_running_loop() + try: + while True: + data = await loop.run_in_executor(None, stream.read, chunk_samples, False) + yield data + finally: + stream.stop_stream() + stream.close() + p.terminate() + + +async def queue_audio_iter( + queue: asyncio.Queue[bytes | None], +) -> AsyncIterator[bytes]: + """Yield audio chunks from a queue until a None sentinel is received.""" + while True: + chunk = await queue.get() + if chunk is None: + break + yield chunk + + +async def broadcast_microphone( + *, + sample_rate: int, + chunk_duration_ms: int, + queues: Sequence[asyncio.Queue[bytes | None]], +) -> None: + """Read from the microphone once and broadcast to multiple queues.""" + try: + async for chunk in iter_microphone( + sample_rate=sample_rate, chunk_duration_ms=chunk_duration_ms + ): + for queue in queues: + await queue.put(chunk) + finally: + for queue in queues: + while True: + try: + queue.put_nowait(None) + break + except asyncio.QueueFull: + try: + queue.get_nowait() + except asyncio.QueueEmpty: + break + + +def _status_for_event(event: object) -> str: + if isinstance(event, RealtimeTranscriptionSessionCreated): + return "๐ŸŽค Listening..." + return "โœ… Done" + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Dual-delay real-time microphone transcription." + ) + parser.add_argument( + "--model", + default="voxtral-mini-transcribe-realtime-2602", + help="Model ID", + ) + parser.add_argument( + "--fast-delay-ms", + type=int, + default=240, + help="Fast target streaming delay in ms", + ) + parser.add_argument( + "--slow-delay-ms", + type=int, + default=2400, + help="Slow target streaming delay in ms", + ) + parser.add_argument( + "--sample-rate", + type=int, + default=16000, + choices=[8000, 16000, 22050, 44100, 48000], + help="Sample rate in Hz", + ) + parser.add_argument( + "--chunk-duration", + type=int, + default=10, + help="Chunk duration in ms", + ) + parser.add_argument( + "--api-key", + default=os.environ.get("MISTRAL_API_KEY"), + help="Mistral API key", + ) + parser.add_argument( + "--base-url", + default=os.environ.get("MISTRAL_BASE_URL", "wss://api.mistral.ai"), + ) + return parser.parse_args() + + +async def run_stream( + *, + client: Mistral, + model: str, + delay_ms: int, + audio_stream: AsyncIterator[bytes], + audio_format: AudioFormat, + state: DualTranscriptState, + update_queue: asyncio.Queue[None], + is_fast: bool, +) -> None: + try: + async for event in client.audio.realtime.transcribe_stream( + audio_stream=audio_stream, + model=model, + audio_format=audio_format, + target_streaming_delay_ms=delay_ms, + ): + if isinstance(event, RealtimeTranscriptionSessionCreated): + if is_fast: + state.fast_status = _status_for_event(event) + else: + state.slow_status = _status_for_event(event) + elif isinstance(event, TranscriptionStreamTextDelta): + if is_fast: + state.fast_full_text += event.text + else: + state.slow_full_text += event.text + elif isinstance(event, TranscriptionStreamDone): + if is_fast: + state.fast_status = _status_for_event(event) + state.fast_done = True + else: + state.slow_status = _status_for_event(event) + state.slow_done = True + break + elif isinstance(event, RealtimeTranscriptionError): + state.set_error(str(event.error)) + break + elif isinstance(event, UnknownRealtimeEvent): + continue + + if update_queue.empty(): + update_queue.put_nowait(None) + except Exception as exc: # pragma: no cover - safety net for UI demo + state.set_error(str(exc)) + if update_queue.empty(): + update_queue.put_nowait(None) + + +async def ui_loop( + display: DualTranscriptDisplay, + update_queue: asyncio.Queue[None], + stop_event: asyncio.Event, + *, + refresh_hz: float = 12.0, +) -> None: + with Live( + display.render(), console=console, refresh_per_second=refresh_hz, screen=True + ) as live: + while not stop_event.is_set(): + try: + await asyncio.wait_for(update_queue.get(), timeout=0.25) + except asyncio.TimeoutError: + pass + live.update(display.render()) + + +async def main() -> int: + args = parse_args() + api_key = args.api_key or os.environ["MISTRAL_API_KEY"] + + try: + load_pyaudio() + except RuntimeError as exc: + console.print(str(exc), style="red") + return 1 + + state = DualTranscriptState() + display = DualTranscriptDisplay( + model=args.model, + fast_delay_ms=args.fast_delay_ms, + slow_delay_ms=args.slow_delay_ms, + state=state, + ) + + client = Mistral(api_key=api_key, server_url=args.base_url) + audio_format = AudioFormat(encoding="pcm_s16le", sample_rate=args.sample_rate) + + fast_queue: asyncio.Queue[bytes | None] = asyncio.Queue(maxsize=50) + slow_queue: asyncio.Queue[bytes | None] = asyncio.Queue(maxsize=50) + + stop_event = asyncio.Event() + update_queue: asyncio.Queue[None] = asyncio.Queue(maxsize=1) + + broadcaster = asyncio.create_task( + broadcast_microphone( + sample_rate=args.sample_rate, + chunk_duration_ms=args.chunk_duration, + queues=(fast_queue, slow_queue), + ) + ) + + fast_task = asyncio.create_task( + run_stream( + client=client, + model=args.model, + delay_ms=args.fast_delay_ms, + audio_stream=queue_audio_iter(fast_queue), + audio_format=audio_format, + state=state, + update_queue=update_queue, + is_fast=True, + ) + ) + + slow_task = asyncio.create_task( + run_stream( + client=client, + model=args.model, + delay_ms=args.slow_delay_ms, + audio_stream=queue_audio_iter(slow_queue), + audio_format=audio_format, + state=state, + update_queue=update_queue, + is_fast=False, + ) + ) + + ui_task = asyncio.create_task( + ui_loop(display, update_queue, stop_event, refresh_hz=12.0) + ) + + try: + while True: + await asyncio.sleep(0.1) + for task in (broadcaster, fast_task, slow_task): + if not task.done(): + continue + exc = task.exception() + if exc: + state.set_error(str(exc)) + if update_queue.empty(): + update_queue.put_nowait(None) + stop_event.set() + break + if state.error: + stop_event.set() + break + if state.fast_done and state.slow_done: + stop_event.set() + break + except KeyboardInterrupt: + state.fast_status = "โน๏ธ Stopped" + state.slow_status = "โน๏ธ Stopped" + stop_event.set() + finally: + broadcaster.cancel() + fast_task.cancel() + slow_task.cancel() + await asyncio.gather(broadcaster, fast_task, slow_task, return_exceptions=True) + await ui_task + + return 0 if not state.error else 1 + + +if __name__ == "__main__": + sys.exit(asyncio.run(main())) diff --git a/examples/mistral/audio/async_realtime_transcription_microphone.py b/examples/mistral/audio/async_realtime_transcription_microphone.py index 191a21e4..49568aea 100644 --- a/examples/mistral/audio/async_realtime_transcription_microphone.py +++ b/examples/mistral/audio/async_realtime_transcription_microphone.py @@ -33,14 +33,17 @@ TranscriptionStreamTextDelta, ) +from pyaudio_utils import load_pyaudio + console = Console() class TranscriptDisplay: """Manages the live transcript display.""" - def __init__(self, model: str) -> None: + def __init__(self, model: str, target_streaming_delay_ms: int | None) -> None: self.model = model + self.target_streaming_delay_ms = target_streaming_delay_ms self.transcript = "" self.status = "๐Ÿ”Œ Connecting..." self.error: str | None = None @@ -65,6 +68,10 @@ def render(self) -> Layout: header_text = Text() header_text.append("โ”‚ ", style="dim") header_text.append(self.model, style="dim") + if self.target_streaming_delay_ms is not None: + header_text.append( + f" ยท delay {self.target_streaming_delay_ms}ms", style="dim" + ) header_text.append(" โ”‚ ", style="dim") if "Listening" in self.status: @@ -126,7 +133,7 @@ async def iter_microphone( Yield microphone PCM chunks using PyAudio (16-bit mono). Encoding is always pcm_s16le. """ - import pyaudio + pyaudio = load_pyaudio() p = pyaudio.PyAudio() chunk_samples = int(sample_rate * chunk_duration_ms / 1000) @@ -164,6 +171,12 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--chunk-duration", type=int, default=10, help="Chunk duration in ms" ) + parser.add_argument( + "--target-streaming-delay-ms", + type=int, + default=None, + help="Target streaming delay in milliseconds", + ) parser.add_argument( "--api-key", default=os.environ.get("MISTRAL_API_KEY"), help="Mistral API key" ) @@ -178,6 +191,12 @@ async def main() -> int: args = parse_args() api_key = args.api_key or os.environ["MISTRAL_API_KEY"] + try: + load_pyaudio() + except RuntimeError as exc: + console.print(str(exc), style="red") + return 1 + client = Mistral(api_key=api_key, server_url=args.base_url) # microphone is always pcm_s16le here @@ -187,7 +206,9 @@ async def main() -> int: sample_rate=args.sample_rate, chunk_duration_ms=args.chunk_duration ) - display = TranscriptDisplay(model=args.model) + display = TranscriptDisplay( + model=args.model, target_streaming_delay_ms=args.target_streaming_delay_ms + ) with Live( display.render(), console=console, refresh_per_second=10, screen=True @@ -197,6 +218,7 @@ async def main() -> int: audio_stream=mic_stream, model=args.model, audio_format=audio_format, + target_streaming_delay_ms=args.target_streaming_delay_ms, ): if isinstance(event, RealtimeTranscriptionSessionCreated): display.set_listening() @@ -217,6 +239,10 @@ async def main() -> int: except KeyboardInterrupt: display.status = "โน๏ธ Stopped" live.update(display.render()) + except Exception as exc: + display.set_error(str(exc)) + live.update(display.render()) + return 1 return 0 diff --git a/examples/mistral/audio/async_realtime_transcription_stream.py b/examples/mistral/audio/async_realtime_transcription_stream.py index 0a0ac609..c005cf3f 100644 --- a/examples/mistral/audio/async_realtime_transcription_stream.py +++ b/examples/mistral/audio/async_realtime_transcription_stream.py @@ -90,6 +90,12 @@ def parse_args() -> argparse.Namespace: default=0.01, help="Delay between chunks in seconds", ) + parser.add_argument( + "--target-streaming-delay-ms", + type=int, + default=None, + help="Target streaming delay in milliseconds", + ) parser.add_argument( "--no-convert", action="store_true", @@ -120,6 +126,7 @@ async def main() -> int: ), model=args.model, audio_format=AudioFormat(encoding="pcm_s16le", sample_rate=16000), + target_streaming_delay_ms=args.target_streaming_delay_ms, ): if isinstance(event, TranscriptionStreamTextDelta): print(event.text, end="", flush=True) diff --git a/examples/mistral/audio/pyaudio_utils.py b/examples/mistral/audio/pyaudio_utils.py new file mode 100644 index 00000000..af72a885 --- /dev/null +++ b/examples/mistral/audio/pyaudio_utils.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from types import ModuleType + + +def load_pyaudio() -> ModuleType: + """ + Import PyAudio with a friendly error when PortAudio is missing. + + Raises: + RuntimeError: If PyAudio/PortAudio cannot be imported. + """ + try: + import pyaudio + except Exception as exc: + details = str(exc).lower() + if isinstance(exc, ModuleNotFoundError) and exc.name == "pyaudio": + message = ( + "PyAudio is required to use the microphone.\n" + "Install PortAudio (eg. for macos: brew install portaudio), then " + "reinstall PyAudio." + ) + elif "pyaudio._portaudio" in details or "portaudio" in details: + message = ( + "PyAudio is installed, but the PortAudio native library is missing or " + "failed to load.\n" + "Install PortAudio (eg. for macos: brew install portaudio), then " + "reinstall PyAudio." + ) + else: + message = ( + "PyAudio is required to use the microphone, but it could not be " + "imported.\n" + "Install PortAudio (eg. for macos: brew install portaudio), then " + "reinstall PyAudio." + ) + raise RuntimeError(message) from exc + return pyaudio diff --git a/src/mistralai/extra/realtime/connection.py b/src/mistralai/extra/realtime/connection.py index ffbbc735..6547052b 100644 --- a/src/mistralai/extra/realtime/connection.py +++ b/src/mistralai/extra/realtime/connection.py @@ -18,15 +18,21 @@ from mistralai.client.models import ( AudioFormat, + RealtimeTranscriptionInputAudioAppend, + RealtimeTranscriptionInputAudioEnd, + RealtimeTranscriptionInputAudioFlush, RealtimeTranscriptionError, RealtimeTranscriptionSession, RealtimeTranscriptionSessionCreated, RealtimeTranscriptionSessionUpdated, + RealtimeTranscriptionSessionUpdateMessage, + RealtimeTranscriptionSessionUpdatePayload, TranscriptionStreamDone, TranscriptionStreamLanguage, TranscriptionStreamSegmentDelta, TranscriptionStreamTextDelta, ) +from mistralai.client.types import UNSET class UnknownRealtimeEvent(BaseModel): @@ -36,6 +42,7 @@ class UnknownRealtimeEvent(BaseModel): - invalid JSON payload - schema validation failure """ + type: Optional[str] content: Any error: Optional[str] = None @@ -56,7 +63,6 @@ class UnknownRealtimeEvent(BaseModel): UnknownRealtimeEvent, ] - _MESSAGE_MODELS: dict[str, Any] = { "session.created": RealtimeTranscriptionSessionCreated, "session.updated": RealtimeTranscriptionSessionUpdated, @@ -108,7 +114,6 @@ def __init__( ) -> None: self._websocket = websocket self._session = session - self._audio_format = session.audio_format self._closed = False self._initial_events: Deque[RealtimeEvent] = deque(initial_events or []) @@ -122,7 +127,7 @@ def session(self) -> RealtimeTranscriptionSession: @property def audio_format(self) -> AudioFormat: - return self._audio_format + return self._session.audio_format @property def is_closed(self) -> bool: @@ -134,27 +139,46 @@ async def send_audio( if self._closed: raise RuntimeError("Connection is closed") - message = { - "type": "input_audio.append", - "audio": base64.b64encode(bytes(audio_bytes)).decode("ascii"), - } - await self._websocket.send(json.dumps(message)) + message = RealtimeTranscriptionInputAudioAppend( + audio=base64.b64encode(bytes(audio_bytes)).decode("ascii") + ) + await self._websocket.send(message.model_dump_json()) - async def update_session(self, audio_format: AudioFormat) -> None: + async def flush_audio(self) -> None: if self._closed: raise RuntimeError("Connection is closed") + await self._websocket.send( + RealtimeTranscriptionInputAudioFlush().model_dump_json() + ) - self._audio_format = audio_format - message = { - "type": "session.update", - "session": {"audio_format": audio_format.model_dump(mode="json")}, - } - await self._websocket.send(json.dumps(message)) + async def update_session( + self, + audio_format: Optional[AudioFormat] = None, + *, + target_streaming_delay_ms: Optional[int] = None, + ) -> None: + if self._closed: + raise RuntimeError("Connection is closed") + + if audio_format is None and target_streaming_delay_ms is None: + raise ValueError("At least one session field must be provided") + + message = RealtimeTranscriptionSessionUpdateMessage( + session=RealtimeTranscriptionSessionUpdatePayload( + audio_format=audio_format if audio_format is not None else UNSET, + target_streaming_delay_ms=target_streaming_delay_ms + if target_streaming_delay_ms is not None + else UNSET, + ) + ) + await self._websocket.send(message.model_dump_json()) async def end_audio(self) -> None: if self._closed: return - await self._websocket.send(json.dumps({"type": "input_audio.end"})) + await self._websocket.send( + RealtimeTranscriptionInputAudioEnd().model_dump_json() + ) async def close(self, *, code: int = 1000, reason: str = "") -> None: if self._closed: @@ -202,6 +226,7 @@ async def events(self) -> AsyncIterator[RealtimeEvent]: await self.close() def _apply_session_updates(self, ev: RealtimeEvent) -> None: - if isinstance(ev, RealtimeTranscriptionSessionCreated) or isinstance(ev, RealtimeTranscriptionSessionUpdated): + if isinstance(ev, RealtimeTranscriptionSessionCreated) or isinstance( + ev, RealtimeTranscriptionSessionUpdated + ): self._session = ev.session - self._audio_format = ev.session.audio_format diff --git a/src/mistralai/extra/realtime/transcription.py b/src/mistralai/extra/realtime/transcription.py index 655fd9c1..b216e676 100644 --- a/src/mistralai/extra/realtime/transcription.py +++ b/src/mistralai/extra/realtime/transcription.py @@ -67,6 +67,7 @@ async def connect( self, model: str, audio_format: Optional[AudioFormat] = None, + target_streaming_delay_ms: Optional[int] = None, server_url: Optional[str] = None, timeout_ms: Optional[int] = None, http_headers: Optional[Mapping[str, str]] = None, @@ -122,8 +123,11 @@ async def connect( initial_events=initial_events, ) - if audio_format is not None: - await connection.update_session(audio_format) + if audio_format is not None or target_streaming_delay_ms is not None: + await connection.update_session( + audio_format, + target_streaming_delay_ms=target_streaming_delay_ms, + ) return connection @@ -141,6 +145,7 @@ async def transcribe_stream( audio_stream: AsyncIterator[bytes], model: str, audio_format: Optional[AudioFormat] = None, + target_streaming_delay_ms: Optional[int] = None, server_url: Optional[str] = None, timeout_ms: Optional[int] = None, http_headers: Optional[Mapping[str, str]] = None, @@ -154,6 +159,7 @@ async def transcribe_stream( async with await self.connect( model=model, audio_format=audio_format, + target_streaming_delay_ms=target_streaming_delay_ms, server_url=server_url, timeout_ms=timeout_ms, http_headers=http_headers, @@ -164,6 +170,7 @@ async def _send() -> None: if connection.is_closed: break await connection.send_audio(chunk) + await connection.flush_audio() await connection.end_audio() send_task = asyncio.create_task(_send()) From d303706ac9041713aa7daa886f3a7bb865918cfb Mon Sep 17 00:00:00 2001 From: jean-malo Date: Thu, 26 Feb 2026 11:28:51 +0100 Subject: [PATCH 2/2] ci: skip dual-delay realtime example in run_examples --- scripts/run_examples.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/run_examples.sh b/scripts/run_examples.sh index 998b8dbe..eca854b4 100755 --- a/scripts/run_examples.sh +++ b/scripts/run_examples.sh @@ -48,6 +48,7 @@ exclude_files=( "examples/mistral/agents/async_conversation_run_mcp_remote.py" "examples/mistral/audio/async_realtime_transcription_microphone.py" "examples/mistral/audio/async_realtime_transcription_stream.py" + "examples/mistral/audio/async_realtime_transcription_dual_delay_microphone.py" ) # Files that require extra dependencies (agents, mcp, audio, etc.)