From 2271986fffc24e66e20569c163eafdaf6470fee4 Mon Sep 17 00:00:00 2001
From: Georgios Hadjiharalambous
Date: Wed, 25 Feb 2026 17:27:46 +0000
Subject: [PATCH] add timestamp docs

Document the optional `timestamp` field of the ForceEndOfUtterance
message in the turn-detection guide and declare it in the realtime spec.

---
 docs/speech-to-text/realtime/turn-detection.mdx | 10 ++++++++++
 spec/realtime.yaml                              |  5 +++++
 2 files changed, 15 insertions(+)

diff --git a/docs/speech-to-text/realtime/turn-detection.mdx b/docs/speech-to-text/realtime/turn-detection.mdx
index 54f93a7..af22593 100644
--- a/docs/speech-to-text/realtime/turn-detection.mdx
+++ b/docs/speech-to-text/realtime/turn-detection.mdx
@@ -111,6 +111,16 @@ The `ForceEndOfUtterance` message is sent to the server to trigger an end of utt
 }
 ```
 
+For higher accuracy you can add an optional `timestamp` parameter to the message. It is the timestamp of the audio data that corresponds to the force end of utterance request.
+It's the number of seconds since the beginning of the audio.
+
+```json
+{
+  "message": "ForceEndOfUtterance",
+  "timestamp": 63.5
+}
+```
+
 You can also use `ForceEndOfUtterance` with multi-channel diarization:
 
 ```json
diff --git a/spec/realtime.yaml b/spec/realtime.yaml
index 5e73851..276dac4 100644
--- a/spec/realtime.yaml
+++ b/spec/realtime.yaml
@@ -393,6 +393,11 @@ components:
         channel:
           type: string
           description: The channel to request finalized transcript from. This field is only seen in multichannel.
+        timestamp:
+          type: number
+          format: float
+          minimum: 0
+          description: "Timestamp of the audio data that corresponds to the force end of utterance request. It's the number of seconds since the beginning of the audio."
       required:
         - message
     SetRecognitionConfig: