From 5330a216b66e807fcba5df96e68ce5bf14530eba Mon Sep 17 00:00:00 2001
From: Tim Cadenbach <DeeJay@tcdev.de>
Date: Tue, 31 Mar 2026 11:22:53 +0200
Subject: [PATCH 1/2] Draft for a DeepL Voice implementation

---
 DeepL/DeepL.csproj               |   1 +
 DeepL/DeepLClient.cs             |  76 ++++++++-
 DeepL/IVoiceManager.cs           |  28 ++++
 DeepL/IVoiceSession.cs           |  77 +++++++++
 DeepL/Model/TargetMediaChunk.cs  |  68 ++++++++
 DeepL/Model/TranscriptSegment.cs |  29 ++++
 DeepL/Model/TranscriptUpdate.cs  |  41 +++++
 DeepL/Model/VoiceSessionInfo.cs  |  40 +++++
 DeepL/Model/VoiceStreamError.cs  |  41 +++++
 DeepL/SourceLanguageMode.cs      |  29 ++++
 DeepL/SourceMediaContentType.cs  |  68 ++++++++
 DeepL/TargetMediaVoice.cs        |  32 ++++
 DeepL/VoiceMessageFormat.cs      |  29 ++++
 DeepL/VoiceSession.cs            | 258 +++++++++++++++++++++++++++++++
 DeepL/VoiceSessionOptions.cs     |  70 +++++++++
 DeepLTests/VoiceSessionTest.cs   | 180 +++++++++++++++++++++
 16 files changed, 1066 insertions(+), 1 deletion(-)
 create mode 100644 DeepL/IVoiceManager.cs
 create mode 100644 DeepL/IVoiceSession.cs
 create mode 100644 DeepL/Model/TargetMediaChunk.cs
 create mode 100644 DeepL/Model/TranscriptSegment.cs
 create mode 100644 DeepL/Model/TranscriptUpdate.cs
 create mode 100644 DeepL/Model/VoiceSessionInfo.cs
 create mode 100644 DeepL/Model/VoiceStreamError.cs
 create mode 100644 DeepL/SourceLanguageMode.cs
 create mode 100644 DeepL/SourceMediaContentType.cs
 create mode 100644 DeepL/TargetMediaVoice.cs
 create mode 100644 DeepL/VoiceMessageFormat.cs
 create mode 100644 DeepL/VoiceSession.cs
 create mode 100644 DeepL/VoiceSessionOptions.cs
 create mode 100644 DeepLTests/VoiceSessionTest.cs
diff --git a/DeepL/DeepL.csproj b/DeepL/DeepL.csproj
index f6319aa..c6c8400 100644
--- a/DeepL/DeepL.csproj
+++ b/DeepL/DeepL.csproj
@@ -34,6 +34,7 @@
   <ItemGroup>
     <PackageReference Include="Microsoft.Extensions.Http.Polly" Version="5.0.1"/>
     <PackageReference Include="System.Text.Json" Version="5.0.2"/>
+    <PackageReference Include="System.Net.WebSockets.Client" Version="4.3.2" Condition="'$(TargetFramework)' == 'netstandard2.0'"/>
   </ItemGroup>
 
 
diff --git a/DeepL/DeepLClient.cs b/DeepL/DeepLClient.cs
index 6a2dc91..73cb4d4 100644
--- a/DeepL/DeepLClient.cs
+++ b/DeepL/DeepLClient.cs
@@ -6,6 +6,7 @@
 using System.Collections.Generic;
 using System.IO;
 using System.Linq;
+using System.Net.WebSockets;
 using System.Text.Json;
 using System.Text.Json.Serialization;
 using System.Threading;
@@ -54,7 +55,7 @@ Task<WriteResult> RephraseTextAsync(
   ///   Client for the DeepL API. To use the DeepL API, initialize an instance of this class using your DeepL
   ///   Authentication Key. All functions are thread-safe, aside from <see cref="DeepLClient.Dispose" />.
   /// </summary>
-  public sealed class DeepLClient : Translator, IWriter, IGlossaryManager, IStyleRuleManager {
+  public sealed class DeepLClient : Translator, IWriter, IGlossaryManager, IStyleRuleManager, IVoiceManager {
     /// <summary>Initializes a new instance of the <see cref="AuthorizationException" /> class.</summary>
     /// <param name="message">The message that describes the error.</param>
     public DeepLClient(string authKey, DeepLClientOptions? options = null) : base(authKey, options) { }
@@ -939,6 +940,79 @@ private static (string Key, string Value)[] CreateLanguageQueryParams(
       DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
     };
 
+    /// <inheritdoc />
+    public async Task<IVoiceSession> CreateVoiceSessionAsync(
+          VoiceSessionOptions options,
+          CancellationToken cancellationToken = default) {
+      if (options == null) {
+        throw new ArgumentNullException(nameof(options));
+      }
+
+      if (options.TargetLanguages == null || options.TargetLanguages.Length == 0) {
+        throw new ArgumentException("At least one target language must be specified", nameof(options));
+      }
+
+      if (options.TargetLanguages.Length > 5) {
+        throw new ArgumentException("Maximum 5 target languages per session", nameof(options));
+      }
+
+      var requestData = new Dictionary<string, object> {
+        ["source_media_content_type"] = options.SourceMediaContentType,
+        ["target_languages"] = options.TargetLanguages
+      };
+
+      if (options.MessageFormat != null) {
+        requestData["message_format"] = options.MessageFormat.Value.ToApiValue();
+      }
+
+      if (options.SourceLanguage != null) {
+        requestData["source_language"] = options.SourceLanguage;
+      }
+
+      if (options.SourceLanguageMode != null) {
+        requestData["source_language_mode"] = options.SourceLanguageMode.Value.ToApiValue();
+      }
+
+      if (options.TargetMediaLanguages != null) {
+        requestData["target_media_languages"] = options.TargetMediaLanguages;
+      }
+
+      if (options.TargetMediaContentType != null) {
+        requestData["target_media_content_type"] = options.TargetMediaContentType;
+      }
+
+      if (options.TargetMediaVoice != null) {
+        requestData["target_media_voice"] = options.TargetMediaVoice.Value.ToApiValue();
+      }
+
+      if (options.GlossaryId != null) {
+        requestData["glossary_id"] = options.GlossaryId;
+      }
+
+      if (options.Formality != null) {
+        requestData["formality"] = options.Formality;
+      }
+
+      using var responseMessage = await _client.ApiPostJsonAsync(
+            "v3/voice/realtime", cancellationToken, requestData, SerializationOptions).ConfigureAwait(false);
+      await DeepLHttpClient.CheckStatusCodeAsync(responseMessage).ConfigureAwait(false);
+      var sessionInfo = await JsonUtils.DeserializeAsync<VoiceSessionInfo>(responseMessage).ConfigureAwait(false);
+
+      // The session request yields a streaming URL and a token; the token travels as a query parameter.
+      var wsUri = new Uri($"{sessionInfo.StreamingUrl}?token={Uri.EscapeDataString(sessionInfo.Token)}");
+      var webSocket = new ClientWebSocket();
+      try {
+        await webSocket.ConnectAsync(wsUri, cancellationToken).ConfigureAwait(false);
+      } catch (OperationCanceledException) {
+        webSocket.Dispose();
+        throw; // Cancellation is surfaced unchanged rather than disguised as a DeepLException.
+      } catch (Exception ex) {
+        webSocket.Dispose();
+        throw new DeepLException("Failed to establish Voice API WebSocket connection", ex);
+      }
+      return new VoiceSession(_client, webSocket, sessionInfo);
+    }
+
     /// <summary>Class used for JSON-deserialization of style rule list results.</summary>
     private readonly struct StyleRuleListResult {
       /// <summary>Initializes a new instance of <see cref="StyleRuleListResult" />, used for JSON deserialization.</summary>
diff --git a/DeepL/IVoiceManager.cs b/DeepL/IVoiceManager.cs
new file mode 100644
index 0000000..afc2e6f
--- /dev/null
+++ b/DeepL/IVoiceManager.cs
@@ -0,0 +1,28 @@
+// Copyright 2025 DeepL SE (https://www.deepl.com)
+// Use of this source code is governed by an MIT
+// license that can be found in the LICENSE file.
+
+using System;
+using System.Threading;
+using System.Threading.Tasks;
+
+namespace DeepL {
+  /// <summary>Interface for opening Voice API streaming sessions.</summary>
+  public interface IVoiceManager : IDisposable {
+    /// <summary>
+    ///   Creates a new Voice API streaming session for real-time speech transcription and translation.
+    ///   A session is first requested from the DeepL API, then a WebSocket connection to it is established.
+    /// </summary>
+    /// <param name="options">Session configuration (audio format, languages, etc.); one to five target languages.</param>
+    /// <param name="cancellationToken">The cancellation token to cancel the operation.</param>
+    /// <returns>A connected <see cref="IVoiceSession" /> for streaming audio and receiving transcripts.</returns>
+    /// <exception cref="ArgumentException">If <paramref name="options" /> is null or otherwise invalid.</exception>
+    /// <exception cref="DeepLException">
+    ///   If any error occurs while communicating with the DeepL API, a
+    ///   <see cref="DeepLException" /> or a derived class will be thrown.
+    /// </exception>
+    Task<IVoiceSession> CreateVoiceSessionAsync(
+          VoiceSessionOptions options,
+          CancellationToken cancellationToken = default);
+  }
+}
diff --git a/DeepL/IVoiceSession.cs b/DeepL/IVoiceSession.cs
new file mode 100644
index 0000000..d5d0e6c
--- /dev/null
+++ b/DeepL/IVoiceSession.cs
@@ -0,0 +1,77 @@
+// Copyright 2025 DeepL SE (https://www.deepl.com)
+// Use of this source code is governed by an MIT
+// license that can be found in the LICENSE file.
+
+using System;
+using System.Threading;
+using System.Threading.Tasks;
+using DeepL.Model;
+
+namespace DeepL {
+  /// <summary>
+  ///   Represents an active Voice API streaming session. Provides methods for sending audio data and receiving
+  ///   real-time transcriptions and translations via events.
+  /// </summary>
+  /// <remarks>
+  ///   Events fire on a background thread. Consumers are responsible for marshaling to the appropriate
+  ///   synchronization context if needed. Dispose the session to close the WebSocket connection.
+  /// </remarks>
+  public interface IVoiceSession : IDisposable {
+    /// <summary>Raised when a source-language transcript update is received from the server.</summary>
+    event EventHandler<TranscriptUpdate>? SourceTranscriptUpdated;
+
+    /// <summary>Raised when a target-language transcript update is received from the server.</summary>
+    event EventHandler<TranscriptUpdate>? TargetTranscriptUpdated;
+
+    /// <summary>
+    ///   Raised when a target media audio chunk is received from the server. This feature is in closed beta.
+    /// </summary>
+    event EventHandler<TargetMediaChunk>? TargetMediaChunkReceived;
+
+    /// <summary>Raised when an error message is received from the WebSocket connection.</summary>
+    event EventHandler<VoiceStreamError>? ErrorReceived;
+
+    /// <summary>Raised when the end-of-stream message is received, indicating all outputs are complete.</summary>
+    event EventHandler? StreamEnded;
+
+    /// <summary>The unique session identifier; may be null.</summary>
+    string? SessionId { get; }
+
+    /// <summary>True while the session has not been disposed and its WebSocket connection is open.</summary>
+    bool IsConnected { get; }
+
+    /// <summary>
+    ///   Sends a chunk of audio data to the server. The audio encoding must match the
+    ///   <see cref="VoiceSessionOptions.SourceMediaContentType" /> specified when creating the session.
+    /// </summary>
+    /// <param name="audioData">Audio data to send. Must not exceed 100 KB or 1 second duration.</param>
+    /// <param name="cancellationToken">The cancellation token to cancel the operation.</param>
+    /// <exception cref="DeepLException">If the session is not connected or sending fails.</exception>
+    Task SendAudioAsync(byte[] audioData, CancellationToken cancellationToken = default);
+
+    /// <summary>
+    ///   Sends a chunk of audio data to the server, taking the audio as a segment of an existing buffer.
+    /// </summary>
+    /// <param name="audioData">Audio data to send. Must not exceed 100 KB or 1 second duration.</param>
+    /// <param name="cancellationToken">The cancellation token to cancel the operation.</param>
+    /// <exception cref="DeepLException">If the session is not connected or sending fails.</exception>
+    Task SendAudioAsync(ArraySegment<byte> audioData, CancellationToken cancellationToken = default);
+
+    /// <summary>
+    ///   Signals the end of the audio stream. Causes finalization of tentative transcript segments and
+    ///   triggers emission of final transcript updates, end-of-transcript, and end-of-stream messages.
+    ///   No more audio data can be sent after calling this method.
+    /// </summary>
+    /// <param name="cancellationToken">The cancellation token to cancel the operation.</param>
+    /// <exception cref="DeepLException">If the session is not connected or sending fails.</exception>
+    Task EndAudioAsync(CancellationToken cancellationToken = default);
+
+    /// <summary>
+    ///   Requests a reconnection token and establishes a new WebSocket connection, resuming the session.
+    ///   This should be called when the WebSocket connection is lost unexpectedly.
+    /// </summary>
+    /// <param name="cancellationToken">The cancellation token to cancel the operation.</param>
+    /// <exception cref="DeepLException">If reconnection fails.</exception>
+    Task ReconnectAsync(CancellationToken cancellationToken = default);
+  }
+}
diff --git a/DeepL/Model/TargetMediaChunk.cs b/DeepL/Model/TargetMediaChunk.cs
new file mode 100644
index 0000000..f6b1522
--- /dev/null
+++ b/DeepL/Model/TargetMediaChunk.cs
@@ -0,0 +1,68 @@
+// Copyright 2025 DeepL SE (https://www.deepl.com)
+// Use of this source code is governed by an MIT
+// license that can be found in the LICENSE file.
+
+using System.Text.Json.Serialization;
+
+namespace DeepL.Model {
+  /// <summary>
+  ///   Represents a translated audio chunk from the Voice API. This feature is currently in closed beta.
+  ///   Audio data is provided as an array of base64-encoded indivisible chunks.
+  /// </summary>
+  public sealed class TargetMediaChunk {
+    /// <summary>Initializes a new instance of <see cref="TargetMediaChunk" />.</summary>
+    /// <param name="contentType">The content type of the audio data. Present in the first message.</param>
+    /// <param name="headers">Number of header packets at the start of the data array, or null if all are audio.</param>
+    /// <param name="data">Array of base64-encoded audio data packets; a null value is stored as an empty array.</param>
+    /// <param name="text">Text corresponding to this audio chunk, for subtitle synchronization.</param>
+    /// <param name="language">The target language of this audio chunk.</param>
+    /// <param name="duration">Duration of this audio chunk in seconds.</param>
+    /// <remarks>
+    ///   The constructor for this class (and all other Model classes) should not be used by library users. Ideally it
+    ///   would be marked <see langword="internal" />, but needs to be <see langword="public" /> for JSON deserialization.
+    ///   In future this function may have backwards-incompatible changes.
+    /// </remarks>
+    [JsonConstructor]
+    public TargetMediaChunk(
+          string? contentType,
+          int? headers,
+          string[] data,
+          string? text,
+          string? language,
+          double? duration) {
+      ContentType = contentType;
+      Headers = headers;
+      Data = data ?? System.Array.Empty<string>(); // a message without "data" deserializes to null
+      Text = text;
+      Language = language;
+      Duration = duration;
+    }
+
+    /// <summary>The content type of the audio data. Present in the first message of a sequence.</summary>
+    [JsonPropertyName("content_type")]
+    public string? ContentType { get; }
+
+    /// <summary>
+    ///   Number of packets at the start of <see cref="Data" /> that contain initialization/header data.
+    ///   Null or absent when all packets are audio data.
+    /// </summary>
+    [JsonPropertyName("headers")]
+    public int? Headers { get; }
+
+    /// <summary>Array of base64-encoded indivisible audio data packets. Never null; may be empty.</summary>
+    [JsonPropertyName("data")]
+    public string[] Data { get; }
+
+    /// <summary>Text corresponding to this audio chunk, for subtitle synchronization.</summary>
+    [JsonPropertyName("text")]
+    public string? Text { get; }
+
+    /// <summary>The target language of this audio chunk.</summary>
+    [JsonPropertyName("language")]
+    public string? Language { get; }
+
+    /// <summary>Duration of this audio chunk in seconds.</summary>
+    [JsonPropertyName("duration")]
+    public double? Duration { get; }
+  }
+}
diff --git a/DeepL/Model/TranscriptSegment.cs b/DeepL/Model/TranscriptSegment.cs
new file mode 100644
index 0000000..b678ce2
--- /dev/null
+++ b/DeepL/Model/TranscriptSegment.cs
@@ -0,0 +1,29 @@
+// Copyright 2025 DeepL SE (https://www.deepl.com)
+// Use of this source code is governed by an MIT
+// license that can be found in the LICENSE file.
+
+using System.Text.Json.Serialization;
+
+namespace DeepL.Model {
+  /// <summary>One piece of text inside a transcript update delivered by the Voice API.</summary>
+  public sealed class TranscriptSegment {
+    /// <summary>Creates a <see cref="TranscriptSegment" /> wrapping the given text.</summary>
+    /// <param name="text">Text carried by the segment.</param>
+    /// <remarks>
+    ///   Library users should not call this constructor (the same applies to every other Model class). It is
+    ///   <see langword="public" /> rather than <see langword="internal" /> only because JSON deserialization
+    ///   needs it, and it may change in backwards-incompatible ways in future.
+    /// </remarks>
+    [JsonConstructor]
+    public TranscriptSegment(string text) => Text = text;
+
+    /// <summary>Text carried by the segment.</summary>
+    [JsonPropertyName("text")]
+    public string Text { get; }
+
+    /// <summary>Gives the segment's <see cref="Text" /> as its string form.</summary>
+    public override string ToString() {
+      return Text;
+    }
+  }
+}
diff --git a/DeepL/Model/TranscriptUpdate.cs b/DeepL/Model/TranscriptUpdate.cs
new file mode 100644
index 0000000..9db2adc
--- /dev/null
+++ b/DeepL/Model/TranscriptUpdate.cs
@@ -0,0 +1,41 @@
+// Copyright 2025 DeepL SE (https://www.deepl.com)
+// Use of this source code is governed by an MIT
+// license that can be found in the LICENSE file.
+
+using System.Text.Json.Serialization;
+
+namespace DeepL.Model {
+  /// <summary>
+  ///   Represents a transcript update from the Voice API, containing concluded (finalized) and tentative
+  ///   (in-progress) text segments. Used for both source and target transcript updates.
+  /// </summary>
+  public sealed class TranscriptUpdate {
+    /// <summary>Initializes a new instance of <see cref="TranscriptUpdate" />.</summary>
+    /// <param name="concluded">Finalized text segments that will not change; null is stored as an empty array.</param>
+    /// <param name="tentative">Preliminary text segments that may be refined; null is stored as an empty array.</param>
+    /// <param name="language">The language code of this transcript update. Only present on target updates.</param>
+    /// <remarks>
+    ///   The constructor for this class (and all other Model classes) should not be used by library users. Ideally it
+    ///   would be marked <see langword="internal" />, but needs to be <see langword="public" /> for JSON deserialization.
+    ///   In future this function may have backwards-incompatible changes.
+    /// </remarks>
+    [JsonConstructor]
+    public TranscriptUpdate(TranscriptSegment[] concluded, TranscriptSegment[] tentative, string? language) {
+      Concluded = concluded ?? System.Array.Empty<TranscriptSegment>(); // absent JSON field arrives as null
+      Tentative = tentative ?? System.Array.Empty<TranscriptSegment>(); // absent JSON field arrives as null
+      Language = language;
+    }
+
+    /// <summary>Finalized text segments that will not change. Sent once and then fixed. Never null.</summary>
+    [JsonPropertyName("concluded")]
+    public TranscriptSegment[] Concluded { get; }
+
+    /// <summary>Preliminary text segments that may be refined as more audio context arrives. Never null.</summary>
+    [JsonPropertyName("tentative")]
+    public TranscriptSegment[] Tentative { get; }
+
+    /// <summary>The language code of this transcript update. Only present on target transcript updates.</summary>
+    [JsonPropertyName("language")]
+    public string? Language { get; }
+  }
+}
diff --git a/DeepL/Model/VoiceSessionInfo.cs b/DeepL/Model/VoiceSessionInfo.cs
new file mode 100644
index 0000000..45aa899
--- /dev/null
+++ b/DeepL/Model/VoiceSessionInfo.cs
@@ -0,0 +1,40 @@
+// Copyright 2025 DeepL SE (https://www.deepl.com)
+// Use of this source code is governed by an MIT
+// license that can be found in the LICENSE file.
+
+using System.Text.Json.Serialization;
+
+namespace DeepL.Model {
+  /// <summary>Response of the Voice API session request: where to connect and how to authenticate.</summary>
+  public sealed class VoiceSessionInfo {
+    /// <summary>Creates a <see cref="VoiceSessionInfo" /> from the values of a session response.</summary>
+    /// <param name="streamingUrl">WebSocket URL on which the stream connection is opened.</param>
+    /// <param name="token">Short-lived token that authenticates against the streaming endpoint.</param>
+    /// <param name="sessionId">Identifier that uniquely names the session.</param>
+    /// <remarks>
+    ///   Library users should not call this constructor (the same applies to every other Model class). It is
+    ///   <see langword="public" /> rather than <see langword="internal" /> only because JSON deserialization
+    ///   needs it, and it may change in backwards-incompatible ways in future.
+    /// </remarks>
+    [JsonConstructor]
+    public VoiceSessionInfo(string streamingUrl, string token, string? sessionId) {
+      SessionId = sessionId;
+      Token = token;
+      StreamingUrl = streamingUrl;
+    }
+
+    /// <summary>WebSocket URL on which the stream connection is opened.</summary>
+    [JsonPropertyName("streaming_url")]
+    public string StreamingUrl { get; }
+
+    /// <summary>
+    ///   Short-lived token that authenticates against the streaming endpoint. It can be used only once.
+    /// </summary>
+    [JsonPropertyName("token")]
+    public string Token { get; }
+
+    /// <summary>Identifier that uniquely names the session.</summary>
+    [JsonPropertyName("session_id")]
+    public string? SessionId { get; }
+  }
+}
diff --git a/DeepL/Model/VoiceStreamError.cs b/DeepL/Model/VoiceStreamError.cs
new file mode 100644
index 0000000..80a0311
--- /dev/null
+++ b/DeepL/Model/VoiceStreamError.cs
@@ -0,0 +1,41 @@
+// Copyright 2025 DeepL SE (https://www.deepl.com)
+// Use of this source code is governed by an MIT
+// license that can be found in the LICENSE file.
+
+using System.Text.Json.Serialization;
+
+namespace DeepL.Model {
+  /// <summary>An error message delivered over the Voice API WebSocket connection.</summary>
+  public sealed class VoiceStreamError {
+    /// <summary>Creates a <see cref="VoiceStreamError" /> from the fields of an error message.</summary>
+    /// <param name="code">Error code of the message.</param>
+    /// <param name="reason">Reason code of the message.</param>
+    /// <param name="message">Human-readable description of the error.</param>
+    /// <remarks>
+    ///   Library users should not call this constructor (the same applies to every other Model class). It is
+    ///   <see langword="public" /> rather than <see langword="internal" /> only because JSON deserialization
+    ///   needs it, and it may change in backwards-incompatible ways in future.
+    /// </remarks>
+    [JsonConstructor]
+    public VoiceStreamError(string? code, string? reason, string? message) {
+      Message = message;
+      Reason = reason;
+      Code = code;
+    }
+
+    /// <summary>Error code of the message.</summary>
+    [JsonPropertyName("code")]
+    public string? Code { get; }
+
+    /// <summary>Reason code of the message.</summary>
+    [JsonPropertyName("reason")]
+    public string? Reason { get; }
+
+    /// <summary>Human-readable description of the error.</summary>
+    [JsonPropertyName("message")]
+    public string? Message { get; }
+
+    /// <summary>Returns a diagnostic string listing the code, reason and message of this error.</summary>
+    public override string ToString() => $"VoiceStreamError(code={Code}, reason={Reason}, message={Message})";
+  }
+}
diff --git a/DeepL/SourceLanguageMode.cs b/DeepL/SourceLanguageMode.cs
new file mode 100644
index 0000000..521037f
--- /dev/null
+++ b/DeepL/SourceLanguageMode.cs
@@ -0,0 +1,29 @@
+// Copyright 2025 DeepL SE (https://www.deepl.com)
+// Use of this source code is governed by an MIT
+// license that can be found in the LICENSE file.
+
+using System;
+
+namespace DeepL {
+  /// <summary>Selects how a Voice API session treats the configured source language.</summary>
+  public enum SourceLanguageMode {
+    /// <summary>The source language is only a hint; the server may override it.</summary>
+    Auto,
+
+    /// <summary>The source language is mandatory; the server must use it.</summary>
+    Fixed
+  }
+
+  /// <summary>Extension methods for <see cref="SourceLanguageMode" />.</summary>
+  public static class SourceLanguageModeExtensions {
+    /// <summary>Maps a <see cref="SourceLanguageMode" /> to the string the DeepL API expects.</summary>
+    /// <param name="mode">Mode to convert.</param>
+    /// <returns><c>"auto"</c> or <c>"fixed"</c>.</returns>
+    /// <exception cref="ArgumentOutOfRangeException">If an unknown enum value is passed.</exception>
+    public static string ToApiValue(this SourceLanguageMode mode) => mode switch {
+      SourceLanguageMode.Auto => "auto",
+      SourceLanguageMode.Fixed => "fixed",
+      _ => throw new ArgumentOutOfRangeException(nameof(mode), mode, "Unrecognized source language mode value")
+    };
+  }
+}
diff --git a/DeepL/SourceMediaContentType.cs b/DeepL/SourceMediaContentType.cs
new file mode 100644
index 0000000..fe48105
--- /dev/null
+++ b/DeepL/SourceMediaContentType.cs
@@ -0,0 +1,68 @@
+// Copyright 2025 DeepL SE (https://www.deepl.com)
+// Use of this source code is governed by an MIT
+// license that can be found in the LICENSE file.
+
+namespace DeepL {
+  /// <summary>
+  ///   String constants for the audio content types (container and codec) accepted by the DeepL Voice API.
+  ///   Assign one to <see cref="VoiceSessionOptions.SourceMediaContentType" /> when configuring a session.
+  /// </summary>
+  public static class SourceMediaContentType {
+    /// <summary>Auto-detect container and codec. Supported for all formats except PCM.</summary>
+    public const string Auto = "audio/auto";
+
+    /// <summary>FLAC container with FLAC codec.</summary>
+    public const string Flac = "audio/flac";
+
+    /// <summary>MPEG container with MP3 codec.</summary>
+    public const string Mpeg = "audio/mpeg";
+
+    /// <summary>Ogg container with auto-detected codec (FLAC or OPUS).</summary>
+    public const string Ogg = "audio/ogg";
+
+    /// <summary>WebM container with OPUS codec.</summary>
+    public const string WebM = "audio/webm";
+
+    /// <summary>Matroska container with auto-detected codec.</summary>
+    public const string Matroska = "audio/x-matroska";
+
+    /// <summary>Ogg container with FLAC codec.</summary>
+    public const string OggFlac = "audio/ogg;codecs=flac";
+
+    /// <summary>Ogg container with OPUS codec.</summary>
+    public const string OggOpus = "audio/ogg;codecs=opus";
+
+    /// <summary>PCM, signed 16-bit little-endian samples, at 8000 Hz.</summary>
+    public const string PcmS16le8000 = "audio/pcm;encoding=s16le;rate=8000";
+
+    /// <summary>PCM, signed 16-bit little-endian samples, at 16000 Hz. Recommended for general use.</summary>
+    public const string PcmS16le16000 = "audio/pcm;encoding=s16le;rate=16000";
+
+    /// <summary>PCM, signed 16-bit little-endian samples, at 44100 Hz.</summary>
+    public const string PcmS16le44100 = "audio/pcm;encoding=s16le;rate=44100";
+
+    /// <summary>PCM, signed 16-bit little-endian samples, at 48000 Hz.</summary>
+    public const string PcmS16le48000 = "audio/pcm;encoding=s16le;rate=48000";
+
+    /// <summary>PCM A-Law at 8000 Hz (G.711).</summary>
+    public const string PcmAlaw8000 = "audio/pcm;encoding=alaw;rate=8000";
+
+    /// <summary>PCM µ-Law at 8000 Hz (G.711).</summary>
+    public const string PcmUlaw8000 = "audio/pcm;encoding=ulaw;rate=8000";
+
+    /// <summary>WebM container with the OPUS codec named explicitly in the content type.</summary>
+    public const string WebMOpus = "audio/webm;codecs=opus";
+
+    /// <summary>Matroska container with AAC codec.</summary>
+    public const string MatroskaAac = "audio/x-matroska;codecs=aac";
+
+    /// <summary>Matroska container with FLAC codec.</summary>
+    public const string MatroskaFlac = "audio/x-matroska;codecs=flac";
+
+    /// <summary>Matroska container with MP3 codec.</summary>
+    public const string MatroskaMp3 = "audio/x-matroska;codecs=mp3";
+
+    /// <summary>Matroska container with OPUS codec.</summary>
+    public const string MatroskaOpus = "audio/x-matroska;codecs=opus";
+  }
+}
diff --git a/DeepL/TargetMediaVoice.cs b/DeepL/TargetMediaVoice.cs
new file mode 100644
index 0000000..10b5c33
--- /dev/null
+++ b/DeepL/TargetMediaVoice.cs
@@ -0,0 +1,32 @@
+// Copyright 2025 DeepL SE (https://www.deepl.com)
+// Use of this source code is governed by an MIT
+// license that can be found in the LICENSE file.
+
+using System;
+
+namespace DeepL {
+  /// <summary>
+  ///   Voice used for the synthesized target audio of a Voice API session.
+  ///   The feature is currently in closed beta.
+  /// </summary>
+  public enum TargetMediaVoice {
+    /// <summary>A male voice.</summary>
+    Male,
+
+    /// <summary>A female voice.</summary>
+    Female
+  }
+
+  /// <summary>Extension methods for <see cref="TargetMediaVoice" />.</summary>
+  public static class TargetMediaVoiceExtensions {
+    /// <summary>Maps a <see cref="TargetMediaVoice" /> to the string the DeepL API expects.</summary>
+    /// <param name="voice">Voice to convert.</param>
+    /// <returns><c>"male"</c> or <c>"female"</c>.</returns>
+    /// <exception cref="ArgumentOutOfRangeException">If an unknown enum value is passed.</exception>
+    public static string ToApiValue(this TargetMediaVoice voice) => voice switch {
+      TargetMediaVoice.Male => "male",
+      TargetMediaVoice.Female => "female",
+      _ => throw new ArgumentOutOfRangeException(nameof(voice), voice, "Unrecognized target media voice value")
+    };
+  }
+}
diff --git a/DeepL/VoiceMessageFormat.cs b/DeepL/VoiceMessageFormat.cs
new file mode 100644
index 0000000..d4aace6
--- /dev/null
+++ b/DeepL/VoiceMessageFormat.cs
@@ -0,0 +1,29 @@
+// Copyright 2025 DeepL SE (https://www.deepl.com)
+// Use of this source code is governed by an MIT
+// license that can be found in the LICENSE file.
+
+using System;
+
+namespace DeepL {
+  /// <summary>Encoding of the messages exchanged over the Voice API WebSocket.</summary>
+  public enum VoiceMessageFormat {
+    /// <summary>Messages are JSON, sent as TEXT WebSocket frames; binary fields are base64-encoded.</summary>
+    Json,
+
+    /// <summary>Messages are MessagePack, sent as BINARY WebSocket frames; binary fields are raw binary.</summary>
+    MessagePack
+  }
+
+  /// <summary>Extension methods for <see cref="VoiceMessageFormat" />.</summary>
+  public static class VoiceMessageFormatExtensions {
+    /// <summary>Maps a <see cref="VoiceMessageFormat" /> to the string the DeepL API expects.</summary>
+    /// <param name="format">Format to convert.</param>
+    /// <returns><c>"json"</c> or <c>"msgpack"</c>.</returns>
+    /// <exception cref="ArgumentOutOfRangeException">If an unknown enum value is passed.</exception>
+    public static string ToApiValue(this VoiceMessageFormat format) => format switch {
+      VoiceMessageFormat.Json => "json",
+      VoiceMessageFormat.MessagePack => "msgpack",
+      _ => throw new ArgumentOutOfRangeException(nameof(format), format, "Unrecognized message format value")
+    };
+  }
+}
diff --git a/DeepL/VoiceSession.cs b/DeepL/VoiceSession.cs
new file mode 100644
index 0000000..0ff826f
--- /dev/null
+++ b/DeepL/VoiceSession.cs
@@ -0,0 +1,258 @@
+// Copyright 2025 DeepL SE (https://www.deepl.com)
+// Use of this source code is governed by an MIT
+// license that can be found in the LICENSE file.
+
+using System;
+using System.Net.WebSockets;
+using System.Text;
+using System.Text.Json;
+using System.Threading;
+using System.Threading.Tasks;
+using DeepL.Internal;
+using DeepL.Model;
+
+namespace DeepL {
+  /// <summary>
+  ///   Internal implementation of <see cref="IVoiceSession" /> that manages a WebSocket connection
+  ///   to the DeepL Voice API for real-time speech transcription and translation.
+  /// </summary>
+  internal sealed class VoiceSession : IVoiceSession {
+    private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions {
+      PropertyNamingPolicy = JsonNamingPolicy.CamelCase
+    }; // passed to every JsonSerializer.Deserialize call in DispatchMessage
+
+    private readonly DeepLHttpClient _httpClient; // used by ReconnectAsync to request a new session token
+    private readonly object _lock = new object(); // taken around _disposed in IsConnected and Dispose
+    private ClientWebSocket _webSocket; // current connection; replaced by ReconnectAsync
+    private CancellationTokenSource _receiveCts; // cancels the receive loop; replaced by ReconnectAsync
+    private Task? _receiveTask; // task running ReceiveLoopAsync
+    private string _lastToken; // latest session token; sent as the "token" query value on reconnect
+    private bool _disposed; // set to true by Dispose
+
+    /// <inheritdoc />
+    public event EventHandler<TranscriptUpdate>? SourceTranscriptUpdated;
+
+    /// <inheritdoc />
+    public event EventHandler<TranscriptUpdate>? TargetTranscriptUpdated;
+
+    /// <inheritdoc />
+    public event EventHandler<TargetMediaChunk>? TargetMediaChunkReceived;
+
+    /// <inheritdoc />
+    public event EventHandler<VoiceStreamError>? ErrorReceived;
+
+    /// <inheritdoc />
+    public event EventHandler? StreamEnded;
+
+    /// <inheritdoc />
+    public string? SessionId { get; private set; }
+
+    /// <inheritdoc />
+    public bool IsConnected {
+      get {
+        // Dispose flips _disposed under this same lock, so the flag is read consistently here.
+        // A disposed session always reports false; otherwise the socket state decides.
+        lock (_lock) return _disposed ? false : _webSocket.State == WebSocketState.Open;
+      }
+    }
+
+    /// <summary>Creates a session around <paramref name="webSocket" /> and starts the background receive loop.</summary>
+    internal VoiceSession(DeepLHttpClient httpClient, ClientWebSocket webSocket, VoiceSessionInfo sessionInfo) {
+      _httpClient = httpClient;
+      _webSocket = webSocket;
+      _lastToken = sessionInfo.Token;
+      SessionId = sessionInfo.SessionId;
+      _receiveCts = new CancellationTokenSource();
+      // Snapshot the token now: the delegate runs later, when Dispose may already have disposed _receiveCts.
+      var receiveToken = _receiveCts.Token;
+      _receiveTask = Task.Run(() => ReceiveLoopAsync(receiveToken));
+    }
+
+    /// <inheritdoc />
+    public async Task SendAudioAsync(byte[] audioData, CancellationToken cancellationToken = default) =>
+      // Treat the whole array as a single segment and reuse the segment-based overload.
+      await SendAudioAsync(new ArraySegment<byte>(audioData), cancellationToken).ConfigureAwait(false);
+
+    /// <inheritdoc />
+    public async Task SendAudioAsync(ArraySegment<byte> audioData, CancellationToken cancellationToken = default) {
+      EnsureConnected();
+
+      // A default ArraySegment has no backing array; that case is reported as an ArgumentException.
+      var source = audioData.Array ?? throw new ArgumentException("Audio data array is null");
+      var base64Data = Convert.ToBase64String(source, audioData.Offset, audioData.Count);
+
+      // The chunk is sent as JSON text of the form {"source_media_chunk":{"data":"<base64>"}}.
+      var payload = Encoding.UTF8.GetBytes($"{{\"source_media_chunk\":{{\"data\":\"{base64Data}\"}}}}");
+      var frame = new ArraySegment<byte>(payload);
+
+      // endOfMessage: true — each chunk is one complete WebSocket text message.
+      var sendTask = _webSocket.SendAsync(
+            frame, WebSocketMessageType.Text, endOfMessage: true, cancellationToken);
+      await sendTask.ConfigureAwait(false);
+    }
+
+    /// <inheritdoc />
+    public async Task EndAudioAsync(CancellationToken cancellationToken = default) {
+      EnsureConnected();
+
+      // Sends the fixed "end_of_source_media" control message as JSON text.
+      const string endMessage = "{\"end_of_source_media\":{}}";
+      var frame = new ArraySegment<byte>(Encoding.UTF8.GetBytes(endMessage));
+
+      // endOfMessage: true — the control message is one complete WebSocket text message.
+      var sendTask = _webSocket.SendAsync(
+            frame, WebSocketMessageType.Text, endOfMessage: true, cancellationToken);
+      await sendTask.ConfigureAwait(false);
+    }
+
+    /// <inheritdoc />
+    public async Task ReconnectAsync(CancellationToken cancellationToken = default) {
+      if (_disposed) throw new ObjectDisposedException(nameof(VoiceSession));
+      // Stop the current receive loop and wait until it has finished using the old socket.
+      var previousCts = _receiveCts;
+      previousCts.Cancel();
+      if (_receiveTask != null) {
+        try {
+          await _receiveTask.ConfigureAwait(false);
+        } catch (OperationCanceledException) { /* expected */ }
+      }
+
+      // Release the old source (previously leaked) and install a fresh one immediately, so that
+      // _receiveCts is never left disposed if a later step throws; Dispose() still has to cancel it.
+      previousCts.Dispose();
+      _receiveCts = new CancellationTokenSource();
+
+      // Close existing WebSocket if still open
+      if (_webSocket.State == WebSocketState.Open || _webSocket.State == WebSocketState.CloseReceived) {
+        try {
+          await _webSocket.CloseAsync(WebSocketCloseStatus.NormalClosure, "Reconnecting", CancellationToken.None)
+                .ConfigureAwait(false);
+        } catch (WebSocketException) { /* ignore close errors during reconnection */ }
+      }
+      _webSocket.Dispose();
+
+      // Request new token via GET v3/voice/realtime?token=<lastToken>
+      var queryParams = new[] { ("token", _lastToken) };
+      using var responseMessage = await _httpClient.ApiGetAsync("v3/voice/realtime", cancellationToken, queryParams)
+            .ConfigureAwait(false);
+      await DeepLHttpClient.CheckStatusCodeAsync(responseMessage).ConfigureAwait(false);
+      var sessionInfo = await JsonUtils.DeserializeAsync<VoiceSessionInfo>(responseMessage).ConfigureAwait(false);
+      _lastToken = sessionInfo.Token;
+      SessionId = sessionInfo.SessionId;
+      // Establish new WebSocket connection
+      var wsUri = new Uri($"{sessionInfo.StreamingUrl}?token={Uri.EscapeDataString(sessionInfo.Token)}");
+      _webSocket = new ClientWebSocket();
+      await _webSocket.ConnectAsync(wsUri, cancellationToken).ConfigureAwait(false);
+
+      // Restart receive loop; the token is captured here instead of being read later from the field.
+      var receiveToken = _receiveCts.Token;
+      _receiveTask = Task.Run(() => ReceiveLoopAsync(receiveToken));
+    }
+
+    /// <summary>Background loop that receives and dispatches WebSocket messages.</summary>
+    private async Task ReceiveLoopAsync(CancellationToken cancellationToken) {
+      var buffer = new byte[64 * 1024]; // 64 KB buffer
+      // A stateful decoder keeps the trailing bytes of a UTF-8 sequence that is split across two fragments;
+      // decoding each fragment on its own would turn such a character into replacement characters.
+      var decoder = Encoding.UTF8.GetDecoder();
+      var chars = new char[Encoding.UTF8.GetMaxCharCount(buffer.Length)];
+      var messageBuilder = new StringBuilder();
+
+      try {
+        while (!cancellationToken.IsCancellationRequested && _webSocket.State == WebSocketState.Open) {
+          messageBuilder.Clear();
+          decoder.Reset();
+          WebSocketReceiveResult result;
+          do {
+            result = await _webSocket.ReceiveAsync(
+                  new ArraySegment<byte>(buffer), cancellationToken).ConfigureAwait(false);
+
+            if (result.MessageType == WebSocketMessageType.Close) return;
+            if (result.MessageType == WebSocketMessageType.Text) {
+              var charCount = decoder.GetChars(buffer, 0, result.Count, chars, 0, result.EndOfMessage);
+              messageBuilder.Append(chars, 0, charCount);
+            }
+          } while (!result.EndOfMessage);
+
+          if (messageBuilder.Length > 0) DispatchMessage(messageBuilder.ToString());
+        }
+      } catch (OperationCanceledException) {
+        // Normal cancellation
+      } catch (WebSocketException) {
+        // Connection lost — consumer should call ReconnectAsync
+      }
+    }
+
+    /// <summary>Parses a JSON message from the WebSocket and dispatches it to the appropriate event.</summary>
+    private void DispatchMessage(string json) {
+      try {
+        using var document = JsonDocument.Parse(json);
+        var root = document.RootElement;
+        // TryGetProperty throws InvalidOperationException on a non-object root, which would kill the receive loop.
+        if (root.ValueKind != JsonValueKind.Object) return;
+
+        if (root.TryGetProperty("source_transcript_update", out var sourceUpdate)) {
+          var update = JsonSerializer.Deserialize<TranscriptUpdate>(sourceUpdate.GetRawText(), JsonOptions);
+          if (update != null) {
+            SourceTranscriptUpdated?.Invoke(this, update);
+          }
+        } else if (root.TryGetProperty("target_transcript_update", out var targetUpdate)) {
+          var update = JsonSerializer.Deserialize<TranscriptUpdate>(targetUpdate.GetRawText(), JsonOptions);
+          if (update != null) {
+            TargetTranscriptUpdated?.Invoke(this, update);
+          }
+        } else if (root.TryGetProperty("target_media_chunk", out var mediaChunk)) {
+          var chunk = JsonSerializer.Deserialize<TargetMediaChunk>(mediaChunk.GetRawText(), JsonOptions);
+          if (chunk != null) {
+            TargetMediaChunkReceived?.Invoke(this, chunk);
+          }
+        } else if (root.TryGetProperty("end_of_source_transcript", out _) ||
+                   root.TryGetProperty("end_of_target_transcript", out _) ||
+                   root.TryGetProperty("end_of_target_media", out _)) {
+          // Per-stream completion markers — no special event needed, handled via StreamEnded
+        } else if (root.TryGetProperty("end_of_stream", out _)) {
+          StreamEnded?.Invoke(this, EventArgs.Empty);
+        } else if (root.TryGetProperty("error", out var errorElement)) {
+          var error = JsonSerializer.Deserialize<VoiceStreamError>(errorElement.GetRawText(), JsonOptions);
+          if (error != null) {
+            ErrorReceived?.Invoke(this, error);
+          }
+        }
+      } catch (JsonException) {
+        // Ignore malformed messages
+      }
+    }
+
+    /// <summary>Throws unless this session is undisposed and its WebSocket is in the Open state.</summary>
+    /// <exception cref="ObjectDisposedException">The session has been disposed.</exception>
+    /// <exception cref="DeepLException">The WebSocket state is anything other than Open.</exception>
+    private void EnsureConnected() {
+      if (_disposed) throw new ObjectDisposedException(nameof(VoiceSession));
+
+      var socketOpen = _webSocket.State == WebSocketState.Open;
+      if (!socketOpen) throw new DeepLException("Voice session WebSocket is not connected");
+    }
+
+    /// <summary>Releases the WebSocket connection and stops the receive loop.</summary>
+    public void Dispose() {
+      lock (_lock) {
+        if (_disposed) return;
+        _disposed = true;
+      }
+
+      _receiveCts.Cancel();
+      try {
+        if (_webSocket.State == WebSocketState.Open) {
+          _webSocket.CloseAsync(WebSocketCloseStatus.NormalClosure, "Disposing", CancellationToken.None)
+                .GetAwaiter().GetResult();
+        }
+      } catch (Exception ex) when (ex is WebSocketException || ex is InvalidOperationException) {
+        // Best-effort close: the cancel above may tear the socket down concurrently, in which case CloseAsync
+        // can report an invalid-state or disposed error. Dispose must not throw, so these are ignored.
+      }
+
+      _webSocket.Dispose();
+      _receiveCts.Dispose();
+    }
+  }
+}
diff --git a/DeepL/VoiceSessionOptions.cs b/DeepL/VoiceSessionOptions.cs
new file mode 100644
index 0000000..cf1235c
--- /dev/null
+++ b/DeepL/VoiceSessionOptions.cs
@@ -0,0 +1,70 @@
+// Copyright 2025 DeepL SE (https://www.deepl.com)
+// Use of this source code is governed by an MIT
+// license that can be found in the LICENSE file.
+
+namespace DeepL {
+  /// <summary>
+  ///   Options to control Voice API session creation. These options are provided to
+  ///   <see cref="DeepLClient.CreateVoiceSessionAsync" />.
+  /// </summary>
+  public sealed class VoiceSessionOptions {
+    /// <summary>Initializes a new <see cref="VoiceSessionOptions" /> object.</summary>
+    public VoiceSessionOptions() { }
+
+    /// <summary>
+    ///   The audio format for streaming, which specifies container, codec, and encoding parameters.
+    ///   Required. Use constants from <see cref="DeepL.SourceMediaContentType" />; initialized to <c>Auto</c>.
+    /// </summary>
+    public string SourceMediaContentType { get; set; } = DeepL.SourceMediaContentType.Auto;
+
+    /// <summary>
+    ///   Message encoding format for WebSocket communication. Defaults to <see cref="VoiceMessageFormat.Json" />.
+    /// </summary>
+    public VoiceMessageFormat? MessageFormat { get; set; }
+
+    /// <summary>
+    ///   The source language of the audio stream, or null for auto-detection.
+    ///   Must be a supported Voice API source language complying with IETF BCP 47 language tags.
+    /// </summary>
+    public string? SourceLanguage { get; set; }
+
+    /// <summary>
+    ///   Controls how the <see cref="SourceLanguage" /> value is used.
+    ///   Defaults to <see cref="DeepL.SourceLanguageMode.Auto" /> if not specified.
+    /// </summary>
+    public SourceLanguageMode? SourceLanguageMode { get; set; }
+
+    /// <summary>
+    ///   List of target languages for translation. The stream will emit translations for each language.
+    ///   Must contain at least one and at most 5 languages. Language identifiers must comply with IETF BCP 47.
+    /// </summary>
+    public string[] TargetLanguages { get; set; } = System.Array.Empty<string>();
+
+    /// <summary>
+    ///   List of target languages for which to generate synthesized audio. This feature is in closed beta.
+    ///   Languages specified here will automatically be added to <see cref="TargetLanguages" /> if not already present.
+    ///   Maximum 5 target media languages per session.
+    /// </summary>
+    public string[]? TargetMediaLanguages { get; set; }
+
+    /// <summary>
+    ///   The audio format for synthesized target media streaming. This feature is in closed beta.
+    ///   Defaults to <c>"audio/webm;codecs=opus"</c> if not specified.
+    /// </summary>
+    public string? TargetMediaContentType { get; set; }
+
+    /// <summary>
+    ///   Target audio voice selection for synthesized speech. This feature is in closed beta.
+    /// </summary>
+    public TargetMediaVoice? TargetMediaVoice { get; set; }
+
+    /// <summary>A glossary ID to use for translation.</summary>
+    public string? GlossaryId { get; set; }
+
+    /// <summary>
+    ///   Sets whether the translated text should lean towards formal or informal language.
+    ///   Possible values: <c>"default"</c>, <c>"formal"</c>, <c>"more"</c>, <c>"informal"</c>, <c>"less"</c>.
+    /// </summary>
+    public string? Formality { get; set; }
+  }
+}
diff --git a/DeepLTests/VoiceSessionTest.cs b/DeepLTests/VoiceSessionTest.cs
new file mode 100644
index 0000000..6f494a4
--- /dev/null
+++ b/DeepLTests/VoiceSessionTest.cs
@@ -0,0 +1,180 @@
+// Copyright 2025 DeepL SE (https://www.deepl.com)
+// Use of this source code is governed by an MIT
+// license that can be found in the LICENSE file.
+
+using System;
+using System.Collections.Generic;
+using System.Text.Json;
+using System.Threading.Tasks;
+using DeepL;
+using DeepL.Model;
+using Xunit;
+
+namespace DeepLTests {
+  /// <summary>Offline unit tests for the Voice API option, enum and model types (no API access needed).</summary>
+  public sealed class VoiceSessionUnitTest {
+    [Fact]
+    public void TestVoiceSessionOptionsDefaults() {
+      var defaults = new VoiceSessionOptions();
+      Assert.Equal(SourceMediaContentType.Auto, defaults.SourceMediaContentType);
+      Assert.Null(defaults.MessageFormat);
+      Assert.Null(defaults.SourceLanguage);
+      Assert.Null(defaults.SourceLanguageMode);
+      Assert.NotNull(defaults.TargetLanguages);
+      Assert.Empty(defaults.TargetLanguages);
+      Assert.Null(defaults.TargetMediaLanguages);
+      Assert.Null(defaults.TargetMediaContentType);
+      Assert.Null(defaults.TargetMediaVoice);
+      Assert.Null(defaults.GlossaryId);
+      Assert.Null(defaults.Formality);
+    }
+
+    [Fact]
+    public void TestVoiceSessionOptionsConfiguration() {
+      var configured = new VoiceSessionOptions {
+        SourceMediaContentType = SourceMediaContentType.OggOpus,
+        MessageFormat = VoiceMessageFormat.Json,
+        SourceLanguage = "en",
+        SourceLanguageMode = DeepL.SourceLanguageMode.Fixed,
+        TargetLanguages = new[] { "de", "fr", "es" },
+        TargetMediaVoice = TargetMediaVoice.Female,
+        GlossaryId = "test-glossary-id",
+        Formality = "formal"
+      };
+
+      Assert.Equal(SourceMediaContentType.OggOpus, configured.SourceMediaContentType);
+      Assert.Equal(VoiceMessageFormat.Json, configured.MessageFormat);
+      Assert.Equal("en", configured.SourceLanguage);
+      Assert.Equal(DeepL.SourceLanguageMode.Fixed, configured.SourceLanguageMode);
+      Assert.Equal(3, configured.TargetLanguages.Length);
+      Assert.Equal(TargetMediaVoice.Female, configured.TargetMediaVoice);
+      Assert.Equal("test-glossary-id", configured.GlossaryId);
+      Assert.Equal("formal", configured.Formality);
+    }
+
+    [Fact]
+    public void TestVoiceMessageFormatApiValues() {
+      Assert.Equal("msgpack", VoiceMessageFormat.MessagePack.ToApiValue());
+      Assert.Equal("json", VoiceMessageFormat.Json.ToApiValue());
+    }
+
+    [Fact]
+    public void TestSourceLanguageModeApiValues() {
+      Assert.Equal("fixed", DeepL.SourceLanguageMode.Fixed.ToApiValue());
+      Assert.Equal("auto", DeepL.SourceLanguageMode.Auto.ToApiValue());
+    }
+
+    [Fact]
+    public void TestTargetMediaVoiceApiValues() {
+      Assert.Equal("female", TargetMediaVoice.Female.ToApiValue());
+      Assert.Equal("male", TargetMediaVoice.Male.ToApiValue());
+    }
+
+    [Fact]
+    public void TestVoiceSessionInfoDeserialization() {
+      const string payload = "{\"streaming_url\":\"wss://api.deepl.com/v3/voice/realtime/connect\"," +
+                             "\"token\":\"test-token-123\"," +
+                             "\"session_id\":\"test-session-456\"}";
+      var sessionInfo = JsonSerializer.Deserialize<VoiceSessionInfo>(payload);
+      Assert.NotNull(sessionInfo);
+      Assert.Equal("wss://api.deepl.com/v3/voice/realtime/connect", sessionInfo!.StreamingUrl);
+      Assert.Equal("test-token-123", sessionInfo.Token);
+      Assert.Equal("test-session-456", sessionInfo.SessionId);
+    }
+
+    [Fact]
+    public void TestTranscriptUpdateDeserialization() {
+      const string payload = "{\"concluded\":[{\"text\":\"Hello \"}],\"tentative\":[{\"text\":\"world\"}],\"language\":\"de\"}";
+      var transcript = JsonSerializer.Deserialize<TranscriptUpdate>(payload);
+      Assert.NotNull(transcript);
+      Assert.Single(transcript!.Concluded);
+      Assert.Equal("Hello ", transcript.Concluded[0].Text);
+      Assert.Single(transcript.Tentative);
+      Assert.Equal("world", transcript.Tentative[0].Text);
+      Assert.Equal("de", transcript.Language);
+    }
+
+    [Fact]
+    public void TestTranscriptSegmentDeserialization() {
+      const string payload = "{\"text\":\"Hello world\"}";
+      var parsedSegment = JsonSerializer.Deserialize<TranscriptSegment>(payload);
+      Assert.NotNull(parsedSegment);
+      Assert.Equal("Hello world", parsedSegment!.Text);
+      Assert.Equal("Hello world", parsedSegment.ToString());
+    }
+
+    [Fact]
+    public void TestTargetMediaChunkDeserialization() {
+      const string payload = "{\"content_type\":\"audio/webm;codecs=opus\"," +
+                             "\"headers\":1," +
+                             "\"data\":[\"base64data1\",\"base64data2\"]," +
+                             "\"text\":\"Hallo Welt\"," +
+                             "\"language\":\"de\"," +
+                             "\"duration\":1.5}";
+      var mediaChunk = JsonSerializer.Deserialize<TargetMediaChunk>(payload);
+      Assert.NotNull(mediaChunk);
+      Assert.Equal("audio/webm;codecs=opus", mediaChunk!.ContentType);
+      Assert.Equal(1, mediaChunk.Headers);
+      Assert.Equal(2, mediaChunk.Data.Length);
+      Assert.Equal("base64data1", mediaChunk.Data[0]);
+      Assert.Equal("Hallo Welt", mediaChunk.Text);
+      Assert.Equal("de", mediaChunk.Language);
+      Assert.Equal(1.5, mediaChunk.Duration);
+    }
+
+    [Fact]
+    public void TestVoiceStreamErrorDeserialization() {
+      const string payload = "{\"code\":\"4001\",\"reason\":\"invalid_audio\",\"message\":\"Audio format not supported\"}";
+      var streamError = JsonSerializer.Deserialize<VoiceStreamError>(payload);
+      Assert.NotNull(streamError);
+      Assert.Equal("4001", streamError!.Code);
+      Assert.Equal("invalid_audio", streamError.Reason);
+      Assert.Equal("Audio format not supported", streamError.Message);
+    }
+
+    [Fact]
+    public void TestSourceMediaContentTypeConstants() {
+      Assert.Equal("audio/auto", SourceMediaContentType.Auto);
+      Assert.Equal("audio/flac", SourceMediaContentType.Flac);
+      Assert.Equal("audio/mpeg", SourceMediaContentType.Mpeg);
+      Assert.Equal("audio/x-matroska", SourceMediaContentType.Matroska);
+      Assert.Equal("audio/ogg", SourceMediaContentType.Ogg);
+      Assert.Equal("audio/ogg;codecs=flac", SourceMediaContentType.OggFlac);
+      Assert.Equal("audio/ogg;codecs=opus", SourceMediaContentType.OggOpus);
+      Assert.Equal("audio/webm", SourceMediaContentType.WebM);
+      Assert.Equal("audio/webm;codecs=opus", SourceMediaContentType.WebMOpus);
+      Assert.Equal("audio/pcm;encoding=s16le;rate=16000", SourceMediaContentType.PcmS16le16000);
+    }
+  }
+
+  /// <summary>Tests of Voice API session creation; these require API access.</summary>
+  public sealed class VoiceSessionClientTest : BaseDeepLTest {
+    [Fact]
+    public async Task TestCreateSessionRequiresTargetLanguages() {
+      var deepLClient = CreateTestClient();
+      var withoutTargets = new VoiceSessionOptions {
+        SourceMediaContentType = SourceMediaContentType.OggOpus
+      };
+      await Assert.ThrowsAsync<ArgumentException>(
+            () => deepLClient.CreateVoiceSessionAsync(withoutTargets));
+    }
+
+    [Fact]
+    public async Task TestCreateSessionRejectsExcessiveTargetLanguages() {
+      var deepLClient = CreateTestClient();
+      var sixTargets = new VoiceSessionOptions {
+        SourceMediaContentType = SourceMediaContentType.OggOpus,
+        TargetLanguages = new[] { "de", "fr", "es", "it", "nl", "pt" }
+      };
+      await Assert.ThrowsAsync<ArgumentException>(
+            () => deepLClient.CreateVoiceSessionAsync(sixTargets));
+    }
+
+    [Fact]
+    public async Task TestCreateSessionRejectsNullOptions() {
+      var deepLClient = CreateTestClient();
+      await Assert.ThrowsAsync<ArgumentNullException>(
+            () => deepLClient.CreateVoiceSessionAsync(null!));
+    }
+  }
+}

From 97f977abf2031ae30c86ce8fe82a18483a2ce918 Mon Sep 17 00:00:00 2001
From: Tim Cadenbach <DeeJay@tcdev.de>
Date: Wed, 8 Apr 2026 21:20:38 +0200
Subject: [PATCH 2/2] Update DeepL/VoiceSessionOptions.cs

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 DeepL/VoiceSessionOptions.cs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/DeepL/VoiceSessionOptions.cs b/DeepL/VoiceSessionOptions.cs
index cf1235c..81a6cac 100644
--- a/DeepL/VoiceSessionOptions.cs
+++ b/DeepL/VoiceSessionOptions.cs
@@ -18,7 +18,8 @@ public VoiceSessionOptions() { }
     public string SourceMediaContentType { get; set; } = DeepL.SourceMediaContentType.Auto;
 
     /// <summary>
-    ///   Message encoding format for WebSocket communication. Defaults to <see cref="VoiceMessageFormat.Json" />.
+    ///   Message encoding format for WebSocket communication. If <c>null</c>, the API default is used
+    ///   (currently <see cref="VoiceMessageFormat.Json" />).
     /// </summary>
     public VoiceMessageFormat? MessageFormat { get; set; }