diff --git a/.gitignore b/.gitignore
index d5a631f..d8789d6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,9 @@ Cargo.lock
 .DS_Store
 .cargo/config.toml
 
+# Claude Code runtime state
+.claude/scheduled_tasks.lock
+
 # Python tooling (scripts/)
 scripts/.venv/
 scripts/__pycache__/
diff --git a/Makefile b/Makefile
index 4f1b0bd..2ae6151 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-.PHONY: help check test fmt lint doc ci accuracy mel example-controller
+.PHONY: help check test fmt lint doc ci accuracy mel hf-smoke example-controller
 
 help:
 	@echo "Available targets:"
@@ -6,6 +6,7 @@ help:
 	@echo "  test      Run all tests"
 	@echo "  accuracy  Cross-validate Rust pipeline against Python reference"
 	@echo "  mel       Compare Rust vs Python mel spectrograms element-wise"
+	@echo "  hf-smoke  Download wavekat/smart-turn-ONNX from HF and run zh fixtures"
 	@echo "  fmt       Format code"
 	@echo "  lint      Run clippy with warnings as errors"
 	@echo "  doc       Build and open docs in browser"
@@ -28,6 +29,13 @@ accuracy:
 mel:
 	cargo test --features pipecat -- mel_report --ignored --nocapture
 
+# Download wavekat/smart-turn-ONNX from HuggingFace and assert the zh fine-tune
+# correctly classifies the Mandarin fixtures. Requires network on first run;
+# subsequent runs hit the HF cache under $HF_HOME/hub/.
+hf-smoke:
+	cargo test --features wavekat-smart-turn --test pipecat \
+	    -- --ignored wavekat_hf_download_smoke --nocapture
+
 # Run TurnController example
 example-controller:
 	cargo run --features pipecat --example controller
diff --git a/README.md b/README.md
index c560e4b..49995ba 100644
--- a/README.md
+++ b/README.md
@@ -19,8 +19,15 @@ models behind common Rust traits. Same pattern as
 | Backend | Feature flag | Input | Model size | Inference | License |
 |---------|-------------|-------|------------|-----------|---------|
 | [Pipecat Smart Turn v3](https://github.com/pipecat-ai/smart-turn) | `pipecat` | Audio (16 kHz PCM) | ~8 MB (int8 ONNX) | ~12 ms CPU | BSD 2-Clause |
+| WaveKat Smart Turn fine-tunes ([HF](https://huggingface.co/wavekat/smart-turn-ONNX)) | `wavekat-smart-turn` | Audio (16 kHz PCM) | ~8 MB (int8 ONNX) | ~12 ms CPU | BSD 2-Clause |
 | [LiveKit Turn Detector](https://github.com/livekit/turn-detector) | `livekit` | Text (ASR transcript) | ~400 MB (ONNX) | ~25 ms CPU | LiveKit Model License |
 
+The WaveKat fine-tunes share the upstream Pipecat ONNX contract (same input
+shape, same tensor names) — they're language-specialized weights for the
+same architecture. Use them when you want better behavior on a specific
+language; today Mandarin (`zh`) is the only one shipped, but more will land
+in the same HF repo over time.
+
 ## Quick Start
 
 ```sh
@@ -92,8 +99,33 @@ wavekat-voice -->  orchestrates VAD + turn + ASR + LLM + TTS
 | Flag | Default | Description |
 |------|---------|-------------|
 | `pipecat` | off | Pipecat Smart Turn v3 audio backend (requires `ort`, `ndarray`) |
+| `wavekat-smart-turn` | off | WaveKat language-specialized fine-tunes; implies `pipecat`, adds `hf-hub` runtime download |
 | `livekit` | off | LiveKit text-based backend (requires `ort`, `ndarray`) |
 
+### Selecting a Smart Turn variant
+
+```rust
+use wavekat_turn::audio::{PipecatSmartTurn, SmartTurnVariant};
+# #[cfg(feature = "wavekat-smart-turn")]
+use wavekat_turn::audio::SmartTurnLang;
+
+// Embedded upstream weights — works offline, no setup.
+let detector = PipecatSmartTurn::new()?;
+
+# #[cfg(feature = "wavekat-smart-turn")]
+// WaveKat Mandarin fine-tune — downloaded from HuggingFace on first call,
+// then cached under $HF_HOME/hub/.
+let detector = PipecatSmartTurn::with_variant(
+    SmartTurnVariant::Wavekat(SmartTurnLang::Zh),
+)?;
+```
+
+The first call for a WaveKat variant downloads the ONNX from
+[`wavekat/smart-turn-ONNX`](https://huggingface.co/wavekat/smart-turn-ONNX)
+and caches it under `$HF_HOME/hub/` (default `~/.cache/huggingface/hub/`).
+For offline builds, set `WAVEKAT_TURN_MODEL_DIR` to a directory containing
+`<lang>/smart-turn-cpu.onnx` to skip the download.
+
 ## Important Notes
 
 - **8 kHz telephony audio must be upsampled to 16 kHz** before passing to
diff --git a/crates/wavekat-turn/Cargo.toml b/crates/wavekat-turn/Cargo.toml
index aa9d3f4..865211c 100644
--- a/crates/wavekat-turn/Cargo.toml
+++ b/crates/wavekat-turn/Cargo.toml
@@ -17,6 +17,10 @@ build = "build.rs"
 default = []
 pipecat = ["dep:ort", "dep:ndarray", "dep:realfft", "dep:ureq"]
 livekit = ["dep:ort", "dep:ndarray"]
+# WaveKat language-specialized Smart Turn fine-tunes, fetched from HuggingFace
+# at runtime via `hf-hub`. The language is chosen at runtime through
+# `SmartTurnVariant::Wavekat(SmartTurnLang::…)`.
+wavekat-smart-turn = ["pipecat", "dep:hf-hub"]
 
 [dependencies]
 wavekat-core = "0.0.4"
@@ -26,6 +30,9 @@ thiserror = "2"
 ort = { version = "2.0.0-rc.12", optional = true, features = ["ndarray"] }
 ndarray = { version = "0.17", optional = true }
 realfft = { version = "3", optional = true }
+# Runtime HuggingFace downloads for WaveKat fine-tunes (gated on
+# `wavekat-smart-turn`). A blocking ureq backend keeps us off tokio.
+hf-hub = { version = "0.5", optional = true, default-features = false, features = ["ureq"] }
 
 [build-dependencies]
 ureq = { version = "3", optional = true }
diff --git a/crates/wavekat-turn/src/audio/mod.rs b/crates/wavekat-turn/src/audio/mod.rs
index 2e3869b..3f5f198 100644
--- a/crates/wavekat-turn/src/audio/mod.rs
+++ b/crates/wavekat-turn/src/audio/mod.rs
@@ -2,9 +2,21 @@
 //!
 //! These backends operate directly on raw audio frames and do not
 //! require an upstream ASR transcript.
+//!
+//! [`PipecatSmartTurn`] is the entry point; [`SmartTurnVariant`] selects
+//! which set of weights to load (upstream Pipecat vs WaveKat fine-tunes).
+//! When the `wavekat-smart-turn` feature is enabled, [`SmartTurnLang`]
+//! enumerates the language-specialized fine-tunes available on
+//! HuggingFace.
 
 #[cfg(feature = "pipecat")]
 mod pipecat;
 
+#[cfg(feature = "wavekat-smart-turn")]
+pub(crate) mod wavekat_download;
+
 #[cfg(feature = "pipecat")]
-pub use pipecat::PipecatSmartTurn;
+pub use pipecat::{PipecatSmartTurn, SmartTurnVariant};
+
+#[cfg(feature = "wavekat-smart-turn")]
+pub use pipecat::SmartTurnLang;
diff --git a/crates/wavekat-turn/src/audio/pipecat.rs b/crates/wavekat-turn/src/audio/pipecat.rs
index eefe115..1ac38cd 100644
--- a/crates/wavekat-turn/src/audio/pipecat.rs
+++ b/crates/wavekat-turn/src/audio/pipecat.rs
@@ -57,6 +57,40 @@ use realfft::{RealFftPlanner, RealToComplex};
 use crate::onnx;
 use crate::{AudioFrame, AudioTurnDetector, StageTiming, TurnError, TurnPrediction, TurnState};
 
+// ---------------------------------------------------------------------------
+// Model variants
+// ---------------------------------------------------------------------------
+
+/// Language for a WaveKat fine-tune of Pipecat Smart Turn.
+///
+/// Each variant resolves to a `<lang>/smart-turn-cpu.onnx` file inside the
+/// language-agnostic HuggingFace repo `wavekat/smart-turn-ONNX`. The set is
+/// marked `#[non_exhaustive]` because adding a new language must not be a
+/// breaking change.
+#[cfg(feature = "wavekat-smart-turn")]
+#[non_exhaustive]
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum SmartTurnLang {
+    /// Mandarin Chinese.
+    Zh,
+}
+
+/// Which set of Smart Turn weights to load.
+///
+/// All variants share the same architecture (Whisper-Tiny encoder + binary
+/// classification head) and ONNX tensor contract — only the weights differ.
+#[non_exhaustive]
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum SmartTurnVariant {
+    /// Upstream multilingual Pipecat Smart Turn v3 (embedded in the crate).
+    PipecatV3,
+    /// WaveKat language-specialized fine-tune. Resolved at runtime through
+    /// HuggingFace (cached under `$HF_HOME/hub/`) and overridable via
+    /// `WAVEKAT_TURN_MODEL_DIR`.
+    #[cfg(feature = "wavekat-smart-turn")]
+    Wavekat(SmartTurnLang),
+}
+
 // ---------------------------------------------------------------------------
 // Constants
 // ---------------------------------------------------------------------------
@@ -373,6 +407,10 @@ fn prepare_audio(samples: &[f32]) -> Vec<f32> {
 
 /// Pipecat Smart Turn v3 detector.
 ///
+/// Wraps the Smart Turn v3 architecture (Whisper-Tiny encoder + binary
+/// classification head). Use [`new`] for the embedded upstream weights, or
+/// [`with_variant`] to pick a WaveKat fine-tune at runtime.
+///
 /// Buffers up to 8 seconds of audio internally. Call [`push_audio`] with
 /// every incoming 16 kHz frame, then call [`predict`] when the VAD fires
 /// end-of-speech to get a [`TurnPrediction`].
@@ -392,6 +430,8 @@ fn prepare_audio(samples: &[f32]) -> Vec<f32> {
 /// # }
 /// ```
 ///
+/// [`new`]: Self::new
+/// [`with_variant`]: Self::with_variant
 /// [`push_audio`]: AudioTurnDetector::push_audio
 /// [`predict`]: AudioTurnDetector::predict
 pub struct PipecatSmartTurn {
@@ -409,12 +449,34 @@ unsafe impl Send for PipecatSmartTurn {}
 unsafe impl Sync for PipecatSmartTurn {}
 
 impl PipecatSmartTurn {
-    /// Load the Smart Turn v3.2 model embedded at compile time.
+    /// Load the upstream Pipecat Smart Turn v3.2 model embedded at compile time.
+    ///
+    /// Equivalent to [`with_variant(SmartTurnVariant::PipecatV3)`](Self::with_variant).
     pub fn new() -> Result<Self, TurnError> {
         let session = onnx::session_from_memory(MODEL_BYTES)?;
         Ok(Self::build(session))
     }
 
+    /// Load a specific variant of the Smart Turn model.
+    ///
+    /// - [`SmartTurnVariant::PipecatV3`] uses the embedded ONNX bytes — no
+    ///   network required.
+    /// - [`SmartTurnVariant::Wavekat`] (when the `wavekat-smart-turn` feature
+    ///   is enabled) downloads the corresponding language file from the
+    ///   `wavekat/smart-turn-ONNX` HuggingFace repo and caches it under
+    ///   `$HF_HOME/hub/`. Set `WAVEKAT_TURN_MODEL_DIR` to point at a
+    ///   pre-populated directory (offline / CI use).
+    pub fn with_variant(variant: SmartTurnVariant) -> Result<Self, TurnError> {
+        match variant {
+            SmartTurnVariant::PipecatV3 => Self::new(),
+            #[cfg(feature = "wavekat-smart-turn")]
+            SmartTurnVariant::Wavekat(lang) => {
+                let path = crate::audio::wavekat_download::resolve_model(lang)?;
+                Self::from_file(path)
+            }
+        }
+    }
+
     /// Load a model from a custom path on disk.
     ///
     /// Useful for CI environments that supply the model file separately, or
diff --git a/crates/wavekat-turn/src/audio/wavekat_download.rs b/crates/wavekat-turn/src/audio/wavekat_download.rs
new file mode 100644
index 0000000..697564c
--- /dev/null
+++ b/crates/wavekat-turn/src/audio/wavekat_download.rs
@@ -0,0 +1,66 @@
+//! Runtime download of WaveKat-trained Smart Turn weights from HuggingFace.
+//!
+//! Mirrors the `wavekat-tts` pattern: one language-agnostic HF repo with
+//! per-language subdirectories, a dated `REVISION` pinned in code so that
+//! model updates ship via a crate release, and a `WAVEKAT_TURN_MODEL_DIR`
+//! escape hatch for offline / CI builds.
+
+use std::path::PathBuf;
+
+use hf_hub::api::sync::ApiBuilder;
+use hf_hub::{Repo, RepoType};
+
+use super::pipecat::SmartTurnLang;
+use crate::error::TurnError;
+
+/// HuggingFace repo holding all WaveKat Smart Turn fine-tunes.
+const REPO_ID: &str = "wavekat/smart-turn-ONNX";
+
+/// Model revision to fetch; bump it to ship updated weights (as in `wavekat-tts`).
+/// NOTE(review): `main` is a moving ref, not a pin — use a dated tag or commit.
+const REVISION: &str = "main";
+
+/// Env var that lets callers point at a local directory containing
+/// `<lang>/smart-turn-cpu.onnx`, skipping the HuggingFace download entirely.
+const LOCAL_DIR_ENV: &str = "WAVEKAT_TURN_MODEL_DIR";
+
+/// Map a language to its file path inside the HF repo.
+fn relative_path(lang: SmartTurnLang) -> &'static str {
+    match lang {
+        SmartTurnLang::Zh => "zh/smart-turn-cpu.onnx",
+    }
+}
+
+/// Resolve the on-disk path for `lang`: use the `WAVEKAT_TURN_MODEL_DIR`
+/// override when set, otherwise download through hf-hub (or hit its cache).
+pub(crate) fn resolve_model(lang: SmartTurnLang) -> Result<PathBuf, TurnError> {
+    let rel = relative_path(lang);
+
+    if let Some(dir) = std::env::var_os(LOCAL_DIR_ENV) {
+        let candidate = PathBuf::from(dir).join(rel);
+        if !candidate.exists() {
+            return Err(TurnError::ModelNotLoaded(format!(
+                "{LOCAL_DIR_ENV} is set but {} does not exist",
+                candidate.display()
+            )));
+        }
+        return Ok(candidate);
+    }
+
+    // `from_env` honours `$HF_HOME` for the cache location, as the docs promise.
+    // Only override the token when `HF_TOKEN` is set, so a cached login survives.
+    let mut builder = ApiBuilder::from_env();
+    if let Ok(token) = std::env::var("HF_TOKEN") {
+        builder = builder.with_token(Some(token));
+    }
+    let api = builder
+        .build()
+        .map_err(|e| TurnError::BackendError(format!("failed to build hf-hub client: {e}")))?;
+    let repo = api.repo(Repo::with_revision(REPO_ID.into(), RepoType::Model, REVISION.into()));
+    repo.get(rel).map_err(|e| {
+        TurnError::BackendError(format!(
+            "failed to download {REPO_ID}@{REVISION}:{rel} from HuggingFace: {e}. \
+             Set {LOCAL_DIR_ENV} to a directory containing {rel} to skip the download."
+        ))
+    })
+}
diff --git a/crates/wavekat-turn/src/lib.rs b/crates/wavekat-turn/src/lib.rs
index fa68fa2..a696553 100644
--- a/crates/wavekat-turn/src/lib.rs
+++ b/crates/wavekat-turn/src/lib.rs
@@ -17,8 +17,15 @@
 //!
 //! | Feature | Backend | Input |
 //! |---------|---------|-------|
-//! | `pipecat` | Pipecat Smart Turn v3 (ONNX) | Audio (16 kHz) |
+//! | `pipecat` | Pipecat Smart Turn v3 (ONNX, embedded) | Audio (16 kHz) |
+//! | `wavekat-smart-turn` | WaveKat language-specialized fine-tunes (ONNX, runtime download) | Audio (16 kHz) |
 //! | `livekit` | LiveKit Turn Detector (ONNX) | Text |
+//!
+//! `wavekat-smart-turn` implies `pipecat` and adds an `hf-hub` runtime
+//! dependency. Weights live in
+//! [`wavekat/smart-turn-ONNX`](https://huggingface.co/wavekat/smart-turn-ONNX)
+//! and are cached under `$HF_HOME/hub/`. Set `WAVEKAT_TURN_MODEL_DIR` to a
+//! directory containing `<lang>/smart-turn-cpu.onnx` to skip the download.
 
 pub mod controller;
 pub mod error;
diff --git a/crates/wavekat-turn/tests/pipecat.rs b/crates/wavekat-turn/tests/pipecat.rs
index 0804308..018936c 100644
--- a/crates/wavekat-turn/tests/pipecat.rs
+++ b/crates/wavekat-turn/tests/pipecat.rs
@@ -47,6 +47,46 @@ fn test_new_loads_model() {
     PipecatSmartTurn::new().expect("PipecatSmartTurn::new() should succeed");
 }
 
+#[test]
+fn test_with_variant_pipecat_v3_loads_model() {
+    use wavekat_turn::audio::SmartTurnVariant;
+    PipecatSmartTurn::with_variant(SmartTurnVariant::PipecatV3)
+        .expect("with_variant(PipecatV3) should succeed");
+}
+
+/// Exercise the WAVEKAT_TURN_MODEL_DIR override path without touching the
+/// network: drop the embedded Pipecat ONNX into a temp dir under the
+/// expected `<lang>/smart-turn-cpu.onnx` layout and confirm the variant
+/// loader picks it up. The bytes happen to be the upstream model — that's
+/// fine; we are only asserting the file resolution path works.
+#[cfg(feature = "wavekat-smart-turn")]
+#[test]
+fn test_wavekat_variant_uses_local_dir_override() {
+    use wavekat_turn::audio::{SmartTurnLang, SmartTurnVariant};
+
+    let tmp = std::env::temp_dir().join("wavekat_turn_local_dir_test");
+    let lang_dir = tmp.join("zh");
+    std::fs::create_dir_all(&lang_dir).unwrap();
+    let path = lang_dir.join("smart-turn-cpu.onnx");
+    let model_bytes = include_bytes!(concat!(env!("OUT_DIR"), "/smart-turn-v3.2-cpu.onnx"));
+    std::fs::write(&path, model_bytes).unwrap();
+
+    // SAFETY: mutating the environment is only sound while no other thread
+    // reads or writes it. The std test harness runs tests in parallel, so this
+    // relies on no concurrent test touching the environment. NOTE(review): the
+    // ignored HF smoke test also reads this variable; don't run both together.
+    unsafe {
+        std::env::set_var("WAVEKAT_TURN_MODEL_DIR", &tmp);
+    }
+    let result = PipecatSmartTurn::with_variant(SmartTurnVariant::Wavekat(SmartTurnLang::Zh));
+    unsafe {
+        std::env::remove_var("WAVEKAT_TURN_MODEL_DIR");
+    }
+
+    let _ = std::fs::remove_dir_all(&tmp);
+    result.expect("with_variant(Wavekat(Zh)) should pick up the local override");
+}
+
 #[test]
 fn test_from_file_loads_model() {
     let tmp = std::env::temp_dir().join("wavekat_turn_test");
@@ -152,6 +192,115 @@ fn test_from_file_invalid_path_returns_error() {
     );
 }
 
+/// End-to-end smoke test for the WaveKat HuggingFace download path.
+///
+/// Pulls `wavekat/smart-turn-ONNX` from the Hub (cached in `$HF_HOME/hub/`
+/// after the first run), runs it against the repo fixtures, and prints a
+/// markdown table of probabilities. Asserts that the three `zh_*` clips
+/// (Mandarin, synthesized with wavekat-tts at 24 kHz and resampled to
+/// 16 kHz via ffmpeg) classify on the expected side of 0.5. Marked
+/// `#[ignore]` so CI and `cargo test` never hit the network unintentionally.
+///
+/// Run with:
+///   cargo test --features wavekat-smart-turn --test pipecat \
+///       -- --ignored wavekat_hf_download_smoke --nocapture
+#[cfg(feature = "wavekat-smart-turn")]
+#[test]
+#[ignore = "network: downloads ~8 MB from huggingface.co"]
+fn wavekat_hf_download_smoke() {
+    use std::path::Path;
+
+    use wavekat_turn::audio::{SmartTurnLang, SmartTurnVariant};
+    use wavekat_turn::TurnState;
+
+    fn fixtures_dir() -> std::path::PathBuf {
+        Path::new(env!("CARGO_MANIFEST_DIR"))
+            .parent()
+            .unwrap()
+            .parent()
+            .unwrap()
+            .join("tests/fixtures")
+    }
+
+    fn load_wav(path: &Path) -> Vec<f32> {
+        let mut reader =
+            hound::WavReader::open(path).unwrap_or_else(|e| panic!("open {}: {e}", path.display()));
+        let spec = reader.spec();
+        assert_eq!(spec.sample_rate, 16_000);
+        assert_eq!(spec.channels, 1);
+        match spec.sample_format {
+            hound::SampleFormat::Int => reader
+                .samples::<i16>()
+                .map(|s| s.unwrap() as f32 / 32768.0)
+                .collect(),
+            hound::SampleFormat::Float => reader.samples::<f32>().map(|s| s.unwrap()).collect(),
+        }
+    }
+
+    fn p_complete(pred: &TurnPrediction) -> f32 {
+        match pred.state {
+            TurnState::Finished => pred.confidence,
+            TurnState::Unfinished => 1.0 - pred.confidence,
+            TurnState::Wait => unreachable!(),
+        }
+    }
+
+    println!("\nLoading wavekat/smart-turn-ONNX (zh) from HuggingFace…");
+    let mut detector = PipecatSmartTurn::with_variant(SmartTurnVariant::Wavekat(SmartTurnLang::Zh))
+        .expect("HF download / model load failed");
+
+    // (clip, expected_state) — None means "print only, no assertion".
+    // English clips are kept for diagnostics; the zh fine-tune isn't expected
+    // to score them correctly.
+    let clips: &[(&str, Option<TurnState>)] = &[
+        ("silence_2s.wav", None),
+        ("speech_finished.wav", None),
+        ("speech_mid.wav", None),
+        ("zh_speech_finished.wav", Some(TurnState::Finished)),
+        ("zh_speech_finished_short.wav", Some(TurnState::Finished)),
+        ("zh_speech_mid.wav", Some(TurnState::Unfinished)),
+    ];
+
+    println!();
+    println!("| Clip | P(complete) | State | Latency (ms) | Expected |");
+    println!("|------|-------------|-------|--------------|----------|");
+    let mut failures = Vec::new();
+    for (clip, expected) in clips {
+        detector.reset();
+        let samples = load_wav(&fixtures_dir().join(clip));
+        for chunk in samples.chunks(1600) {
+            detector.push_audio(&AudioFrame::new(chunk, 16_000));
+        }
+        let pred = detector.predict().expect("predict failed");
+        valid_prediction(&pred);
+        let exp_label = expected.map(|s| format!("{s:?}")).unwrap_or("—".into());
+        println!(
+            "| `{}` | {:.4} | {:?} | {} | {} |",
+            clip,
+            p_complete(&pred),
+            pred.state,
+            pred.latency_ms,
+            exp_label,
+        );
+        if let Some(want) = expected {
+            if pred.state != *want {
+                failures.push(format!(
+                    "{clip}: expected {want:?}, got {:?} (P={:.4})",
+                    pred.state,
+                    p_complete(&pred),
+                ));
+            }
+        }
+    }
+    println!();
+    if !failures.is_empty() {
+        panic!(
+            "zh fixture misclassifications:\n  {}",
+            failures.join("\n  ")
+        );
+    }
+}
+
 /// Smoke test: latency is measured and non-zero (always runs, including debug).
 #[test]
 fn test_latency_is_measured() {
diff --git a/docs/04-plan-wavekat-smart-turn.md b/docs/04-plan-wavekat-smart-turn.md
new file mode 100644
index 0000000..e85c165
--- /dev/null
+++ b/docs/04-plan-wavekat-smart-turn.md
@@ -0,0 +1,465 @@
+# Plan: Distribute WaveKat Smart Turn Fine-tunes via `wavekat-turn`
+
+**Status:** Draft for review
+**Date:** 2026-05-11
+**Branch:** `feat/wavekat-smart-turn`
+
+> Scope: a language-agnostic distribution path for our own Smart Turn
+> fine-tunes. Mandarin (`zh`) is the first language we ship; the design must
+> let us add more languages without breaking changes or new HF repos.
+
+---
+
+## What we are (and are not) shipping
+
+**Smart Turn is Pipecat's project.** The architecture (Whisper-Tiny encoder +
+binary classification head), the training recipe, the ONNX export pipeline,
+and the original `smart-turn-v3.2-cpu.onnx` weights all belong to
+[pipecat-ai/smart-turn](https://github.com/pipecat-ai/smart-turn) (BSD 2-Clause).
+
+What WaveKat contributes is **language-specialized weights** that drop into the
+same architecture, exported to the **same ONNX interface** Pipecat already
+defines. Concretely:
+
+- **Same input tensor:** `input_features`, shape `[B, 80, 800]`, float32.
+- **Same output tensor:** `logits`, shape `[B, 1]`, float32 (sigmoid fused).
+- **Same audio pipeline:** 16 kHz mono, 8-second window, Whisper-style log-mel
+  features (Slaney, n_fft=400, hop=160, 80 mels).
+
+The implication that runs through this whole plan: **anything compatible with
+upstream Pipecat Smart Turn must remain compatible with our weights, and vice
+versa.** That includes:
+
+1. **Pipecat's own Python loader** (`smart-turn` repo) must be able to consume
+   our ONNX files with no code changes. Validated by running the upstream
+   Python inference script against our exported ONNX.
+2. **Our `wavekat-turn` Rust loader** picks them up via the same `from_file()`
+   path used today for the upstream model.
+3. **Future ports** (e.g. a Pipecat Python integration, or a third-party
+   loader) can use the same files.
+
+Practical consequences for this design:
+
+- The HF repo must be **architecture-named, not crate-named**. It is "ONNX
+  weights for Pipecat Smart Turn, fine-tuned by WaveKat", not "weights for
+  wavekat-turn".
+- The model card must lead with **strong attribution to upstream Pipecat**
+  (link to the GitHub repo, the upstream HF org, and the BSD 2-Clause notice)
+  before describing our fine-tunes.
+- Tensor names, shapes, and feature-extraction parameters are **frozen** to
+  match Pipecat. If Pipecat ever revs the architecture (e.g. a v4 with
+  different input shape), we add a new family of repos rather than mutating
+  the existing one.
+- We should not rename `PipecatSmartTurn` to `SmartTurnDetector` in
+  `wavekat-turn` (previously suggested as a follow-up). The name correctly
+  identifies the *architecture* we are wrapping; both upstream and our
+  weights are instances of it. That decision is now reversed — see
+  Decision 9.
+
+---
+
+## Context
+
+- `training/smart-turn-zh/` produced a Mandarin fine-tune of Pipecat Smart Turn
+  v3 (same architecture: Whisper-Tiny encoder + binary classification head).
+- The trained model lives in **wavekat-platform**, which is an **internal-only**
+  model registry today (no public anonymous read access).
+- `wavekat-turn` is a public OSS crate published to crates.io. Its build script
+  downloads `smart-turn-v3.2-cpu.onnx` from HuggingFace at build time and embeds
+  the bytes via `include_bytes!()`.
+- Goal: make our Chinese model usable from `wavekat-turn` with the same
+  zero-setup experience the upstream Pipecat model gets today.
+
+### Sibling-repo precedent: `wavekat-tts`
+
+`wavekat-tts` already publishes WaveKat-owned ONNX weights publicly under the
+existing HuggingFace org **[`wavekat`](https://huggingface.co/wavekat)**:
+
+| Repo | Layout | Loading mechanism |
+|------|--------|-------------------|
+| [`wavekat/Qwen3-TTS-1.7B-VoiceDesign-ONNX`](https://huggingface.co/wavekat/Qwen3-TTS-1.7B-VoiceDesign-ONNX) | `fp32/*.onnx`, `int4/*.onnx`, `config.json`, `embeddings/*.npy`, `tokenizer/*` | Runtime download via `hf-hub` crate, cached at `$HF_HOME/hub/` |
+| [`wavekat/Qwen3-TTS-0.6B-Base-ONNX`](https://huggingface.co/wavekat/Qwen3-TTS-0.6B-Base-ONNX) | Same shape, plus `speaker_encoder.onnx` / `tokenizer_encoder.onnx` | Same |
+
+Conventions established by `wavekat-tts` that we should follow:
+- **HF org name:** `wavekat` (already confirmed live).
+- **Repo naming:** `wavekat/<ModelName>-ONNX` with the `-ONNX` suffix.
+- **Multi-precision layout:** `fp32/` and `int4/` subdirs inside one repo, so
+  users pick precision at runtime instead of at build time.
+- **Revision pinning:** the consuming crate pins a dated revision string in
+  code (e.g. `REVISION: &str = "2026-04-06"`), so model updates ship via a
+  crate release, not silently when a user re-pulls.
+- **Local override env var:** `WAVEKAT_MODEL_DIR` (TTS uses
+  `WAVEKAT_MODEL_DIR` / `WAVEKAT_CLONE_MODEL_DIR`) lets users point at a
+  pre-populated directory and skip downloads entirely — needed for offline
+  builds and CI.
+- **License:** Apache 2.0 on the consuming crate; the model files inherit
+  their upstream license.
+
+---
+
+## Question: HuggingFace first, or load from wavekat-platform?
+
+**Recommendation: HuggingFace first.** Use wavekat-platform as the
+source-of-truth training registry; treat HF as the **public distribution
+mirror** for snapshots we have explicitly chosen to release. This matches what
+`wavekat-tts` already does — the `wavekat` HF org is established and the
+pattern is proven across the ecosystem.
+
+| Concern                       | wavekat-platform               | HuggingFace                                       |
+|-------------------------------|--------------------------------|---------------------------------------------------|
+| Public anonymous access       | No (internal)                  | Yes                                               |
+| Works in OSS user's `cargo build` | Would require auth tokens   | Anonymous HTTP GET, no auth                       |
+| CDN / global cache            | None                           | Built-in                                          |
+| Matches upstream Pipecat path | No                             | Yes — same host, same URL shape as Pipecat        |
+| Build-script complexity       | Auth, secrets, rate limits     | A single `ureq::get(url)` call (already in place) |
+| Versioning / reproducibility  | Internal version IDs           | Git revisions on the model repo                   |
+
+### What the workflow looks like
+
+1. Train on `wavekat-lab`, push artifact to **wavekat-platform** (already done).
+2. When a checkpoint is ready for public release, **export an ONNX snapshot
+   to a HF model repo** under the `wavekat` org (e.g. `wavekat/smart-turn-ONNX`).
+3. `wavekat-turn`'s build script downloads from HF, the same way it does for
+   Pipecat. The platform stays internal; HF carries the public bits only.
+
+This keeps two clear roles:
+- **Platform = training registry** (private, includes raw checkpoints, eval
+  artifacts, experiments).
+- **HF = release channel** (public, only the ONNX files we have decided to
+  ship, tagged and immutable).
+
+### Open questions before publishing to HF
+
+- ~~HF org/account name.~~ **Resolved**: use the existing `wavekat` org (same
+  as `wavekat-tts` models).
+- **HF repo name (language-agnostic).** zh is just the first of many planned
+  languages, so the repo name must not bake the language in. Two viable
+  shapes:
+
+  **A. One repo, language subdirs** *(recommended)*
+  ```
+  wavekat/smart-turn-ONNX
+    ├── zh/smart-turn-cpu.onnx
+    ├── ja/smart-turn-cpu.onnx        (future)
+    ├── yue/smart-turn-cpu.onnx       (future)
+    └── README.md
+  ```
+  Mirrors the TTS precedent of per-axis subdirs (`fp32/`, `int4/`).
+  Adding a language later is a file push, not a new repo + new model card +
+  new revision string.
+
+  **B. Per-language repos with a stable parent pattern**
+  ```
+  wavekat/smart-turn-zh-ONNX        (this branch)
+  wavekat/smart-turn-ja-ONNX        (future)
+  ```
+  Cleanest model card per language, but every new language is a new repo +
+  new constants in `wavekat-turn`, and the repo name still encodes a
+  language — exactly what we want to avoid.
+
+  **Decision proposed: A.** Single repo `wavekat/smart-turn-ONNX` with
+  `<lang>/` subdirs. Future expansion is additive and never requires a new
+  HF repo.
+
+- License. Pipecat upstream is BSD 2-Clause. Our fine-tunes inherit that
+  unless we add separate ToS. Confirm we are comfortable publishing under
+  BSD 2-Clause.
+- Model card content: per-language sections (training data sources, eval
+  numbers, intended use, known limitations — dialect coverage, SNR
+  conditions). Keep a single top-level model card with a section per
+  language.
+- Revision convention. Pin a single dated `REVISION = "YYYY-MM-DD"` in
+  `wavekat-turn` code, same as `wavekat-tts`. Updates to any language
+  bump the same revision.
+
+---
+
+## Architecture: how to add the model to wavekat-turn
+
+The Chinese model is **the same architecture** as upstream Pipecat — only the
+weights differ. Mel feature extraction, ring-buffer logic, tensor shapes,
+output interpretation, and the 0.5 threshold are all identical.
+
+That means we have three real options for how the public API surfaces it.
+
+### Option A — Variant on the existing `PipecatSmartTurn` struct *(recommended)*
+
+Add a `Variant` enum and constructors that select which set of weights to load.
+Inference code is unchanged.
+
+```rust
+/// Language for the WaveKat fine-tune. Extend as we ship more languages.
+#[non_exhaustive]
+pub enum SmartTurnLang {
+    /// Mandarin Chinese (first WaveKat fine-tune).
+    Zh,
+    // Ja, Yue, ... (future)
+}
+
+#[non_exhaustive]
+pub enum SmartTurnVariant {
+    /// Upstream multilingual Pipecat Smart Turn v3.
+    PipecatV3,
+    /// WaveKat fine-tune for a specific language.
+    Wavekat(SmartTurnLang),
+}
+
+impl PipecatSmartTurn {
+    pub fn new() -> Result<Self, TurnError>;                 // unchanged: PipecatV3
+    pub fn with_variant(v: SmartTurnVariant) -> Result<Self, TurnError>;
+    pub fn from_file(path: impl AsRef<Path>) -> Result<Self, TurnError>;  // unchanged
+}
+```
+
+`#[non_exhaustive]` on both enums is deliberate: adding a new language must
+not be a breaking change.
+
+The build/load layer resolves `Wavekat(lang)` to `<lang>/smart-turn-cpu.onnx`
+inside the single `wavekat/smart-turn-ONNX` HF repo, so adding a new language
+is a one-line variant addition + a file in the HF repo — no new constants,
+no new feature flag.
+
+**Pros**
+- Zero code duplication; the feature is purely "different bytes".
+- Honest naming: the *backend* is "Pipecat Smart Turn v3 architecture"; both
+  models are instances of it.
+- Users on a strict binary-size budget can disable one variant via features.
+
+**Cons**
+- `PipecatSmartTurn` is no longer a single-model thing; the type name suggests
+  "Pipecat" even when running our weights. We can rename the struct to
+  `SmartTurnDetector` and keep `PipecatSmartTurn` as a deprecated type alias.
+
+### Option B — Separate `WavekatSmartTurnZh` struct
+
+A new struct in `audio/smart_turn_zh.rs` that mostly re-exports the same mel
+extractor and inference logic.
+
+**Pros**
+- Clearer in API docs: "for Chinese, use this struct".
+
+**Cons**
+- The mel extractor, ring buffer, and inference path would be copy-pasted or
+  factored into a shared inner type — extra plumbing for no behavioral
+  difference.
+- Long-term, every additional fine-tune (Cantonese, Japanese, domain-specific)
+  needs its own struct. Not scalable.
+
+### Option C — No automatic download; rely on `from_file()` only
+
+Publish to HF; expect users to download the file themselves and pass the path
+to the existing `from_file()` constructor. Document the URL in the README.
+
+**Pros**
+- Zero changes to `wavekat-turn` code.
+- Lowest friction to ship.
+
+**Cons**
+- Worse UX than the upstream Pipecat path, which is `new()` and "just works".
+- Asymmetric: Pipecat users get build-time download, our own users don't.
+
+**Recommendation: Option A.** Single backend type, two (eventually N) variants.
+Same UX as upstream Pipecat. Option C is a reasonable v0 if we want to publish
+to HF before doing any Rust work.
+
+---
+
+## Model loading strategy
+
+This is the most significant new question now that we've seen the
+`wavekat-tts` precedent. The two crates have diverged:
+
+| Crate | Mechanism | Pros | Cons |
+|-------|-----------|------|------|
+| `wavekat-turn` (today) | `build.rs` downloads, `include_bytes!()` embeds | Zero runtime setup, model lives in the binary, offline-friendly after first build | Bloats binary per variant; no precision choice at runtime; build needs network unless `*_MODEL_PATH` is set |
+| `wavekat-tts` (today)  | `hf-hub` runtime download to `$HF_HOME/hub/`, override with `WAVEKAT_MODEL_DIR` | Supports large models, runtime precision selection, no binary bloat, easy to update models without rebuilding | First-run network dependency; cache lives outside the build artefact |
+
+The Chinese model is ~8 MB int8 — small enough to embed under the existing
+**< 30 MB → embed** rule in [`02-plan-backends.md`](02-plan-backends.md). So
+both options are technically viable.
+
+### Option 1 — Keep embedding (consistent with `wavekat-turn` today)
+
+Add the zh ONNX as a second `include_bytes!()` blob, downloaded by `build.rs`
+under feature `pipecat-zh`. Identical pattern to upstream Pipecat.
+
+**Pros**: zero new dependencies; consistent with the current backend; works
+offline at runtime; reproducible via the existing version-marker caching.
+
+**Cons**: ecosystem-inconsistent — a `wavekat-tts` user knows
+`WAVEKAT_MODEL_DIR` and `~/.cache/huggingface/hub/`, but a `wavekat-turn` user
+has to learn `PIPECAT_SMARTTURN_MODEL_PATH` and a build-time recompile to swap
+weights.
+
+### Option 2 — Switch to `hf-hub` runtime download (align with `wavekat-tts`)
+
+Add `hf-hub` as a runtime dep gated on `pipecat-zh`. On the first
+`with_variant()` call for the zh variant, download `zh/smart-turn-cpu.onnx`
+to `$HF_HOME/hub/`. Honor `WAVEKAT_MODEL_DIR` and `HF_TOKEN`.
+
+**Pros**: unified ecosystem story across `wavekat-vad` / `wavekat-turn` /
+`wavekat-tts`; trivially supports future fine-tunes (Cantonese, domain-specific
+etc.) without re-publishing the crate; no binary bloat; users can swap model
+revisions by setting `WAVEKAT_MODEL_DIR` without rebuilding.
+
+**Cons**: divergence within `wavekat-turn` itself — upstream Pipecat stays
+embedded, zh model downloads at runtime. Two mental models for the same crate.
+It also introduces a first-run network dependency for the zh variant.
+
+### Option 3 — Switch both variants to `hf-hub` (full alignment)
+
+Migrate the upstream Pipecat variant off `include_bytes!()` too, so the whole
+crate uses `hf-hub` like `wavekat-tts`. Out of scope for this branch — would
+need its own migration plan and a major-version bump.
+
+### Recommendation
+
+**Option 2** for this branch, with **Option 3 as a follow-up** in a separate
+migration plan.
+
+Reasoning:
+- The ecosystem-consistency win is real: a user who already runs `wavekat-tts`
+  doesn't have to learn a second set of env vars.
+- The zh variant is the natural place to introduce `hf-hub` because it's
+  greenfield — no existing users to migrate.
+- Once `hf-hub` is in the dep tree under a feature, migrating the upstream
+  Pipecat variant later is a localized change behind the same trait.
+- We get **runtime precision selection** for free if/when we publish an fp16
+  variant — no rebuild required.
+
+If we're cautious about adding `hf-hub`, Option 1 is a perfectly fine
+fallback. The variant API stays the same either way; only the body of
+`with_variant(Wavekat(lang))` changes.
+
+---
+
+## Implementation plan (phased)
+
+### Phase 0 — Publish to HuggingFace (out-of-repo)
+
+1. ~~Decide HF org name and create the org if it does not exist.~~ Use
+   existing `wavekat` org.
+2. Create one language-agnostic model repo: **`wavekat/smart-turn-ONNX`**.
+3. Write a model card. **Lead with attribution**:
+   - First section: "WaveKat fine-tunes of [Pipecat Smart Turn v3](https://github.com/pipecat-ai/smart-turn)
+     ([upstream HF](https://huggingface.co/pipecat-ai/smart-turn-v3),
+     BSD 2-Clause)". State explicitly that the architecture, training recipe,
+     and ONNX export contract are Pipecat's; WaveKat contributes
+     language-specialized weights only.
+   - Followed by a per-language section (data, eval, limitations).
+   - Reproduce the BSD 2-Clause notice.
+4. Export the ONNX from the wavekat-platform checkpoint we want to ship.
+   **Compatibility checks before push** (block on these):
+   - Tensor names match Pipecat: input `input_features` `[B, 80, 800]`
+     float32, output `logits` `[B, 1]` float32 (sigmoid fused).
+   - Loads in the upstream **Pipecat Python** inference pipeline with no
+     code changes — just swap the model path. Capture a reference inference
+     output for our fixture clips from Python.
+   - Loads in our **Rust** pipeline via `from_file()` and matches the
+     Python reference within the existing accuracy tolerance.
+5. Push the ONNX to **`zh/smart-turn-cpu.onnx`** in the HF repo. Optionally
+   add `zh/smart-turn-fp32.onnx` if/when we want to ship higher precision.
+6. Sanity check:
+   `curl -L https://huggingface.co/wavekat/smart-turn-ONNX/resolve/main/zh/smart-turn-cpu.onnx`
+   returns the expected bytes anonymously.
+
+Future-language workflow (e.g. Japanese): push `ja/smart-turn-cpu.onnx` to
+the same repo, add a `Ja` variant to `SmartTurnLang`, ship a crate release.
+No new HF repo. No new feature flag.
+
+**Python usability note**: because the ONNX matches Pipecat's contract, a
+Python user can consume it directly from the Pipecat `smart-turn` repo with
+something like `SmartTurnAnalyzer(model_path=hf_hub_download("wavekat/smart-turn-ONNX", "zh/smart-turn-cpu.onnx"))`.
+The HF repo README should include this one-liner so the audience is
+explicitly "Pipecat users (Python or Rust) who want non-English support",
+not just `wavekat-turn` users.
+
+### Phase 1 — Add the variant to `wavekat-turn`
+
+Assuming **Option 2** (hf-hub runtime loading) is chosen:
+
+- Add `SmartTurnLang` and `SmartTurnVariant` enums (both `#[non_exhaustive]`).
+  Default constructor `new()` keeps using `PipecatV3` for backwards compat.
+- Add `PipecatSmartTurn::with_variant(variant)` constructor.
+- Add `hf-hub` as an optional dep gated on the feature flag.
+- New module `src/audio/wavekat_download.rs` mirroring `wavekat-tts`'
+  `download.rs`:
+  - `REPO_ID = "wavekat/smart-turn-ONNX"`, dated `REVISION`.
+  - Map `SmartTurnLang::Zh → "zh/smart-turn-cpu.onnx"`. The path lookup is
+    the single point that knows about languages — adding a language is one
+    match arm.
+  - Honor `WAVEKAT_TURN_MODEL_DIR` and `HF_TOKEN` exactly as TTS does for
+    `WAVEKAT_MODEL_DIR`. Use a turn-specific name to avoid collision with
+    the TTS env var.
+  - Return a path that `onnx::session_from_file` consumes.
+- In `with_variant(Wavekat(lang))`, call the download helper, then build a
+  session from the resolved path. The Pipecat variant continues to use the
+  embedded bytes path — no change.
+
+Alternative (Option 1, kept for fallback):
+
+- Extend `build.rs` with a download step per language and per-language
+  `include_bytes!()` blobs. Each new language requires a recompile and a
+  crate release — strictly worse for the multi-language future, but
+  acceptable if we want to avoid the `hf-hub` dependency.
+
+### Phase 2 — Cross-validation
+
+- Add fixture clips in Mandarin (a "finished" clip, an "unfinished" clip,
+  a silence/no-speech clip) under `tests/fixtures/`.
+- Regenerate the Python reference (`scripts/gen_reference.py`) against the zh
+  checkpoint and add `*.zh.mel.npy` / expected probabilities.
+- Extend `tests/pipecat.rs` (or add `tests/smart_turn_zh.rs`) with the same
+  9-test matrix from Phase 4 of `02-plan-backends.md`, plus parity checks
+  against the Python reference.
+
+### Phase 3 — README and example updates
+
+- README: add a row to the Backends table, document the `pipecat-zh` feature,
+  show a one-line example with
+  `with_variant(SmartTurnVariant::Wavekat(SmartTurnLang::Zh))`.
+- `examples/controller.rs`: optional second example with the zh model.
+- Update `02-plan-backends.md` to reflect "model variants" as a concept.
+
+### Phase 4 — Optional follow-ups
+
+- ~~Consider renaming `PipecatSmartTurn` to `SmartTurnDetector`.~~ **Reversed**:
+  keep `PipecatSmartTurn`. Pipecat owns the Smart Turn architecture; the
+  type name correctly identifies what we are wrapping. Our weights are
+  *instances* of Pipecat Smart Turn, not a separate detector.
+- Decide whether `TurnController` should expose the variant in its API
+  surface. Probably not: it is detector-agnostic by design.
+- Once the HF repo exists, open a small PR / issue on
+  [pipecat-ai/smart-turn](https://github.com/pipecat-ai/smart-turn)
+  pointing Python users at our weights for non-English support. Coordinate
+  on whether they want to list the WaveKat repo from their README.
+
+---
+
+## Risks and tradeoffs
+
+| Risk                                                          | Mitigation                                                                  |
+|---------------------------------------------------------------|-----------------------------------------------------------------------------|
+| Two embedded models double the crate's compiled size          | Feature-gate each variant; default features enable only `pipecat`.          |
+| HF revision drift between platform and HF                     | Pin the revision in `build.rs` (not just the URL) — same pattern as today.  |
+| Model card not ready for public release                       | Phase 0 gates the rest; do not start Phase 1 until the HF repo is signed off. |
+| License compatibility (Pipecat is BSD 2-Clause)               | Confirm before publishing; include upstream attribution in the model card.  |
+| Discoverability — users may not know there is a zh variant    | README table + variant docstring + a one-liner in the crate-level rustdoc.  |
+| Pipecat revises the architecture (v4 with different tensor shape) | Frozen contract is documented in Phase 0. A breaking change upstream means a new HF repo family (e.g. `wavekat/smart-turn-v4-ONNX`), not mutating the existing one. |
+| Our ONNX silently diverges from Pipecat's contract            | Phase 0 compatibility checks (Python pipeline + Rust pipeline) are gating. CI in the training repo should re-run them per checkpoint. |
+| Python users can't easily find/use our weights                | Model card includes a Python one-liner; consider a PR to upstream pointing at the WaveKat repo. |
+
+---
+
+## Decisions to confirm before implementation
+
+1. **Distribution channel:** HuggingFace `wavekat` org as the public mirror, platform stays internal? *(strongly recommended: yes — sibling `wavekat-tts` already does this)*
+2. **HF repo name:** `wavekat/smart-turn-ONNX` (language-agnostic, with per-language subdirs like `zh/`) — recommended over `wavekat/smart-turn-zh-ONNX` because more languages are coming.
+3. **License:** ship under BSD 2-Clause to match upstream Pipecat? *(default: yes)*
+4. **API shape:** variant enum on `PipecatSmartTurn` (Option A), separate struct (Option B), or `from_file()`-only (Option C)? *(recommended: A)*
+5. **Loading mechanism:** keep `build.rs` + `include_bytes!()` (Option 1), or switch to `hf-hub` runtime download (Option 2)? *(recommended: 2 — aligns with `wavekat-tts`)*
+6. **Feature flag name:** language-agnostic, e.g. `wavekat-smart-turn` or `smart-turn-wavekat`, rather than `pipecat-zh` / `smart-turn-zh` (one flag gates *all* WaveKat fine-tunes, language is chosen at runtime via `SmartTurnLang`).
+7. **Default features:** does the zh variant ship in default features, or stay opt-in? *(recommended: opt-in — keeps default install lean)*
+8. **Env var name:** `WAVEKAT_TURN_MODEL_DIR` (recommended, turn-specific, no collision with `WAVEKAT_MODEL_DIR` from TTS). Applies to whichever language is selected.
+9. ~~Rename `PipecatSmartTurn` → `SmartTurnDetector`?~~ **Resolved (keep `PipecatSmartTurn`)** — the architecture is Pipecat's; renaming would obscure that.
+10. **Future migration:** do we want to plan now for moving the upstream Pipecat variant off `include_bytes!()` to `hf-hub` too (Option 3), or leave that for later?
+11. **Coordination with Pipecat upstream:** do we want to proactively notify pipecat-ai/smart-turn maintainers (issue / PR linking our HF repo) so Python users discover the weights? *(recommended: yes, after Phase 0 ships)*
diff --git a/tests/fixtures/zh_speech_finished.wav b/tests/fixtures/zh_speech_finished.wav
new file mode 100644
index 0000000..52da116
Binary files /dev/null and b/tests/fixtures/zh_speech_finished.wav differ
diff --git a/tests/fixtures/zh_speech_finished_short.wav b/tests/fixtures/zh_speech_finished_short.wav
new file mode 100644
index 0000000..1832e03
Binary files /dev/null and b/tests/fixtures/zh_speech_finished_short.wav differ
diff --git a/tests/fixtures/zh_speech_mid.wav b/tests/fixtures/zh_speech_mid.wav
new file mode 100644
index 0000000..2633a0c
Binary files /dev/null and b/tests/fixtures/zh_speech_mid.wav differ