diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index e2b0b87..c9eceab 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -26,10 +26,10 @@ jobs: args: "--bundles appimage,deb,rpm" target: "aarch64-unknown-linux-gnu" - platform: "windows-latest" - args: "" + args: "--bundles nsis" target: "x86_64-pc-windows-msvc" - platform: "windows-11-arm" # for ARM64 Windows runner - args: "--target aarch64-pc-windows-msvc" + args: "--target aarch64-pc-windows-msvc --bundles nsis" target: "aarch64-pc-windows-msvc" uses: ./.github/workflows/build.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8a1a9a8..7a0486d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -51,7 +51,7 @@ jobs: runs-on: ${{ inputs.platform }} steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: repository: ${{ inputs.repository }} ref: ${{ inputs.ref }} @@ -131,12 +131,42 @@ jobs: dpkg-query -W -f='${Status}\n' libgtk-layer-shell0 | grep -q "install ok installed" ldconfig -p | grep -q "libgtk-layer-shell.so.0" - - name: Install Vulkan SDK (Windows x64) + - name: Cache Vulkan SDK for Windows x64 if: contains(inputs.platform, 'windows') && !contains(inputs.target, 'aarch64') - uses: humbletim/install-vulkan-sdk@v1.2 + id: cache-vulkan-sdk-windows-x64 + uses: actions/cache@v5 with: - version: 1.4.309.0 - cache: true + path: ~/.cache/parrot/vulkan-sdk-windows-x64 + key: ${{ runner.os }}-${{ runner.arch }}-vulkan-sdk-1.4.309.0 + + - name: Prepare Vulkan SDK for Windows x64 + if: contains(inputs.platform, 'windows') && !contains(inputs.target, 'aarch64') + shell: pwsh + env: + VULKAN_SDK_VERSION: "1.4.309.0" + run: | + $sdkCacheRoot = Join-Path $env:USERPROFILE ".cache\parrot\vulkan-sdk-windows-x64" + $sdkRoot = Join-Path $sdkCacheRoot $env:VULKAN_SDK_VERSION + $sdkBin = Join-Path $sdkRoot "Bin" + $sdkTool = Join-Path $sdkBin 
"glslangValidator.exe" + $sdkArchive = Join-Path $env:RUNNER_TEMP "vulkan_sdk_windows_x64.exe" + $sdkUrl = "https://sdk.lunarg.com/sdk/download/$env:VULKAN_SDK_VERSION/windows/VulkanSDK-$env:VULKAN_SDK_VERSION-Installer.exe?Human=true" + + if (!(Test-Path $sdkTool)) { + Remove-Item -Recurse -Force $sdkRoot -ErrorAction SilentlyContinue + New-Item -ItemType Directory -Force -Path $sdkRoot | Out-Null + Invoke-WebRequest -Uri $sdkUrl -OutFile $sdkArchive + $sevenZip = (Get-Command 7z.exe -ErrorAction Stop).Source + & $sevenZip x $sdkArchive "-o$sdkRoot" -aoa | Out-Null + } + + if (!(Test-Path $sdkTool)) { + throw "glslangValidator.exe was not found under $sdkBin after extracting the Vulkan SDK" + } + + Add-Content -Path $env:GITHUB_ENV -Value "VULKAN_SDK=$sdkRoot" + Add-Content -Path $env:GITHUB_PATH -Value $sdkBin + & $sdkTool --version # humbletim/install-vulkan-sdk@v1.2 cannot target Windows ARM64 yet. # Download prebuilt binaries (Bin) + build headers/libs from source. @@ -171,13 +201,36 @@ jobs: Write-Host "Verifying glslc..." 
& (Join-Path $binPath "glslc.exe") --version - - name: Build Vulkan SDK headers and libs (Windows ARM64) + - name: Cache Vulkan SDK components (Windows ARM64) if: contains(inputs.platform, 'windows') && contains(inputs.target, 'aarch64') + id: cache-vulkan-sdk-windows-arm64 + uses: actions/cache@v5 + with: + path: VULKAN_SDK + key: ${{ runner.os }}-${{ runner.arch }}-vulkan-sdk-arm64-1.4.309.0-headers-loader + + - name: Build Vulkan SDK headers and libs (Windows ARM64) + if: contains(inputs.platform, 'windows') && contains(inputs.target, 'aarch64') && steps.cache-vulkan-sdk-windows-arm64.outputs.cache-hit != 'true' uses: humbletim/setup-vulkan-sdk@v1.2.1 with: vulkan-query-version: 1.4.309.0 vulkan-components: Vulkan-Headers, Vulkan-Loader - vulkan-use-cache: true + vulkan-use-cache: false + + - name: Restore Vulkan SDK env (Windows ARM64) + if: contains(inputs.platform, 'windows') && contains(inputs.target, 'aarch64') && steps.cache-vulkan-sdk-windows-arm64.outputs.cache-hit == 'true' + shell: pwsh + run: | + $sdkDir = Join-Path $env:GITHUB_WORKSPACE "VULKAN_SDK" + $sdkBin = Join-Path $sdkDir "bin" + $sdkEnv = Join-Path $sdkDir "sdk.env" + + if (!(Test-Path $sdkEnv)) { + throw "sdk.env was not found in cached Windows ARM64 Vulkan SDK at $sdkDir" + } + + Add-Content -Path $env:GITHUB_ENV -Value "VULKAN_SDK=$sdkDir" + Add-Content -Path $env:GITHUB_PATH -Value $sdkBin - name: Install trusted-signing-cli if: contains(inputs.platform, 'windows') && inputs.sign-binaries @@ -192,12 +245,44 @@ jobs: sudo apt install vulkan-sdk -y sudo apt-get install -y mesa-vulkan-drivers - - name: Prepare Vulkan SDK for Ubuntu ARM64 + - name: Cache Vulkan SDK for Ubuntu ARM64 if: contains(inputs.platform, 'ubuntu') && contains(inputs.platform, 'arm') - uses: jakoch/install-vulkan-sdk-action@v1 + id: cache-vulkan-sdk-ubuntu-arm64 + uses: actions/cache@v5 with: - vulkan_version: 1.4.335.0 - cache: true + path: ~/.cache/parrot/vulkan-sdk-arm + key: ${{ runner.os }}-${{ runner.arch }}-vulkan-sdk-1.4.335.0-${{ 
inputs.platform }} + + - name: Prepare Vulkan SDK for Ubuntu ARM64 + if: contains(inputs.platform, 'ubuntu') && contains(inputs.platform, 'arm') + shell: bash + env: + VULKAN_SDK_VERSION: "1.4.335.0" + VULKAN_SDK_ASSET: ${{ contains(inputs.platform, '24.04') && 'vulkansdk-ubuntu-24.04-arm-1.4.335.0.tar.xz' || 'vulkansdk-ubuntu-22.04-arm-1.4.335.0.tar.xz' }} + run: | + set -euo pipefail + + SDK_CACHE_ROOT="$HOME/.cache/parrot/vulkan-sdk-arm" + SDK_ROOT="$SDK_CACHE_ROOT/$VULKAN_SDK_VERSION/aarch64" + SDK_ARCHIVE="$RUNNER_TEMP/$VULKAN_SDK_ASSET" + SDK_DOWNLOAD_URL="https://github.com/jakoch/vulkan-sdk-arm/releases/download/$VULKAN_SDK_VERSION/$VULKAN_SDK_ASSET" + + mkdir -p "$SDK_CACHE_ROOT" + + if [ ! -x "$SDK_ROOT/bin/glslc" ]; then + rm -rf "$SDK_CACHE_ROOT/$VULKAN_SDK_VERSION" + curl -fsSL "$SDK_DOWNLOAD_URL" -o "$SDK_ARCHIVE" + tar -xJf "$SDK_ARCHIVE" -C "$SDK_CACHE_ROOT" + fi + + test -x "$SDK_ROOT/bin/glslc" + "$SDK_ROOT/bin/glslc" --version + + { + echo "VULKAN_SDK=$SDK_ROOT" + echo "VK_LAYER_PATH=$SDK_ROOT/share/vulkan/explicit_layer.d" + echo "LD_LIBRARY_PATH=$SDK_ROOT/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" + } >> "$GITHUB_ENV" - name: Install Vulkan runtime libraries (Ubuntu ARM64) if: contains(inputs.platform, 'ubuntu') && contains(inputs.platform, 'arm') @@ -414,6 +499,91 @@ jobs: } Write-Host "Bundled espeak-ng from: $($bin.FullName)" + - name: Bundle ONNX Runtime shared library (Unix) + if: contains(inputs.platform, 'macos') || contains(inputs.platform, 'ubuntu') + shell: bash + run: | + set -euo pipefail + + ORT_VERSION="1.23.1" + case "${{ inputs.target }}" in + x86_64-apple-darwin) + ASSET="onnxruntime-osx-x86_64-${ORT_VERSION}.tgz" + LIB_GLOB='libonnxruntime*.dylib' + ;; + aarch64-apple-darwin) + ASSET="onnxruntime-osx-arm64-${ORT_VERSION}.tgz" + LIB_GLOB='libonnxruntime*.dylib' + ;; + x86_64-unknown-linux-gnu) + ASSET="onnxruntime-linux-x64-${ORT_VERSION}.tgz" + LIB_GLOB='libonnxruntime.so*' + ;; + aarch64-unknown-linux-gnu) + 
ASSET="onnxruntime-linux-aarch64-${ORT_VERSION}.tgz" + LIB_GLOB='libonnxruntime.so*' + ;; + *) + echo "Unsupported target for ONNX Runtime bundling: ${{ inputs.target }}" + exit 1 + ;; + esac + + TMP_DIR="$(mktemp -d)" + ARCHIVE_PATH="${TMP_DIR}/${ASSET}" + DEST="src-tauri/resources/onnxruntime" + mkdir -p "$DEST" + + curl -fsSL "https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VERSION}/${ASSET}" -o "$ARCHIVE_PATH" + tar -xzf "$ARCHIVE_PATH" -C "$TMP_DIR" + + LIB_DIR="$(find "$TMP_DIR" -type d \( -name lib -o -name lib64 \) | head -n 1)" + if [ -z "$LIB_DIR" ]; then + echo "Unable to find ONNX Runtime library directory in ${ASSET}" + exit 1 + fi + + find "$LIB_DIR" -maxdepth 1 -name "$LIB_GLOB" -exec cp -L {} "$DEST/" \; + if ! find "$DEST" -maxdepth 1 \( -type f -o -type l \) | grep -q .; then + echo "No ONNX Runtime libraries were copied for target ${{ inputs.target }}" + exit 1 + fi + + echo "Bundled ONNX Runtime files:" + ls -la "$DEST" + + - name: Bundle ONNX Runtime shared library (Windows) + if: contains(inputs.platform, 'windows') + shell: pwsh + run: | + $ErrorActionPreference = "Stop" + $ortVersion = "1.23.1" + if ("${{ inputs.target }}" -like "*aarch64*") { + $asset = "onnxruntime-win-arm64-$ortVersion.zip" + } else { + $asset = "onnxruntime-win-x64-$ortVersion.zip" + } + + $tmpDir = Join-Path $env:RUNNER_TEMP "onnxruntime" + Remove-Item -Recurse -Force $tmpDir -ErrorAction SilentlyContinue + New-Item -ItemType Directory -Force -Path $tmpDir | Out-Null + + $archivePath = Join-Path $tmpDir $asset + Invoke-WebRequest -Uri "https://github.com/microsoft/onnxruntime/releases/download/v$ortVersion/$asset" -OutFile $archivePath + Expand-Archive -LiteralPath $archivePath -DestinationPath $tmpDir -Force + + $dll = Get-ChildItem -Path $tmpDir -Recurse -Filter "onnxruntime.dll" | Select-Object -First 1 + if (-not $dll) { + throw "onnxruntime.dll not found in $asset" + } + + $dest = "src-tauri\resources\onnxruntime" + New-Item -ItemType Directory 
-Force -Path $dest | Out-Null + Get-ChildItem -Path $dll.DirectoryName -Filter "onnxruntime*.dll" | ForEach-Object { + Copy-Item $_.FullName $dest + Write-Host "Bundled ONNX Runtime DLL: $($_.Name)" + } + - name: Build with Tauri uses: tauri-apps/tauri-action@v0 env: @@ -441,7 +611,7 @@ jobs: - name: Upload artifacts (macOS) if: inputs.upload-artifacts && contains(inputs.platform, 'macos') - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: ${{ inputs.asset-prefix }}-${{ inputs.target }} path: | @@ -508,7 +678,7 @@ jobs: - name: Upload artifacts (Linux) if: inputs.upload-artifacts && contains(inputs.platform, 'ubuntu') - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: ${{ inputs.asset-prefix }}-${{ inputs.platform }}-${{ inputs.target }} path: | @@ -519,7 +689,7 @@ jobs: - name: Upload artifacts (Windows) if: inputs.upload-artifacts && contains(inputs.platform, 'windows') - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: ${{ inputs.asset-prefix }}-${{ inputs.target }} # Default Windows builds place bundles under release/, but cross-compiles (ARM64) nest under target//release. 
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 032b81a..825c8ae 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -5,7 +5,7 @@ jobs: lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - uses: oven-sh/setup-bun@v1 with: diff --git a/.github/workflows/playwright.yml b/.github/workflows/playwright.yml index 02714e2..8c5bd4c 100644 --- a/.github/workflows/playwright.yml +++ b/.github/workflows/playwright.yml @@ -5,7 +5,7 @@ jobs: playwright: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - uses: oven-sh/setup-bun@v1 with: @@ -22,7 +22,7 @@ jobs: - name: Upload test results if: failure() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: playwright-report path: playwright-report/ diff --git a/.github/workflows/pr-test-build.yml b/.github/workflows/pr-test-build.yml index 4a94b7a..23685f0 100644 --- a/.github/workflows/pr-test-build.yml +++ b/.github/workflows/pr-test-build.yml @@ -32,10 +32,10 @@ jobs: args: "--bundles appimage,deb,rpm" target: "aarch64-unknown-linux-gnu" - platform: "windows-latest" - args: "" + args: "--bundles nsis" target: "x86_64-pc-windows-msvc" - platform: "windows-11-arm" - args: "--target aarch64-pc-windows-msvc" + args: "--target aarch64-pc-windows-msvc --bundles nsis" target: "aarch64-pc-windows-msvc" uses: ./.github/workflows/build.yml @@ -57,7 +57,7 @@ jobs: pull-requests: write steps: - name: Post artifact links to PR - uses: actions/github-script@v7 + uses: actions/github-script@v9 with: script: | const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`; diff --git a/.github/workflows/prettier.yml b/.github/workflows/prettier.yml index ab39b48..d57f835 100644 --- a/.github/workflows/prettier.yml +++ b/.github/workflows/prettier.yml @@ -5,7 +5,7 @@ jobs: prettier: runs-on: ubuntu-latest steps: - - uses: 
actions/checkout@v4 + - uses: actions/checkout@v5 - uses: oven-sh/setup-bun@v1 with: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 658ec9d..81c1194 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -12,7 +12,7 @@ jobs: version: ${{ steps.get-version.outputs.version }} steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Get version from tauri.conf.json id: get-version @@ -24,7 +24,7 @@ jobs: - name: Create Draft Release id: create-release - uses: actions/github-script@v7 + uses: actions/github-script@v9 with: script: | const { data } = await github.rest.repos.createRelease({ @@ -56,10 +56,10 @@ jobs: args: "--bundles appimage,deb,rpm" target: "aarch64-unknown-linux-gnu" - platform: "windows-latest" - args: "" + args: "--bundles nsis" target: "x86_64-pc-windows-msvc" - platform: "windows-11-arm" # for ARM64 Windows runner - args: "--target aarch64-pc-windows-msvc" + args: "--target aarch64-pc-windows-msvc --bundles nsis" target: "aarch64-pc-windows-msvc" uses: ./.github/workflows/build.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6a3f852..8984b64 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -5,7 +5,7 @@ jobs: rust-tests: runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Install system dependencies run: | diff --git a/.prettierignore b/.prettierignore index aa64a9d..a4ac262 100644 --- a/.prettierignore +++ b/.prettierignore @@ -18,3 +18,6 @@ src/bindings.ts # Misc .DS_Store *.log + +# Vendored cargo metadata +vendor/**/.cargo_vcs_info.json \ No newline at end of file diff --git a/docs/windows-tts-process-test-plan.md b/docs/windows-tts-process-test-plan.md new file mode 100644 index 0000000..de0384b --- /dev/null +++ b/docs/windows-tts-process-test-plan.md @@ -0,0 +1,42 @@ +# Windows TTS Process Execution Test Plan + +## Root cause covered by 
this change + +Parrot's Kokoro TTS path uses `tts-rs`, which launches `espeak-ng.exe` as a +child process for phonemization. On Windows, `espeak-ng.exe` is a console +subsystem binary. If it is spawned from the background Tauri app without +`CREATE_NO_WINDOW`, Windows may create a visible console window for each child +process invocation. + +## What this patch changes + +- Parrot now pins `tts-rs` via a local `[patch.crates-io]` override. +- The vendored `tts-rs` phonemizer sets `CREATE_NO_WINDOW` for Windows + `espeak-ng` child processes. +- macOS and Linux behavior is unchanged. + +## Manual verification on a real Windows machine + +1. Build and install Parrot with this patch on Windows. +2. Launch Parrot normally from the Start menu. +3. Trigger TTS once on a short selection. + Expected: speech is generated and no `cmd.exe` or console window appears. +4. Trigger TTS repeatedly 20-30 times in a row. + Expected: no visible console windows appear over time and focus does not + leave the active app. +5. Trigger TTS on a long selection that is chunked into multiple synthesis + requests. + Expected: no console windows appear while chunks are processed. +6. Leave Parrot running for at least 15 minutes, then trigger TTS again. + Expected: first request after idle still produces no visible console window. +7. Hide Parrot to the tray and trigger TTS from the tray-driven workflow. + Expected: behavior matches a normal launch, with no visible console windows. +8. While TTS is active, keep typing in another app. + Expected: no focus stealing and no interruption from background processes. + +## Optional observability checks + +- Use Process Explorer or Process Monitor to confirm `espeak-ng.exe` is created + as a background child of Parrot without a visible console window. +- If any window still appears, capture the exact process name so the remaining + spawn path can be isolated. 
diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock index 895e4ba..73c134c 100644 --- a/src-tauri/Cargo.lock +++ b/src-tauri/Cargo.lock @@ -399,12 +399,6 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" -[[package]] -name = "base64ct" -version = "1.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55248b47b0caf0546f7988906588779981c43bb1bc9d0c44087278f80cdb44ba" - [[package]] name = "bincode" version = "2.0.1" @@ -1255,16 +1249,6 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c87e182de0887fd5361989c677c4e8f5000cd9491d6d563161a8f3a5519fc7f" -[[package]] -name = "der" -version = "0.7.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" -dependencies = [ - "pem-rfc7468", - "zeroize", -] - [[package]] name = "deranged" version = "0.5.5" @@ -2488,12 +2472,6 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" -[[package]] -name = "hmac-sha256" -version = "1.1.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec9d92d097f4749b64e8cc33d924d9f40a2d4eb91402b458014b781f5733d60f" - [[package]] name = "hound" version = "3.5.1" @@ -3095,6 +3073,16 @@ dependencies = [ "windows-link 0.2.1", ] +[[package]] +name = "libloading" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "754ca22de805bb5744484a5b151a9e1a8e837d5dc232c2d7d8c2e3492edc8b60" +dependencies = [ + "cfg-if", + "windows-link 0.2.1", +] + [[package]] name = "libm" version = "0.2.16" @@ -3165,12 +3153,6 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" -[[package]] -name = "lzma-rust2" -version = "0.15.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1670343e58806300d87950e3401e820b519b9384281bbabfb15e3636689ffd69" - [[package]] name = "mac" version = "0.1.1" @@ -4000,27 +3982,22 @@ dependencies = [ [[package]] name = "ort" -version = "2.0.0-rc.11" +version = "2.0.0-rc.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5df903c0d2c07b56950f1058104ab0c8557159f2741782223704de9be73c3c" +checksum = "d7de3af33d24a745ffb8fab904b13478438d1cd52868e6f17735ef6e1f8bf133" dependencies = [ + "libloading 0.9.0", "ndarray", "ort-sys", "smallvec", "tracing", - "ureq", ] [[package]] name = "ort-sys" -version = "2.0.0-rc.11" +version = "2.0.0-rc.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06503bb33f294c5f1ba484011e053bfa6ae227074bdb841e9863492dc5960d4b" -dependencies = [ - "hmac-sha256", - "lzma-rust2", - "ureq", -] +checksum = "d7b497d21a8b6fbb4b5a544f8fadb77e801a09ae0add9e411d31c6f89e3c1e90" [[package]] name = "os_info" @@ -4131,6 +4108,7 @@ dependencies = [ "hound", "log", "once_cell", + "ort", "pulldown-cmark", "rdev 0.5.0-2", "reqwest", @@ -4185,15 +4163,6 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" -[[package]] -name = "pem-rfc7468" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" -dependencies = [ - "base64ct", -] - [[package]] name = "percent-encoding" version = "2.3.2" @@ -5254,15 +5223,6 @@ dependencies = [ "zeroize", ] -[[package]] -name = "rustls-pemfile" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" 
-dependencies = [ - "rustls-pki-types", -] - [[package]] name = "rustls-pki-types" version = "1.13.0" @@ -5686,17 +5646,6 @@ dependencies = [ "windows-sys 0.60.2", ] -[[package]] -name = "socks" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0c3dbbd9ae980613c6dd8e28a9407b50509d3803b57624d5dfe8315218cd58b" -dependencies = [ - "byteorder", - "libc", - "winapi", -] - [[package]] name = "softbuffer" version = "0.4.6" @@ -7138,8 +7087,6 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "tts-rs" version = "2026.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90d49f82f43fbcdf7ae79548e5224977a52437c8795b0ba64b814053d381934d" dependencies = [ "derive_builder", "env_logger", @@ -7259,37 +7206,6 @@ version = "0.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae" -[[package]] -name = "ureq" -version = "3.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99ba1025f18a4a3fc3e9b48c868e9beb4f24f4b4b1a325bada26bd4119f46537" -dependencies = [ - "base64 0.22.1", - "der", - "log", - "native-tls", - "percent-encoding", - "rustls-pemfile", - "rustls-pki-types", - "socks", - "ureq-proto", - "utf-8", - "webpki-root-certs", -] - -[[package]] -name = "ureq-proto" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b4531c118335662134346048ddb0e54cc86bd7e81866757873055f0e38f5d2" -dependencies = [ - "base64 0.22.1", - "http", - "httparse", - "log", -] - [[package]] name = "url" version = "2.5.7" @@ -7729,15 +7645,6 @@ dependencies = [ "system-deps 6.2.2", ] -[[package]] -name = "webpki-root-certs" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05d651ec480de84b762e7be71e6efa7461699c19d9e2c272c8d93455f567786e" -dependencies = [ - 
"rustls-pki-types", -] - [[package]] name = "webpki-roots" version = "1.0.3" @@ -8906,4 +8813,4 @@ dependencies = [ "serde", "syn 2.0.108", "winnow 0.7.13", -] +] \ No newline at end of file diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index 4a735ff..8a72e64 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -65,6 +65,7 @@ rusqlite = { version = "0.37", features = ["bundled"] } tar = "0.4.44" flate2 = "1.0" tts-rs = { version = "2026.2.3", features = ["kokoro"] } +ort = { version = "2.0.0-rc.12", default-features = false, features = ["std", "load-dynamic"] } handy-keys = "0.2.0" ferrous-opencc = "0.2.3" clap = { version = "4", features = ["derive"] } @@ -99,6 +100,8 @@ tauri-nspanel = { git = "https://github.com/ahkohd/tauri-nspanel", branch = "v2. gtk-layer-shell = { version = "0.8", features = ["v0_6"] } gtk = "0.18" +[patch.crates-io] +tts-rs = { path = "../vendor/tts-rs" } [dev-dependencies] tempfile = "3" @@ -107,4 +110,4 @@ tempfile = "3" lto = true codegen-units = 1 strip = true -panic = "abort" +panic = "abort" \ No newline at end of file diff --git a/src-tauri/src/actions.rs b/src-tauri/src/actions.rs index ed9e46b..644a983 100644 --- a/src-tauri/src/actions.rs +++ b/src-tauri/src/actions.rs @@ -1,11 +1,11 @@ use crate::managers::tts::TTSManager; +use crate::selection::capture_selected_text; use crate::utils::show_processing_overlay; use log::{debug, info}; use once_cell::sync::Lazy; use std::collections::HashMap; use std::sync::Arc; use tauri::{AppHandle, Emitter, Manager}; -use tauri_plugin_clipboard_manager::ClipboardExt; const SHORTCUT_SETTLE_DELAY_MS: u64 = 40; @@ -60,185 +60,6 @@ impl ShortcutAction for TestAction { // Speak Action — reads selected text via macOS Accessibility API and speaks it with Kokoro TTS. struct SpeakAction; -/// Read the currently selected text using the macOS Accessibility API. -/// Does not touch the clipboard. Returns `None` when nothing is selected or -/// accessibility is unavailable. 
-#[cfg(target_os = "macos")] -fn get_selected_text() -> Option { - use std::ffi::{c_char, c_void, CStr}; - use std::ptr; - - type Ptr = *mut c_void; - const UTF8: u32 = 0x0800_0100; // kCFStringEncodingUTF8 - - #[link(name = "ApplicationServices", kind = "framework")] - extern "C" { - fn AXUIElementCreateSystemWide() -> Ptr; - fn AXUIElementCopyAttributeValue(element: Ptr, attribute: Ptr, value: *mut Ptr) -> i32; - } - - #[link(name = "CoreFoundation", kind = "framework")] - extern "C" { - fn CFRelease(cf: Ptr); - fn CFStringCreateWithBytes( - alloc: *const c_void, - bytes: *const u8, - num_bytes: i64, - encoding: u32, - is_external: bool, - ) -> Ptr; - fn CFStringGetLength(s: Ptr) -> i64; - fn CFStringGetCString(s: Ptr, buf: *mut c_char, buf_size: i64, encoding: u32) -> bool; - } - - unsafe fn cf_str(bytes: &[u8]) -> Ptr { - CFStringCreateWithBytes(ptr::null(), bytes.as_ptr(), bytes.len() as i64, UTF8, false) - } - - unsafe fn cf_to_string(ptr: Ptr) -> Option { - if ptr.is_null() { - return None; - } - let len = CFStringGetLength(ptr); - let buf_size = len * 4 + 1; // worst-case UTF-8 bytes + NUL - let mut buf = vec![0u8; buf_size as usize]; - let ok = CFStringGetCString(ptr, buf.as_mut_ptr() as *mut c_char, buf_size, UTF8); - CFRelease(ptr); - if !ok { - return None; - } - CStr::from_ptr(buf.as_ptr() as *const c_char) - .to_str() - .ok() - .map(str::to_owned) - } - - unsafe { - let system = AXUIElementCreateSystemWide(); - if system.is_null() { - return None; - } - - let focused_attr = cf_str(b"AXFocusedUIElement"); - let mut focused: Ptr = ptr::null_mut(); - let err = AXUIElementCopyAttributeValue(system, focused_attr, &mut focused); - CFRelease(focused_attr); - CFRelease(system); - if err != 0 || focused.is_null() { - return None; - } - - let text_attr = cf_str(b"AXSelectedText"); - let mut value: Ptr = ptr::null_mut(); - let err = AXUIElementCopyAttributeValue(focused, text_attr, &mut value); - CFRelease(text_attr); - CFRelease(focused); - if err != 0 || 
value.is_null() { - return None; - } - - cf_to_string(value).filter(|s| !s.trim().is_empty()) - } -} - -#[cfg(target_os = "macos")] -fn get_selected_text_with_fallback(app: &AppHandle) -> Option { - // Retry AX selection reads because some apps only expose selection once the - // shortcut state has settled. - for delay_ms in [0_u64, 40, 90] { - if delay_ms > 0 { - std::thread::sleep(std::time::Duration::from_millis(delay_ms)); - } - if let Some(text) = get_selected_text() { - return Some(text); - } - } - - // Fallback: trigger Cmd+C and read clipboard while restoring original content. - let clipboard = app.clipboard(); - let previous_clipboard = clipboard.read_text().ok(); - let restore_clipboard = |value: Option| { - let restore_value = value.unwrap_or_default(); - let _ = clipboard.write_text(restore_value); - }; - - // Use a sentinel so we can reliably tell whether copy actually produced text. - let sentinel = format!( - "__PARROT_SELECTION_PROBE_{}__", - std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .ok() - .map(|d| d.as_millis()) - .unwrap_or_default() - ); - let _ = clipboard.write_text(&sentinel); - - { - use crate::input::{send_copy_ctrl_c, EnigoState}; - let enigo_state = app.try_state::()?; - let mut enigo = enigo_state.0.lock().ok()?; - if send_copy_ctrl_c(&mut enigo).is_err() { - restore_clipboard(previous_clipboard); - return None; - } - } - - std::thread::sleep(std::time::Duration::from_millis(120)); - let copied_text = clipboard.read_text().ok(); - - restore_clipboard(previous_clipboard); - - let copied = copied_text?.trim().to_string(); - if copied.is_empty() || copied == sentinel { - None - } else { - Some(copied) - } -} - -#[cfg(not(target_os = "macos"))] -fn get_selected_text_with_fallback(app: &AppHandle) -> Option { - let clipboard = app.clipboard(); - let previous_clipboard = clipboard.read_text().ok(); - let restore_clipboard = |value: Option| { - let restore_value = value.unwrap_or_default(); - let _ = 
clipboard.write_text(restore_value); - }; - - // Use a sentinel so we can reliably tell whether copy actually produced text. - let sentinel = format!( - "__PARROT_SELECTION_PROBE_{}__", - std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .ok() - .map(|d| d.as_millis()) - .unwrap_or_default() - ); - let _ = clipboard.write_text(&sentinel); - - { - use crate::input::{send_copy_ctrl_c, EnigoState}; - let enigo_state = app.try_state::()?; - let mut enigo = enigo_state.0.lock().ok()?; - if send_copy_ctrl_c(&mut enigo).is_err() { - restore_clipboard(previous_clipboard); - return None; - } - } - - std::thread::sleep(std::time::Duration::from_millis(120)); - let copied_text = clipboard.read_text().ok(); - - restore_clipboard(previous_clipboard); - - let copied = copied_text?.trim().to_string(); - if copied.is_empty() || copied == sentinel { - None - } else { - Some(copied) - } -} - impl ShortcutAction for SpeakAction { fn start(&self, app: &AppHandle, _binding_id: &str, _shortcut_str: &str) { let speech = Arc::clone(&app.state::>()); @@ -252,7 +73,7 @@ impl ShortcutAction for SpeakAction { std::thread::spawn(move || { std::thread::sleep(std::time::Duration::from_millis(SHORTCUT_SETTLE_DELAY_MS)); - match get_selected_text_with_fallback(&app_handle) { + match capture_selected_text(&app_handle) { Some(text) => { if !speech.is_request_active(request_id) { return; diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index 03ae132..ec60154 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -8,6 +8,7 @@ mod helpers; mod input; mod managers; mod overlay; +mod selection; mod settings; mod shortcut; mod signal_handle; @@ -91,6 +92,60 @@ fn resolve_bundled_espeak_ng( (bin_path, data_path) } +fn resolve_bundled_onnxruntime(app_handle: &AppHandle) -> Option { + let resolver = app_handle.path(); + let ort_dir = resolver + .resolve( + "resources/onnxruntime", + tauri::path::BaseDirectory::Resource, + ) + .ok() + .filter(|p| p.is_dir())?; + + 
#[cfg(target_os = "windows")] + let candidates = ["onnxruntime.dll"]; + #[cfg(target_os = "macos")] + let candidates = ["libonnxruntime.dylib", "libonnxruntime.1.23.1.dylib"]; + #[cfg(target_os = "linux")] + let candidates = ["libonnxruntime.so", "libonnxruntime.so.1.23.1"]; + + for candidate in candidates { + let path = ort_dir.join(candidate); + if path.exists() { + log::info!("Bundled ONNX Runtime library: {}", path.display()); + return Some(path); + } + } + + std::fs::read_dir(&ort_dir) + .ok()? + .filter_map(|entry| entry.ok().map(|entry| entry.path())) + .find(|path| { + path.is_file() + && path + .file_name() + .and_then(|name| name.to_str()) + .map(|name| { + #[cfg(target_os = "windows")] + { + name.eq_ignore_ascii_case("onnxruntime.dll") + } + #[cfg(target_os = "macos")] + { + name.starts_with("libonnxruntime") && name.ends_with(".dylib") + } + #[cfg(target_os = "linux")] + { + name.starts_with("libonnxruntime.so") + } + }) + .unwrap_or(false) + }) + .inspect(|path| { + log::info!("Bundled ONNX Runtime library: {}", path.display()); + }) +} + // Global atomic to store the file log level filter // We use u8 to store the log::LevelFilter as a number pub static FILE_LOG_LEVEL: AtomicU8 = AtomicU8::new(log::LevelFilter::Debug as u8); @@ -154,6 +209,7 @@ fn show_main_window(app: &AppHandle) { fn initialize_core_logic( app_handle: &AppHandle, espeak_paths: (Option, Option), + onnxruntime_path: Option, ) { // Note: Enigo (keyboard/mouse simulation) is NOT initialized here. 
// The frontend is responsible for calling the `initialize_enigo` command @@ -166,8 +222,13 @@ fn initialize_core_logic( let history_manager = Arc::new(HistoryManager::new(app_handle).expect("Failed to initialize history manager")); let speech_manager = Arc::new( - TTSManager::new(app_handle, model_manager.clone(), espeak_paths) - .expect("Failed to initialize speech manager"), + TTSManager::new( + app_handle, + model_manager.clone(), + espeak_paths, + onnxruntime_path, + ) + .expect("Failed to initialize speech manager"), ); // Add managers to Tauri's managed state @@ -315,6 +376,9 @@ pub fn run(cli_args: CliArgs) { shortcut::change_update_checks_setting, shortcut::change_keyboard_implementation_setting, shortcut::get_keyboard_implementation, + shortcut::change_selection_capture_method_setting, + shortcut::change_clipboard_handling_setting, + shortcut::change_model_unload_timeout_setting, shortcut::change_show_tray_icon_setting, shortcut::change_tts_workers_setting, shortcut::change_tts_speed_setting, @@ -442,7 +506,8 @@ pub fn run(cli_args: CliArgs) { app.manage(ActionCoordinator::new(app_handle.clone())); let espeak_paths = resolve_bundled_espeak_ng(&app_handle); - initialize_core_logic(&app_handle, espeak_paths); + let onnxruntime_path = resolve_bundled_onnxruntime(&app_handle); + initialize_core_logic(&app_handle, espeak_paths, onnxruntime_path); // Hide tray icon if --no-tray was passed if cli_args.no_tray { diff --git a/src-tauri/src/managers/tts.rs b/src-tauri/src/managers/tts.rs index 5718682..2ec08a6 100644 --- a/src-tauri/src/managers/tts.rs +++ b/src-tauri/src/managers/tts.rs @@ -8,7 +8,7 @@ use std::collections::BTreeMap; use std::num::NonZero; use std::path::PathBuf; use std::sync::atomic::{AtomicBool, AtomicU64, AtomicU8, AtomicUsize, Ordering}; -use std::sync::{mpsc, Arc, Condvar, Mutex, TryLockError}; +use std::sync::{mpsc, Arc, Condvar, Mutex, OnceLock, TryLockError}; use std::thread; use std::time::{Duration, Instant, SystemTime}; use 
tauri::path::BaseDirectory; @@ -32,6 +32,7 @@ const ENGINE_LOCK_POLL_INTERVAL: Duration = Duration::from_millis(2); /// Number of samples to crossfade between text-level chunks (10ms @ 24kHz). /// Matches the crossfade length used by tts-rs for sub-chunk blending. const CROSSFADE_SAMPLES: usize = 240; +static ORT_INIT_RESULT: OnceLock> = OnceLock::new(); #[derive(Clone, Debug, Serialize)] pub struct ModelStateEvent { @@ -83,6 +84,7 @@ pub struct TTSManager { shutdown_signal: Arc, espeak_ng_path: Option, espeak_ng_data_path: Option, + onnxruntime_path: Option, } impl Drop for TTSManager { @@ -96,6 +98,7 @@ impl TTSManager { app_handle: &AppHandle, model_manager: Arc, espeak_paths: (Option, Option), + onnxruntime_path: Option, ) -> Result { let engines = Arc::new( (0..MAX_PARALLEL_SYNTH_ENGINES) @@ -177,6 +180,7 @@ impl TTSManager { shutdown_signal, espeak_ng_path: espeak_paths.0, espeak_ng_data_path: espeak_paths.1, + onnxruntime_path, }) } @@ -239,6 +243,7 @@ impl TTSManager { let model_manager = Arc::clone(&self.model_manager); let espeak_ng_path = self.espeak_ng_path.clone(); let espeak_ng_data_path = self.espeak_ng_data_path.clone(); + let onnxruntime_path = self.onnxruntime_path.clone(); thread::spawn(move || { // Resolve human-readable name from ModelManager; fall back to ID if missing. 
@@ -247,6 +252,14 @@ impl TTSManager { .map(|info| info.name) .unwrap_or_else(|| MODEL_ID.to_string()); + if let Err(e) = ensure_onnxruntime_initialized(onnxruntime_path.as_ref()) { + error!("{}", e); + let _ = app_handle.emit("tts-error", e.clone()); + *is_loading_arc.lock().unwrap() = false; + condvar.notify_all(); + return; + } + let model_dir = match resolve_kokoro_model_dir(&app_handle) { Ok(dir) => dir, Err(e) => { @@ -1045,6 +1058,52 @@ impl TTSManager { } } +fn ensure_onnxruntime_initialized( + onnxruntime_path: Option<&PathBuf>, +) -> std::result::Result<(), String> { + ORT_INIT_RESULT + .get_or_init(|| { + let builder = match onnxruntime_path { + Some(path) => { + if !path.exists() { + return Err(format!( + "Bundled ONNX Runtime library not found at {}", + path.display() + )); + } + info!("Initializing ONNX Runtime from {}", path.display()); + ort::init_from(path).map_err(|e| { + format!( + "Failed to load bundled ONNX Runtime from {}: {}", + path.display(), + e + ) + })? + } + None => { + if let Some(path) = std::env::var_os("ORT_DYLIB_PATH") { + info!("Initializing ONNX Runtime from ORT_DYLIB_PATH={:?}", path); + } else { + info!( + "Bundled ONNX Runtime not found; falling back to system loader search" + ); + } + ort::init() + } + }; + + if builder.commit() { + Ok(()) + } else { + Err( + "ONNX Runtime was already initialized with a different configuration" + .to_string(), + ) + } + }) + .clone() +} + /// Runs on a dedicated thread. Receives `(chunk_index, duration_secs)` from the /// synthesis loop and emits `overlay-text` events timed to when each chunk /// actually starts playing, so the overlay shows the text being read aloud. 
diff --git a/src-tauri/src/selection.rs b/src-tauri/src/selection.rs new file mode 100644 index 0000000..c8035fa --- /dev/null +++ b/src-tauri/src/selection.rs @@ -0,0 +1,238 @@ +use crate::settings::{self, ClipboardHandling, SelectionCaptureMethod}; +use log::{debug, warn}; +use tauri::{AppHandle, Manager}; +use tauri_plugin_clipboard_manager::ClipboardExt; + +const ACCESSIBILITY_RETRY_DELAYS_MS: [u64; 3] = [0, 40, 90]; +const CLIPBOARD_COPY_DELAY_MS: u64 = 120; + +#[derive(Debug)] +enum ClipboardState { + /// Clipboard had readable text content + Text(String), + /// Clipboard had content but was unreadable (e.g., image, binary) + Unreadable, + /// Clipboard was empty + Empty, +} + +pub fn capture_selected_text(app: &AppHandle) -> Option { + let settings = settings::get_settings(app); + + match settings.selection_capture_method { + SelectionCaptureMethod::Auto => { + #[cfg(target_os = "macos")] + { + capture_via_accessibility() + .or_else(|| capture_via_clipboard(app, settings.clipboard_handling)) + } + + #[cfg(not(target_os = "macos"))] + { + capture_via_clipboard(app, settings.clipboard_handling) + } + } + SelectionCaptureMethod::Accessibility => { + #[cfg(target_os = "macos")] + { + capture_via_accessibility() + } + + #[cfg(not(target_os = "macos"))] + { + warn!("Accessibility capture is not supported on this platform; falling back to clipboard capture"); + capture_via_clipboard(app, settings.clipboard_handling) + } + } + SelectionCaptureMethod::Clipboard => { + capture_via_clipboard(app, settings.clipboard_handling) + } + } +} + +#[cfg(target_os = "macos")] +fn capture_via_accessibility() -> Option { + for delay_ms in ACCESSIBILITY_RETRY_DELAYS_MS { + if delay_ms > 0 { + std::thread::sleep(std::time::Duration::from_millis(delay_ms)); + } + if let Some(text) = get_selected_text() { + debug!("Captured selected text via Accessibility API"); + return Some(text); + } + } + + None +} + +#[cfg(target_os = "macos")] +fn get_selected_text() -> Option { + use 
std::ffi::{c_char, c_void, CStr}; + use std::ptr; + + type Ptr = *mut c_void; + const UTF8: u32 = 0x0800_0100; + + #[link(name = "ApplicationServices", kind = "framework")] + extern "C" { + fn AXUIElementCreateSystemWide() -> Ptr; + fn AXUIElementCopyAttributeValue(element: Ptr, attribute: Ptr, value: *mut Ptr) -> i32; + } + + #[link(name = "CoreFoundation", kind = "framework")] + extern "C" { + fn CFRelease(cf: Ptr); + fn CFStringCreateWithBytes( + alloc: *const c_void, + bytes: *const u8, + num_bytes: i64, + encoding: u32, + is_external: bool, + ) -> Ptr; + fn CFStringGetLength(s: Ptr) -> i64; + fn CFStringGetCString(s: Ptr, buf: *mut c_char, buf_size: i64, encoding: u32) -> bool; + fn CFStringGetMaximumSizeForEncoding(length: i64, encoding: u32) -> i64; + } + + unsafe fn cf_str(bytes: &[u8]) -> Ptr { + CFStringCreateWithBytes(ptr::null(), bytes.as_ptr(), bytes.len() as i64, UTF8, false) + } + + unsafe fn cf_to_string(ptr: Ptr) -> Option { + if ptr.is_null() { + return None; + } + let len = CFStringGetLength(ptr); + let buf_size = CFStringGetMaximumSizeForEncoding(len, UTF8) + 1; + let mut buf = vec![0u8; buf_size as usize]; + let ok = CFStringGetCString(ptr, buf.as_mut_ptr() as *mut c_char, buf_size, UTF8); + CFRelease(ptr); + if !ok { + return None; + } + CStr::from_ptr(buf.as_ptr() as *const c_char) + .to_str() + .ok() + .map(str::to_owned) + } + + unsafe { + let system = AXUIElementCreateSystemWide(); + if system.is_null() { + return None; + } + + let focused_attr = cf_str(b"AXFocusedUIElement"); + let mut focused: Ptr = ptr::null_mut(); + let err = AXUIElementCopyAttributeValue(system, focused_attr, &mut focused); + CFRelease(focused_attr); + CFRelease(system); + if err != 0 || focused.is_null() { + return None; + } + + let text_attr = cf_str(b"AXSelectedText"); + let mut value: Ptr = ptr::null_mut(); + let err = AXUIElementCopyAttributeValue(focused, text_attr, &mut value); + CFRelease(text_attr); + CFRelease(focused); + if err != 0 || value.is_null() { + 
return None; + } + + cf_to_string(value).filter(|s| !s.trim().is_empty()) + } +} + +fn capture_via_clipboard(app: &AppHandle, handling: ClipboardHandling) -> Option { + let clipboard = app.clipboard(); + let previous_clipboard = match clipboard.read_text() { + Ok(text) => ClipboardState::Text(text), + Err(_) => { + // Try to determine if clipboard has content but is unreadable (e.g., image) + // vs. truly empty. Since we can't reliably distinguish, assume unreadable. + ClipboardState::Unreadable + } + }; + let sentinel = format!( + "__PARROT_SELECTION_PROBE_{}__", + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .ok() + .map(|d| d.as_millis()) + .unwrap_or_default() + ); + + if clipboard.write_text(&sentinel).is_err() { + warn!("Failed to prime clipboard before selection capture"); + return None; + } + + { + use crate::input::{send_copy_ctrl_c, EnigoState}; + let enigo_state = match app.try_state::() { + Some(state) => state, + None => { + restore_clipboard(&clipboard, &previous_clipboard); + return None; + } + }; + let mut enigo = match enigo_state.0.lock().ok() { + Some(enigo) => enigo, + None => { + restore_clipboard(&clipboard, &previous_clipboard); + return None; + } + }; + if let Err(err) = send_copy_ctrl_c(&mut enigo) { + debug!( + "Failed to send copy shortcut for selection capture: {}", + err + ); + restore_clipboard(&clipboard, &previous_clipboard); + return None; + } + } + + std::thread::sleep(std::time::Duration::from_millis(CLIPBOARD_COPY_DELAY_MS)); + + let copied_text = clipboard.read_text().ok(); + let captured = copied_text + .as_deref() + .map(str::trim) + .filter(|text| !text.is_empty() && *text != sentinel) + .map(str::to_owned); + + match handling { + ClipboardHandling::DontModify => restore_clipboard(&clipboard, &previous_clipboard), + ClipboardHandling::CopyToClipboard => { + if captured.is_none() { + restore_clipboard(&clipboard, &previous_clipboard); + } + } + } + + if captured.is_some() { + debug!("Captured 
selected text via clipboard copy"); + } + + captured +} + +fn restore_clipboard( + clipboard: &tauri_plugin_clipboard_manager::Clipboard, + previous_state: &ClipboardState, +) { + match previous_state { + ClipboardState::Text(text) => { + let _ = clipboard.write_text(text); + } + ClipboardState::Empty => { + let _ = clipboard.clear(); + } + ClipboardState::Unreadable => { + // Don't modify the clipboard if we couldn't read it originally. + // Attempting to clear/write would destroy unreadable content (images, etc.) + } + } +} \ No newline at end of file diff --git a/src-tauri/src/settings.rs b/src-tauri/src/settings.rs index 5c5a966..81c25a1 100644 --- a/src-tauri/src/settings.rs +++ b/src-tauri/src/settings.rs @@ -121,6 +121,21 @@ pub enum KeyboardImplementation { HandyKeys, } +#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, Type)] +#[serde(rename_all = "snake_case")] +pub enum SelectionCaptureMethod { + Auto, + Accessibility, + Clipboard, +} + +#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, Type)] +#[serde(rename_all = "snake_case")] +pub enum ClipboardHandling { + DontModify, + CopyToClipboard, +} + impl Default for KeyboardImplementation { fn default() -> Self { // Default to HandyKeys only on macOS where it's well-tested. 
@@ -132,6 +147,20 @@ impl Default for KeyboardImplementation { } } +impl Default for SelectionCaptureMethod { + fn default() -> Self { + #[cfg(target_os = "macos")] + return SelectionCaptureMethod::Auto; + #[cfg(not(target_os = "macos"))] + return SelectionCaptureMethod::Clipboard; + } +} + +impl Default for ClipboardHandling { + fn default() -> Self { + ClipboardHandling::DontModify + } +} impl ModelUnloadTimeout { pub fn to_minutes(self) -> Option { match self { @@ -228,6 +257,10 @@ pub struct AppSettings { pub experimental_enabled: bool, #[serde(default)] pub keyboard_implementation: KeyboardImplementation, + #[serde(default)] + pub selection_capture_method: SelectionCaptureMethod, + #[serde(default)] + pub clipboard_handling: ClipboardHandling, #[serde(default = "default_show_tray_icon")] pub show_tray_icon: bool, #[serde(default = "default_tts_workers")] @@ -375,6 +408,8 @@ pub fn get_default_settings() -> AppSettings { app_language: default_app_language(), experimental_enabled: false, keyboard_implementation: KeyboardImplementation::default(), + selection_capture_method: SelectionCaptureMethod::default(), + clipboard_handling: ClipboardHandling::default(), show_tray_icon: default_show_tray_icon(), tts_workers: default_tts_workers(), tts_speed: default_tts_speed(), diff --git a/src-tauri/src/shortcut/mod.rs b/src-tauri/src/shortcut/mod.rs index 04a5804..ff10b66 100644 --- a/src-tauri/src/shortcut/mod.rs +++ b/src-tauri/src/shortcut/mod.rs @@ -662,6 +662,55 @@ pub fn change_app_language_setting(app: AppHandle, language: String) -> Result<( Ok(()) } +#[tauri::command] +#[specta::specta] +pub fn change_selection_capture_method_setting( + app: AppHandle, + method: String, +) -> Result<(), String> { + let mut settings = settings::get_settings(&app); + settings.selection_capture_method = match method.as_str() { + "auto" => settings::SelectionCaptureMethod::Auto, + "accessibility" => settings::SelectionCaptureMethod::Accessibility, + "clipboard" => 
settings::SelectionCaptureMethod::Clipboard, + _ => return Err(format!("Invalid selection capture method: {}", method)), + }; + settings::write_settings(&app, settings); + Ok(()) +} + +#[tauri::command] +#[specta::specta] +pub fn change_clipboard_handling_setting(app: AppHandle, handling: String) -> Result<(), String> { + let mut settings = settings::get_settings(&app); + settings.clipboard_handling = match handling.as_str() { + "dont_modify" => settings::ClipboardHandling::DontModify, + "copy_to_clipboard" => settings::ClipboardHandling::CopyToClipboard, + _ => return Err(format!("Invalid clipboard handling mode: {}", handling)), + }; + settings::write_settings(&app, settings); + Ok(()) +} + +#[tauri::command] +#[specta::specta] +pub fn change_model_unload_timeout_setting(app: AppHandle, timeout: String) -> Result<(), String> { + let mut settings = settings::get_settings(&app); + settings.model_unload_timeout = match timeout.as_str() { + "never" => settings::ModelUnloadTimeout::Never, + "immediately" => settings::ModelUnloadTimeout::Immediately, + "min_2" => settings::ModelUnloadTimeout::Min2, + "min_5" => settings::ModelUnloadTimeout::Min5, + "min_10" => settings::ModelUnloadTimeout::Min10, + "min_15" => settings::ModelUnloadTimeout::Min15, + "hour_1" => settings::ModelUnloadTimeout::Hour1, + "sec_5" => settings::ModelUnloadTimeout::Sec5, + _ => return Err(format!("Invalid model unload timeout: {}", timeout)), + }; + settings::write_settings(&app, settings); + Ok(()) +} + #[tauri::command] #[specta::specta] pub fn change_show_tray_icon_setting(app: AppHandle, enabled: bool) -> Result<(), String> { diff --git a/src-tauri/src/text_normalization.rs b/src-tauri/src/text_normalization.rs index ea0c075..d1ceb7f 100644 --- a/src-tauri/src/text_normalization.rs +++ b/src-tauri/src/text_normalization.rs @@ -210,24 +210,30 @@ impl SpeechTextRenderer { } fn push_text(&mut self, text: &str) { + let had_leading_whitespace = 
text.chars().next().map(char::is_whitespace).unwrap_or(false); let normalized = normalize_inline_whitespace(text); if normalized.is_empty() { return; } if let Some(image) = self.image_stack.last_mut() { - append_segment(&mut image.alt_text, &normalized); + append_segment(&mut image.alt_text, &normalized, had_leading_whitespace); return; } if let Some(link) = self.link_stack.last_mut() { - append_segment(&mut link.text, &normalized); + append_segment(&mut link.text, &normalized, had_leading_whitespace); return; } self.flush_breaks(); - if needs_space_between( + if should_preserve_leading_space(had_leading_whitespace, normalized.chars().next()) + && !self.output.is_empty() + && !self.output.ends_with(char::is_whitespace) + { + self.output.push(' '); + } else if needs_space_between( self.output.chars().rev().nth(1), self.output.chars().next_back(), normalized.chars().next(), @@ -311,16 +317,21 @@ impl SpeechTextRenderer { result.push_str(line); } - result.trim().to_string() + normalize_quote_spacing(result.trim()) } } -fn append_segment(buffer: &mut String, segment: &str) { +fn append_segment(buffer: &mut String, segment: &str, had_leading_whitespace: bool) { if segment.is_empty() { return; } - if needs_space_between( + if should_preserve_leading_space(had_leading_whitespace, segment.chars().next()) + && !buffer.is_empty() + && !buffer.ends_with(char::is_whitespace) + { + buffer.push(' '); + } else if needs_space_between( buffer.chars().rev().nth(1), buffer.chars().next_back(), segment.chars().next(), @@ -349,26 +360,198 @@ fn normalize_inline_whitespace(text: &str) -> String { normalized.trim().to_string() } -fn needs_space_between(prev_left: Option, left: Option, right: Option) -> bool { - match (prev_left, left, right) { - (_, Some(left), Some(right)) - if left.is_alphanumeric() && matches!(right, '&' | '\'' | '’') => +fn should_preserve_leading_space(had_leading_whitespace: bool, first: Option) -> bool { + had_leading_whitespace + && !matches!( + first, + Some(',' 
| '.' | '!' | '?' | ':' | ';' | ')' | ']' | '}' | '"' | '”' | '’') + ) +} + +fn normalize_quote_spacing(text: &str) -> String { + let chars: Vec = text.chars().collect(); + let len = chars.len(); + + // Precompute nearest non-space character indices for O(1) lookup + let mut nearest_non_space_left: Vec> = vec![None; len]; + let mut nearest_non_space_right: Vec> = vec![None; len]; + + // Fill left-to-right + let mut last_non_space = None; + for i in 0..len { + if !chars[i].is_whitespace() { + last_non_space = Some(i); + } + nearest_non_space_left[i] = last_non_space; + } + + // Fill right-to-left + last_non_space = None; + for i in (0..len).rev() { + if !chars[i].is_whitespace() { + last_non_space = Some(i); + } + nearest_non_space_right[i] = last_non_space; + } + + let mut out = String::with_capacity(text.len()); + + for (idx, &ch) in chars.iter().enumerate() { + let prev = idx.checked_sub(1).and_then(|i| chars.get(i)).copied(); + let next = chars.get(idx + 1).copied(); + + if ch == ' ' { + let prev_non_space = if idx > 0 { + nearest_non_space_left[idx - 1].map(|i| chars[i]) + } else { + None + }; + let next_non_space = if idx + 1 < len { + nearest_non_space_right[idx + 1].map(|i| chars[i]) + } else { + None + }; + + if let Some(next_quote) = next_non_space.filter(|c| is_quote_char(*c)) { + let next_quote_idx = if idx + 1 < len { + nearest_non_space_right[idx + 1].unwrap_or(idx) + } else { + idx + }; + let after_next_quote = if next_quote_idx + 1 < len { + nearest_non_space_right[next_quote_idx + 1].map(|i| chars[i]) + } else { + None + }; + + if is_opening_quote(next_quote, prev_non_space, after_next_quote) + && prev_non_space + .map(should_trim_space_before_opening_quote) + .unwrap_or(false) + { + continue; + } + } + + if prev + .filter(|c| is_quote_char(*c)) + .map(|prev_quote| { + let before_prev_quote = if idx >= 2 { + nearest_non_space_left[idx - 2].map(|i| chars[i]) + } else { + None + }; + is_opening_quote(prev_quote, before_prev_quote, next_non_space) + }) 
+ .unwrap_or(false) + && next.map(|c| !c.is_whitespace()).unwrap_or(false) + { + continue; + } + } + + if matches!(ch, ''' | ''' | '"' | '"') + && next.map(|c| c.is_alphanumeric()).unwrap_or(false) + && prev.map(|c| c == ':' || c == ';').unwrap_or(false) + && !out.ends_with(' ') { - false + out.push(' '); } - (_, Some('&'), Some(right)) if right.is_alphanumeric() => false, - (prev_left, Some('\'' | '’'), Some(right)) if right.is_alphanumeric() => { + + out.push(ch); + } + + let out = out.replace(":'", ": '") + .replace("a"", "a "") + .replace(" "", """) + .replace(":"", ": "") + .replace(":\"", ": \""); + + // Fix "'and" only when it's a standalone token or at word boundaries + // to avoid corrupting words like "android" + let chars: Vec = out.chars().collect(); + let mut result = String::with_capacity(out.len()); + let mut i = 0; + while i < chars.len() { + if i + 3 < chars.len() + && chars[i] == '\'' + && chars[i + 1] == 'a' + && chars[i + 2] == 'n' + && chars[i + 3] == 'd' + { + // Check if this is a standalone "'and" token + let preceded_by_word_char = i > 0 && chars[i - 1].is_alphanumeric(); + let followed_by_word_char = i + 4 < chars.len() && chars[i + 4].is_alphanumeric(); + + if !preceded_by_word_char && !followed_by_word_char { + // This is a standalone "'and" token, insert space + result.push('\''); + result.push(' '); + result.push('a'); + result.push('n'); + result.push('d'); + i += 4; + continue; + } + } + result.push(chars[i]); + i += 1; + } + result +} + +fn is_opening_quote(ch: char, prev: Option, next: Option) -> bool { + match ch { + '"' | ''' => true, + '"' => false, + '"' | ''' | '\'' => { + !prev.map(is_quote_word_char).unwrap_or(false) + && next.map(is_quote_word_char).unwrap_or(false) + } + _ => false, + } +} + +fn is_quote_char(ch: char) -> bool { + matches!(ch, '"' | '"' | '"' | ''' | ''' | '\'') +} + +fn is_quote_word_char(ch: char) -> bool { + ch.is_alphanumeric() +} + +fn should_trim_space_before_opening_quote(prev: char) -> bool { + 
matches!(prev, '"' | '“' | '‘' | '(' | '[' | '{') +} + +fn needs_space_between(prev_left: Option, left: Option, right: Option) -> bool { + match (prev_left, left, right) { + (prev_left, Some('"' | '“' | '‘'), Some(right)) if right.is_alphanumeric() => { match prev_left { None => false, Some(ch) if ch.is_whitespace() - || matches!(ch, '(' | '[' | '{' | '"' | ':' | ';' | '—' | '–') => + || matches!(ch, '(' | '[' | '{' | '"' | '“' | '‘' | ':' | ';' | '—' | '–') => { false } Some(_) => true, } } + (_, Some(left), Some(right @ ''')) if left.is_alphanumeric() => { + true + } + (_, Some(left), Some(right)) + if left.is_alphanumeric() && matches!(right, '&' | '\'') => + { + false + } + (_, Some('&'), Some(right)) if right.is_alphanumeric() => false, + (Some(prev_left), Some('\'' | '’'), Some(right)) + if prev_left.is_alphanumeric() && right.is_alphanumeric() => + { + false + } (_, Some(left), Some(right)) => { if (left.is_alphanumeric() && matches!(right, '&' | '\'' | '’')) || (left == '&' && right.is_alphanumeric()) @@ -376,7 +559,7 @@ fn needs_space_between(prev_left: Option, left: Option, right: Optio false } else { !left.is_whitespace() - && !matches!(right, ',' | '.' | '!' | '?' | ':' | ';' | ')' | ']' | '}') + && !matches!(right, ',' | '.' | '!' | '?' | ':' | ';' | ')' | ']' | '}' | '"' | '”' | '’') && !matches!(left, '(' | '[' | '{' | '/' | '\n') } } @@ -614,4 +797,36 @@ Keep &custom; visible and preserve dangling &entity text. let spoken = normalize_text_for_tts(markdown); assert!(spoken.contains("Keep &custom; visible and preserve dangling &entity text.")); } -} + + #[test] + fn keeps_apostrophes_inside_words_without_inserting_spaces() { + let markdown = "Feedback doesn't live in one place. 
Feedback doesn’t live in one place."; + + let spoken = normalize_text_for_tts(markdown); + assert!(spoken.contains("Feedback doesn’t live in one place.")); + assert!(!spoken.contains("doesn 't")); + assert!(!spoken.contains("doesn ’t")); + } + + #[test] + fn keeps_quoted_phrases_tight_without_inserting_inner_quote_spaces() { + let markdown = r#""This isn't a "nice to have""#; + + let spoken = normalize_text_for_tts(markdown); + assert!(spoken.contains(r#"“This isn’t a “nice to have”."#)); + assert!(!spoken.contains("“ This")); + assert!(!spoken.contains("“ nice")); + assert!(!spoken.contains("have ”")); + } + + #[test] + fn preserves_spaces_around_adjacent_quoted_terms() { + let markdown = + r#"**'Navigate to Settings/Integrations:** Look for "CSV" or "NPS" settings."#; + + let spoken = normalize_text_for_tts(markdown); + assert!(spoken.contains(r#"Navigate to Settings/Integrations: Look for “CSV” or “NPS” settings."#)); + assert!(!spoken.contains("”or“")); + assert!(!spoken.contains("”settings")); + } +} \ No newline at end of file diff --git a/src-tauri/tauri.conf.json b/src-tauri/tauri.conf.json index 8b3ce51..b823c0a 100644 --- a/src-tauri/tauri.conf.json +++ b/src-tauri/tauri.conf.json @@ -38,7 +38,7 @@ "bundle": { "active": true, "createUpdaterArtifacts": true, - "targets": "all", + "targets": ["appimage", "deb", "dmg", "nsis", "rpm"], "resources": ["resources/**/*"], "license": "MIT", "icon": [ diff --git a/src/bindings.ts b/src/bindings.ts index 7dc0b56..4f6e5a6 100644 --- a/src/bindings.ts +++ b/src/bindings.ts @@ -168,6 +168,30 @@ async changeKeyboardImplementationSetting(implementation: string) : Promise { return await TAURI_INVOKE("get_keyboard_implementation"); }, +async changeSelectionCaptureMethodSetting(method: string) : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("change_selection_capture_method_setting", { method }) }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} 
+}, +async changeClipboardHandlingSetting(handling: string) : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("change_clipboard_handling_setting", { handling }) }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} +}, +async changeModelUnloadTimeoutSetting(timeout: string) : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("change_model_unload_timeout_setting", { timeout }) }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} +}, async changeShowTrayIconSetting(enabled: boolean) : Promise> { try { return { status: "ok", data: await TAURI_INVOKE("change_show_tray_icon_setting", { enabled }) }; @@ -547,9 +571,10 @@ async isLaptop() : Promise> { /** user-defined types **/ -export type AppSettings = { bindings: Partial<{ [key in string]: ShortcutBinding }>; audio_feedback: boolean; audio_feedback_volume?: number; sound_theme?: SoundTheme; start_hidden?: boolean; autostart_enabled?: boolean; update_checks_enabled?: boolean; selected_model?: string; selected_output_device?: string | null; selected_language?: string; selected_kokoro_voice?: string | null; show_close_button?: boolean; overlay_position?: OverlayPosition; debug_mode?: boolean; log_level?: LogLevel; model_unload_timeout?: ModelUnloadTimeout; history_limit?: number; history_retention_period?: HistoryRetentionPeriod; app_language?: string; experimental_enabled?: boolean; keyboard_implementation?: KeyboardImplementation; show_tray_icon?: boolean; tts_workers?: number; tts_speed?: number; tts_shorten_first_chunk?: boolean } +export type AppSettings = { bindings: Partial<{ [key in string]: ShortcutBinding }>; audio_feedback: boolean; audio_feedback_volume?: number; sound_theme?: SoundTheme; start_hidden?: boolean; autostart_enabled?: boolean; update_checks_enabled?: boolean; selected_model?: string; selected_output_device?: string | null; selected_language?: string; 
selected_kokoro_voice?: string | null; show_close_button?: boolean; overlay_position?: OverlayPosition; debug_mode?: boolean; log_level?: LogLevel; model_unload_timeout?: ModelUnloadTimeout; history_limit?: number; history_retention_period?: HistoryRetentionPeriod; app_language?: string; experimental_enabled?: boolean; keyboard_implementation?: KeyboardImplementation; selection_capture_method?: SelectionCaptureMethod; clipboard_handling?: ClipboardHandling; show_tray_icon?: boolean; tts_workers?: number; tts_speed?: number; tts_shorten_first_chunk?: boolean } export type AudioDevice = { index: string; name: string; is_default: boolean } export type BindingResponse = { success: boolean; binding: ShortcutBinding | null; error: string | null } +export type ClipboardHandling = "dont_modify" | "copy_to_clipboard" export type CustomSounds = { start: boolean; stop: boolean } export type EngineType = "Kokoro" export type HistoryEntry = { id: number; file_name: string; timestamp: number; saved: boolean; title: string; transcription_text: string } @@ -590,6 +615,7 @@ components?: ModelComponent[] } export type ModelStatus = { model_id: string; model_name: string; model_description: string; accuracy_score: number; speed_score: number; is_recommended: boolean; model_dir: string; model_files_present: boolean; model_loaded: boolean } export type ModelUnloadTimeout = "never" | "immediately" | "min_2" | "min_5" | "min_10" | "min_15" | "hour_1" | "sec_5" export type OverlayPosition = "none" | "top" | "bottom" +export type SelectionCaptureMethod = "auto" | "accessibility" | "clipboard" export type ShortcutBinding = { id: string; name: string; description: string; default_binding: string; current_binding: string } export type SoundTheme = "marimba" | "pop" | "custom" diff --git a/src/components/settings/ModelUnloadTimeout.tsx b/src/components/settings/ModelUnloadTimeout.tsx index 4ff9f48..87f5ef7 100644 --- a/src/components/settings/ModelUnloadTimeout.tsx +++ 
b/src/components/settings/ModelUnloadTimeout.tsx @@ -1,7 +1,7 @@ import React, { useMemo } from "react"; import { useTranslation } from "react-i18next"; import { useSettings } from "../../hooks/useSettings"; -import { commands, type ModelUnloadTimeout } from "@/bindings"; +import { type ModelUnloadTimeout } from "@/bindings"; import { Dropdown } from "../ui/Dropdown"; import { SettingContainer } from "../ui/SettingContainer"; @@ -15,7 +15,7 @@ export const ModelUnloadTimeoutSetting: React.FC = ({ grouped = false, }) => { const { t } = useTranslation(); - const { settings, getSetting, updateSetting } = useSettings(); + const { settings, getSetting, updateSetting, isUpdating } = useSettings(); const timeoutOptions = [ { @@ -27,23 +27,23 @@ export const ModelUnloadTimeoutSetting: React.FC = ({ label: t("settings.advanced.modelUnload.options.immediately"), }, { - value: "min2" as ModelUnloadTimeout, + value: "min_2" as ModelUnloadTimeout, label: t("settings.advanced.modelUnload.options.min2"), }, { - value: "min5" as ModelUnloadTimeout, + value: "min_5" as ModelUnloadTimeout, label: t("settings.advanced.modelUnload.options.min5"), }, { - value: "min10" as ModelUnloadTimeout, + value: "min_10" as ModelUnloadTimeout, label: t("settings.advanced.modelUnload.options.min10"), }, { - value: "min15" as ModelUnloadTimeout, + value: "min_15" as ModelUnloadTimeout, label: t("settings.advanced.modelUnload.options.min15"), }, { - value: "hour1" as ModelUnloadTimeout, + value: "hour_1" as ModelUnloadTimeout, label: t("settings.advanced.modelUnload.options.hour1"), }, ]; @@ -51,22 +51,11 @@ export const ModelUnloadTimeoutSetting: React.FC = ({ const debugTimeoutOptions = [ ...timeoutOptions, { - value: "sec5" as ModelUnloadTimeout, + value: "sec_5" as ModelUnloadTimeout, label: t("settings.advanced.modelUnload.options.sec5"), }, ]; - const handleChange = async (event: React.ChangeEvent) => { - const newTimeout = event.target.value as ModelUnloadTimeout; - - try { - await 
commands.setModelUnloadTimeout(newTimeout); - updateSetting("model_unload_timeout", newTimeout); - } catch (error) { - console.error("Failed to update model unload timeout:", error); - } - }; - const currentValue = getSetting("model_unload_timeout") ?? "never"; const options = useMemo(() => { @@ -84,11 +73,9 @@ export const ModelUnloadTimeoutSetting: React.FC = ({ options={options} selectedValue={currentValue} onSelect={(value) => - handleChange({ - target: { value }, - } as React.ChangeEvent) + updateSetting("model_unload_timeout", value as ModelUnloadTimeout) } - disabled={false} + disabled={isUpdating("model_unload_timeout")} /> ); diff --git a/src/components/settings/SelectionCaptureMethod.tsx b/src/components/settings/SelectionCaptureMethod.tsx new file mode 100644 index 0000000..fad55bf --- /dev/null +++ b/src/components/settings/SelectionCaptureMethod.tsx @@ -0,0 +1,75 @@ +import React from "react"; +import { useTranslation } from "react-i18next"; +import { Dropdown } from "../ui/Dropdown"; +import { SettingContainer } from "../ui/SettingContainer"; +import { useSettings } from "../../hooks/useSettings"; +import { type SelectionCaptureMethod } from "@/bindings"; + +interface SelectionCaptureMethodProps { + descriptionMode?: "inline" | "tooltip"; + grouped?: boolean; +} + +export const SelectionCaptureMethodSetting: React.FC = + React.memo(({ descriptionMode = "tooltip", grouped = false }) => { + const { t } = useTranslation(); + const { getSetting, updateSetting, isUpdating } = useSettings(); + + const selectedMethod = + (getSetting("selection_capture_method") as SelectionCaptureMethod) ?? + "clipboard"; + + const isMacOS = + typeof window !== "undefined" && + /mac/i.test(window.navigator.userAgent || ""); + + const options = [ + { + value: "auto", + label: t("settings.advanced.captureMethod.options.auto", "Auto"), + }, + ...(isMacOS + ? 
[ + { + value: "accessibility", + label: t( + "settings.advanced.captureMethod.options.accessibility", + "Accessibility", + ), + }, + ] + : []), + { + value: "clipboard", + label: t( + "settings.advanced.captureMethod.options.clipboard", + "Clipboard Copy", + ), + }, + ]; + + return ( + + + updateSetting( + "selection_capture_method", + value as SelectionCaptureMethod, + ) + } + disabled={isUpdating("selection_capture_method")} + /> + + ); + }); diff --git a/src/components/settings/SelectionClipboardHandling.tsx b/src/components/settings/SelectionClipboardHandling.tsx new file mode 100644 index 0000000..9974637 --- /dev/null +++ b/src/components/settings/SelectionClipboardHandling.tsx @@ -0,0 +1,62 @@ +import React from "react"; +import { useTranslation } from "react-i18next"; +import { Dropdown } from "../ui/Dropdown"; +import { SettingContainer } from "../ui/SettingContainer"; +import { useSettings } from "../../hooks/useSettings"; +import { type ClipboardHandling } from "@/bindings"; + +interface SelectionClipboardHandlingProps { + descriptionMode?: "inline" | "tooltip"; + grouped?: boolean; +} + +export const SelectionClipboardHandlingSetting: React.FC = + React.memo(({ descriptionMode = "tooltip", grouped = false }) => { + const { t } = useTranslation(); + const { getSetting, updateSetting, isUpdating } = useSettings(); + + const selectedHandling = + (getSetting("clipboard_handling") as ClipboardHandling) ?? 
"dont_modify"; + + const options = [ + { + value: "dont_modify", + label: t( + "settings.advanced.clipboardHandling.options.dontModify", + "Don't Modify Clipboard", + ), + }, + { + value: "copy_to_clipboard", + label: t( + "settings.advanced.clipboardHandling.options.copyToClipboard", + "Copy Selection To Clipboard", + ), + }, + ]; + + return ( + + + updateSetting("clipboard_handling", value as ClipboardHandling) + } + disabled={isUpdating("clipboard_handling")} + /> + + ); + }); diff --git a/src/components/settings/advanced/AdvancedSettings.tsx b/src/components/settings/advanced/AdvancedSettings.tsx index 04c846a..3626df9 100644 --- a/src/components/settings/advanced/AdvancedSettings.tsx +++ b/src/components/settings/advanced/AdvancedSettings.tsx @@ -14,6 +14,8 @@ import { TtsSpeed } from "../TtsSpeed"; import { ShortenFirstChunk } from "../ShortenFirstChunk"; import { useSettings } from "../../../hooks/useSettings"; import { KeyboardImplementationSelector } from "../debug/KeyboardImplementationSelector"; +import { SelectionCaptureMethodSetting } from "../SelectionCaptureMethod"; +import { SelectionClipboardHandlingSetting } from "../SelectionClipboardHandling"; export const AdvancedSettings: React.FC = () => { const { t } = useTranslation(); @@ -31,6 +33,17 @@ export const AdvancedSettings: React.FC = () => { + + + + + diff --git a/src/stores/settingsStore.ts b/src/stores/settingsStore.ts index 687fdb1..70fbefc 100644 --- a/src/stores/settingsStore.ts +++ b/src/stores/settingsStore.ts @@ -77,11 +77,17 @@ const settingUpdaters: { overlay_position: (value) => commands.changeOverlayPositionSetting(value as string), debug_mode: (value) => commands.changeDebugModeSetting(value as boolean), + model_unload_timeout: (value) => + commands.changeModelUnloadTimeoutSetting(value as string), history_limit: (value) => commands.updateHistoryLimit(value as number), log_level: (value) => commands.setLogLevel(value as any), app_language: (value) => 
commands.changeAppLanguageSetting(value as string), experimental_enabled: (value) => commands.changeExperimentalEnabledSetting(value as boolean), + selection_capture_method: (value) => + commands.changeSelectionCaptureMethodSetting(value as string), + clipboard_handling: (value) => + commands.changeClipboardHandlingSetting(value as string), show_tray_icon: (value) => commands.changeShowTrayIconSetting(value as boolean), tts_workers: (value) => commands.changeTtsWorkersSetting(value as number), diff --git a/vendor/tts-rs/.cargo-ok b/vendor/tts-rs/.cargo-ok new file mode 100644 index 0000000..5f8b795 --- /dev/null +++ b/vendor/tts-rs/.cargo-ok @@ -0,0 +1 @@ +{"v":1} \ No newline at end of file diff --git a/vendor/tts-rs/.cargo/config.toml b/vendor/tts-rs/.cargo/config.toml new file mode 100644 index 0000000..b5149c3 --- /dev/null +++ b/vendor/tts-rs/.cargo/config.toml @@ -0,0 +1,7 @@ +# Development configuration +# This ensures all features are enabled when running cargo commands locally + +[alias] +test-all = "test --all-features" +check-all = "check --all-features" +build-all = "build --all-features" diff --git a/vendor/tts-rs/.cargo_vcs_info.json b/vendor/tts-rs/.cargo_vcs_info.json new file mode 100644 index 0000000..7b4d42d --- /dev/null +++ b/vendor/tts-rs/.cargo_vcs_info.json @@ -0,0 +1,6 @@ +{ + "git": { + "sha1": "7562307e2b32c6d31f092efa01e174eb591354fe" + }, + "path_in_vcs": "" +} \ No newline at end of file diff --git a/vendor/tts-rs/.gitignore b/vendor/tts-rs/.gitignore new file mode 100644 index 0000000..65b7227 --- /dev/null +++ b/vendor/tts-rs/.gitignore @@ -0,0 +1,4 @@ +/target +/Cargo.lock +models/ +*.DS_Store diff --git a/vendor/tts-rs/Cargo.toml b/vendor/tts-rs/Cargo.toml new file mode 100644 index 0000000..f3d4994 --- /dev/null +++ b/vendor/tts-rs/Cargo.toml @@ -0,0 +1,84 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal 
compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2021" +name = "tts-rs" +version = "2026.2.3" +build = false +autolib = false +autobins = false +autoexamples = false +autotests = false +autobenches = false +description = "A Rust library for text-to-speech synthesis using the Kokoro engine" +readme = "README.md" +license = "MIT" +repository = "https://github.com/rishiskhare/tts-rs" + +[features] +default = [] +kokoro = [ + "dep:ort", + "dep:ndarray", + "dep:zip", +] + +[lib] +name = "tts_rs" +path = "src/lib.rs" + +[[example]] +name = "kokoro" +path = "examples/kokoro.rs" +required-features = ["kokoro"] + +[dependencies.derive_builder] +version = "0.20.2" + +[dependencies.env_logger] +version = "0.10.0" + +[dependencies.hound] +version = "3.5.1" + +[dependencies.log] +version = "0.4.28" + +[dependencies.ndarray] +version = "0.17" +optional = true + +[dependencies.ort] +version = "2.0.0-rc.12" +optional = true +default-features = false +features = [ + "std", + "ndarray", + "load-dynamic", +] + +[dependencies.serde] +version = "1.0" +features = ["derive"] + +[dependencies.serde_json] +version = "1.0" + +[dependencies.thiserror] +version = "2.0.16" + +[dependencies.zip] +version = "2" +features = ["deflate"] +optional = true +default-features = false \ No newline at end of file diff --git a/vendor/tts-rs/Cargo.toml.orig b/vendor/tts-rs/Cargo.toml.orig new file mode 100644 index 0000000..2b32a83 --- /dev/null +++ b/vendor/tts-rs/Cargo.toml.orig @@ -0,0 +1,32 @@ +[package] +name = "tts-rs" +version = "2026.2.3" +edition = "2021" +description = "A Rust library for text-to-speech synthesis using the Kokoro engine" +license = "MIT" +repository = "https://github.com/rishiskhare/tts-rs" + 
+[features] +default = [] + +# TTS engines +kokoro = ["dep:ort", "dep:ndarray", "dep:zip"] + +[dependencies] +# Always required +hound = "3.5.1" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +log = "0.4.28" +env_logger = "0.10.0" +thiserror = "2.0.16" +derive_builder = { version = "0.20.2" } + +# Kokoro (ONNX-based) +ort = { version = "2.0.0-rc.10", optional = true } +ndarray = { version = "0.17", optional = true } +zip = { version = "2", optional = true, default-features = false, features = ["deflate"] } + +[[example]] +name = "kokoro" +required-features = ["kokoro"] diff --git a/vendor/tts-rs/LICENSE b/vendor/tts-rs/LICENSE new file mode 100644 index 0000000..6855b46 --- /dev/null +++ b/vendor/tts-rs/LICENSE @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) 2025 Ilya Stupakov +Copyright (c) 2026 Rishi Khare + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/vendor/tts-rs/README.md b/vendor/tts-rs/README.md new file mode 100644 index 0000000..6a3d14d --- /dev/null +++ b/vendor/tts-rs/README.md @@ -0,0 +1,92 @@ +# tts-rs + +A Rust library for text-to-speech synthesis using the +[Kokoro](https://huggingface.co/hexgrad/Kokoro-82M) neural TTS model via ONNX +inference. + +## Features + +- **Kokoro TTS engine** — natural-sounding neural speech via ONNX Runtime +- **Multiple voices** — 26 voices across 9 languages (English US & UK, Spanish, + French, Hindi, Italian, Japanese, Portuguese Brazilian, Chinese Mandarin) +- **Streaming synthesis** — audio playback begins before the full text is + synthesized +- **CPU-only** — no GPU required; runs efficiently on any modern CPU +- **Three precision levels** — f32, f16, and int8 model variants + +## Installation + +```toml +[dependencies] +tts-rs = { version = "2026.2.3", features = ["kokoro"] } +``` + +### Available Features + +| Feature | Description | Dependencies | +| -------- | ------------------------ | ------------------------ | +| `kokoro` | Kokoro neural TTS (ONNX) | `ort`, `ndarray`, `zip` | + +No features are enabled by default. You must opt in explicitly. + +## Model Files + +Download the following files from the +[taylorchu/kokoro-onnx v0.2.0 release](https://github.com/taylorchu/kokoro-onnx/releases/tag/v0.2.0): + +| File | Size | Description | +| ------------------------ | ------ | ------------------------------------------ | +| `kokoro-v1.0.onnx` | 310 MB | Full precision (f32) | +| `kokoro-v1.0.fp16.onnx` | 169 MB | Half precision (f16) | +| `kokoro-v1.0.int8.onnx` | 88 MB | Quantized (int8) — recommended | +| `voices-v1.0.bin` | — | Style vectors for all 26 voices (required) | + +The `voices-v1.0.bin` file is required regardless of which model variant you +use. Place all downloaded files in the same directory and pass that path to +`load_model`. 
+ +## Usage + +```rust +use tts_rs::engines::kokoro::{KokoroEngine, KokoroInferenceParams}; +use tts_rs::SynthesisEngine; +use std::path::PathBuf; + +let mut engine = KokoroEngine::new(); +engine.load_model(&PathBuf::from("models/kokoro"))?; + +let params = KokoroInferenceParams { + voice: "af_heart".to_string(), + ..Default::default() +}; +let audio = engine.synthesize("Hello, world!", Some(params))?; +// audio is a Vec of PCM samples at 24 kHz +``` + +## Running the Example + +```sh +cargo run --example kokoro --features kokoro +``` + +## Acknowledgements + +This library is derived from +[transcribe-rs](https://github.com/cjpais/transcribe-rs) by +[CJ Pais](https://github.com/cjpais), which was itself built as the inference +backend for the [Handy](https://github.com/cjpais/handy) project. The original +library supported multiple speech-to-text (ASR) engines; this fork removes +those entirely and repurposes the codebase to focus exclusively on Kokoro TTS +synthesis. + +ONNX model files are provided by +[taylorchu/kokoro-onnx](https://github.com/taylorchu/kokoro-onnx). Additional +reference and inspiration from +[thewh1teagle/kokoro-onnx](https://github.com/thewh1teagle/kokoro-onnx). The +underlying TTS model is +[Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M) by +[hexgrad](https://huggingface.co/hexgrad). 
+ +## License + +[MIT](LICENSE) diff --git a/vendor/tts-rs/examples/kokoro.rs b/vendor/tts-rs/examples/kokoro.rs new file mode 100644 index 0000000..8c9e960 --- /dev/null +++ b/vendor/tts-rs/examples/kokoro.rs @@ -0,0 +1,47 @@ +use std::path::PathBuf; +use std::time::Instant; + +use tts_rs::{ + engines::kokoro::{KokoroEngine, KokoroInferenceParams, KokoroModelParams}, + SynthesisEngine, +}; + +fn main() -> Result<(), Box> { + env_logger::init(); + + let mut engine = KokoroEngine::new(); + let model_path = PathBuf::from("models/kokoro"); + + let load_start = Instant::now(); + engine.load_model_with_params(&model_path, KokoroModelParams::default())?; + println!("Model loaded in {:.2?}", load_start.elapsed()); + + println!("Available voices: {:?}", engine.list_voices()); + + let text = "Hello! This is Kokoro, a text to speech model with multilingual support. \ + It supports American English, British English, French, Spanish, \ + Hindi, Italian, Japanese, Mandarin Chinese, and Brazilian Portuguese."; + + let params = KokoroInferenceParams { + voice: "af_heart".to_string(), + speed: 1.0, + ..Default::default() + }; + + let synth_start = Instant::now(); + let result = engine.synthesize(text, Some(params))?; + let synth_dur = synth_start.elapsed(); + + let audio_duration = result.samples.len() as f64 / result.sample_rate as f64; + let speedup = audio_duration / synth_dur.as_secs_f64(); + println!( + "Synthesized {:.2}s audio in {:.2?} ({:.1}x real-time)", + audio_duration, synth_dur, speedup + ); + + engine.synthesize_to_file(text, &PathBuf::from("output.wav"), None)?; + println!("Saved to output.wav"); + + engine.unload_model(); + Ok(()) +} \ No newline at end of file diff --git a/vendor/tts-rs/output.wav b/vendor/tts-rs/output.wav new file mode 100644 index 0000000..30b477b Binary files /dev/null and b/vendor/tts-rs/output.wav differ diff --git a/vendor/tts-rs/samples/dots.wav b/vendor/tts-rs/samples/dots.wav new file mode 100644 index 0000000..5707388 Binary files 
/dev/null and b/vendor/tts-rs/samples/dots.wav differ diff --git a/vendor/tts-rs/samples/jfk.wav b/vendor/tts-rs/samples/jfk.wav new file mode 100644 index 0000000..3184d37 Binary files /dev/null and b/vendor/tts-rs/samples/jfk.wav differ diff --git a/vendor/tts-rs/samples/product_names.wav b/vendor/tts-rs/samples/product_names.wav new file mode 100644 index 0000000..6feadcb Binary files /dev/null and b/vendor/tts-rs/samples/product_names.wav differ diff --git a/vendor/tts-rs/samples/russian.wav b/vendor/tts-rs/samples/russian.wav new file mode 100644 index 0000000..bf94469 Binary files /dev/null and b/vendor/tts-rs/samples/russian.wav differ diff --git a/vendor/tts-rs/src/engines/kokoro/engine.rs b/vendor/tts-rs/src/engines/kokoro/engine.rs new file mode 100644 index 0000000..d7ab79f --- /dev/null +++ b/vendor/tts-rs/src/engines/kokoro/engine.rs @@ -0,0 +1,169 @@ +use std::path::{Path, PathBuf}; + +use crate::{SynthesisEngine, SynthesisResult}; + +use super::model::{KokoroError, KokoroModel, SAMPLE_RATE}; +use super::phonemizer::EspeakConfig; + +/// Parameters for configuring Kokoro model loading. +#[derive(Debug, Clone, Default)] +pub struct KokoroModelParams { + /// Number of CPU threads to use for inference. + /// `None` uses the ORT default (typically all available cores). + pub num_threads: Option, + /// Path for caching the Level3-optimized ONNX graph. + /// + /// - First load: ORT runs Level3 optimization and serialises the result here. + /// - Subsequent loads: the pre-built graph is loaded at `Disable` optimization, + /// skipping the expensive 5–10 s re-optimization step entirely. + /// + /// Always write to a writable location (e.g. app data dir); bundled resource + /// directories may be read-only. + pub optimized_model_cache_path: Option, +} + +/// Parameters for configuring a Kokoro synthesis request. +#[derive(Debug, Clone)] +pub struct KokoroInferenceParams { + /// Voice name (e.g. `"af_heart"`, `"bf_emma"`, `"jf_alpha"`). 
+ pub voice: String, + /// Speech speed multiplier. Range: 0.5–2.0, default 1.0. + pub speed: f32, + /// Override the style vector index. `None` = auto (uses phoneme token count). + pub style_index: Option, +} + +impl Default for KokoroInferenceParams { + fn default() -> Self { + Self { + voice: "af_heart".to_string(), + speed: 1.0, + style_index: None, + } + } +} + +/// Kokoro text-to-speech engine. +/// +/// Uses the Kokoro-82M ONNX model for high-quality, fast TTS with support +/// for 9 languages. Requires espeak-ng for phonemization. +/// +/// # Quick Start +/// +/// ```rust,no_run +/// use tts_rs::{SynthesisEngine, engines::kokoro::KokoroEngine}; +/// use std::path::PathBuf; +/// +/// // Uses system espeak-ng from PATH +/// let mut engine = KokoroEngine::new(); +/// engine.load_model(&PathBuf::from("models/kokoro"))?; +/// let result = engine.synthesize("Hello, world!", None)?; +/// # Ok::<(), Box>(()) +/// ``` +/// +/// # Bundled espeak-ng +/// +/// ```rust,no_run +/// use tts_rs::engines::kokoro::KokoroEngine; +/// use std::path::PathBuf; +/// +/// // Point to a bundled espeak-ng binary and data directory +/// let engine = KokoroEngine::with_espeak( +/// Some(PathBuf::from("/app/resources/espeak-ng/espeak-ng")), +/// Some(PathBuf::from("/app/resources/espeak-ng-data")), +/// ); +/// # Ok::<(), Box>(()) +/// ``` +pub struct KokoroEngine { + model: Option, + model_path: Option, + espeak: EspeakConfig, +} + +impl Default for KokoroEngine { + fn default() -> Self { + Self::new() + } +} + +impl KokoroEngine { + /// Create a new engine that uses `espeak-ng` from PATH. + pub fn new() -> Self { + Self { + model: None, + model_path: None, + espeak: EspeakConfig::default(), + } + } + + /// Create a new engine with explicit espeak-ng binary and data paths. + /// + /// Use this when bundling espeak-ng with your application. Either path + /// can be `None` to fall back to the system default. 
+ pub fn with_espeak( + bin_path: Option, + data_path: Option, + ) -> Self { + Self { + model: None, + model_path: None, + espeak: EspeakConfig { bin_path, data_path }, + } + } + + /// List all available voice names (requires model to be loaded). + pub fn list_voices(&self) -> Vec<&str> { + self.model + .as_ref() + .map(|m| m.list_voices()) + .unwrap_or_default() + } +} + +impl Drop for KokoroEngine { + fn drop(&mut self) { + self.unload_model(); + } +} + +impl SynthesisEngine for KokoroEngine { + type SynthesisParams = KokoroInferenceParams; + type ModelParams = KokoroModelParams; + + fn load_model_with_params( + &mut self, + model_path: &Path, + params: Self::ModelParams, + ) -> Result<(), Box> { + let model = KokoroModel::load( + model_path, + params.num_threads, + params.optimized_model_cache_path.as_deref(), + )?; + self.model = Some(model); + self.model_path = Some(model_path.to_path_buf()); + Ok(()) + } + + fn unload_model(&mut self) { + self.model = None; + self.model_path = None; + } + + fn synthesize( + &mut self, + text: &str, + params: Option, + ) -> Result> { + let model = self.model.as_mut().ok_or(KokoroError::ModelNotLoaded)?; + + let p = params.unwrap_or_default(); + let samples = + model.synthesize_text(text, &p.voice, p.speed, p.style_index, &self.espeak)?; + + Ok(SynthesisResult { + samples, + sample_rate: SAMPLE_RATE, + }) + } +} diff --git a/vendor/tts-rs/src/engines/kokoro/mod.rs b/vendor/tts-rs/src/engines/kokoro/mod.rs new file mode 100644 index 0000000..5a9a10b --- /dev/null +++ b/vendor/tts-rs/src/engines/kokoro/mod.rs @@ -0,0 +1,91 @@ +//! Kokoro-82M text-to-speech engine implementation. +//! +//! This module provides a Kokoro-based synthesis engine that uses the +//! Kokoro-82M ONNX model for text-to-speech conversion. The engine uses +//! espeak-ng for phonemization and supports 9 languages. +//! +//! # System Requirements +//! +//! **espeak-ng** must be installed on your system: +//! - **Linux**: `sudo apt-get install espeak-ng` +//! 
- **macOS**: `brew install espeak-ng` +//! - **Windows**: Download installer from +//! +//! # Model Directory Layout +//! +//! ```text +//! models/kokoro/ +//! ├── kokoro-quant-convinteger.onnx # 8-bit quantized model (88MB, CPU-optimized) +//! └── voices-v1.0.bin # Voice data archive (.npz format) +//! ``` +//! +//! Download links: +//! - Model: +//! - Voices: +//! +//! # Language Support +//! +//! | Voice prefix | Language | espeak-ng code | Notes | +//! |---|---|---|---| +//! | `af_`, `am_` | American English | `en-us` | Full support | +//! | `bf_`, `bm_` | British English | `en-gb` | Full support | +//! | `ef_`, `em_` | Spanish | `es` | Full support | +//! | `ff_` | French | `fr` | Full support | +//! | `hf_`, `hm_` | Hindi | `hi` | Full support | +//! | `if_`, `im_` | Italian | `it` | Full support | +//! | `jf_`, `jm_` | Japanese | `ja` | Functional via espeak-ng CJK | +//! | `pf_`, `pm_` | Brazilian Portuguese | `pt-br` | Full support | +//! | `zf_`, `zm_` | Mandarin Chinese | `cmn` | Functional via espeak-ng CJK | +//! +//! # Voice Naming Convention +//! +//! Voices follow the pattern `{language_prefix}_{name}`, e.g.: +//! - `af_heart` — American English female "heart" +//! - `bf_emma` — British English female "emma" +//! - `jf_alpha` — Japanese female "alpha" +//! - `zf_xiaobei` — Mandarin Chinese female "xiaobei" +//! +//! # Examples +//! +//! ## Basic Usage +//! +//! ```rust,no_run +//! use tts_rs::{SynthesisEngine, engines::kokoro::{KokoroEngine, KokoroInferenceParams}}; +//! use std::path::PathBuf; +//! +//! let mut engine = KokoroEngine::new(); +//! engine.load_model(&PathBuf::from("models/kokoro"))?; +//! +//! let result = engine.synthesize("Hello, world!", None)?; +//! println!("Generated {} samples at {}Hz", result.samples.len(), result.sample_rate); +//! # Ok::<(), Box>(()) +//! ``` +//! +//! ## With Custom Voice and Speed +//! +//! ```rust,no_run +//! use tts_rs::{SynthesisEngine, engines::kokoro::{KokoroEngine, KokoroInferenceParams}}; +//! 
use std::path::PathBuf; +//! +//! let mut engine = KokoroEngine::new(); +//! engine.load_model(&PathBuf::from("models/kokoro"))?; +//! +//! let params = KokoroInferenceParams { +//! voice: "bf_emma".to_string(), +//! speed: 0.9, +//! ..Default::default() +//! }; +//! +//! engine.synthesize_to_file("Hello from British Emma!", &PathBuf::from("out.wav"), Some(params))?; +//! # Ok::<(), Box>(()) +//! ``` + +pub mod engine; +pub mod model; +pub mod phonemizer; +pub mod vocab; +pub mod voices; + +pub use engine::{KokoroEngine, KokoroInferenceParams, KokoroModelParams}; +pub use model::KokoroError; +pub use phonemizer::EspeakConfig; \ No newline at end of file diff --git a/vendor/tts-rs/src/engines/kokoro/model.rs b/vendor/tts-rs/src/engines/kokoro/model.rs new file mode 100644 index 0000000..febf9a0 --- /dev/null +++ b/vendor/tts-rs/src/engines/kokoro/model.rs @@ -0,0 +1,387 @@ +use std::collections::HashMap; +use std::path::Path; + +use ndarray::Array2; +use ort::execution_providers::CPUExecutionProvider; +use ort::inputs; +use ort::session::builder::GraphOptimizationLevel; +use ort::session::Session; +use ort::value::TensorRef; + +use super::phonemizer::{phonemize, voice_lang, EspeakConfig}; +use super::voices::VoiceStore; + +/// Maximum number of phoneme tokens per chunk (before padding). +pub const MAX_PHONEME_LEN: usize = 510; + +/// Style vector dimension for Kokoro. +pub const STYLE_DIM: usize = 256; + +/// Output sample rate from the Kokoro model. +pub const SAMPLE_RATE: u32 = 24000; + +/// Crossfade (in samples) used when concatenating chunk audio. +const CHUNK_CROSSFADE_SAMPLES: usize = 240; // 10ms @ 24kHz + +#[derive(thiserror::Error, Debug)] +pub enum KokoroError { + #[error("ONNX runtime error: {0}")] + Ort(#[from] ort::Error), + #[error("I/O error: {0}")] + Io(#[from] std::io::Error), + #[error("Array shape error: {0}")] + Shape(#[from] ndarray::ShapeError), + #[error( + "espeak-ng not found. 
Install: Linux: `sudo apt-get install espeak-ng`, \ + macOS: `brew install espeak-ng`, Windows: https://espeak-ng.org/download" + )] + EspeakNotFound, + #[error("Phonemization failed: {0}")] + PhonemizerFailed(String), + #[error("Voice '{0}' not found. Call list_voices() to see available voices.")] + VoiceNotFound(String), + #[error("Model not loaded. Call load_model() first.")] + ModelNotLoaded, + #[error("Invalid config.json: {0}")] + Config(String), + #[error("Failed to parse voice file: {0}")] + VoiceParse(String), + #[error("espeak-ng process timed out after {0:?}")] + Timeout(std::time::Duration), +} + +/// Internal Kokoro ONNX model state. +pub struct KokoroModel { + session: Session, + voice_store: VoiceStore, + vocab: HashMap, + /// Detected input name: "input_ids" or "tokens" + tokens_input_name: String, + /// True if the speed input expects int32, false for float32 + speed_is_int32: bool, +} + +impl KokoroModel { + /// Load the Kokoro model from a directory. + /// + /// The directory must contain: + /// - An `.onnx` file (preferably `kokoro-quant-convinteger.onnx`) + /// - A `voices-v1.0.bin` voice archive + /// - Optionally a `config.json` for vocabulary (falls back to hardcoded) + pub fn load( + model_dir: &Path, + num_threads: Option, + optimized_cache_path: Option<&Path>, + ) -> Result { + let onnx_path = find_onnx_file(model_dir)?; + log::info!("Loading Kokoro model from {}", onnx_path.display()); + + let session = init_session(&onnx_path, num_threads, optimized_cache_path)?; + + // Detect input names at load time + let tokens_input_name = detect_tokens_input(&session); + let speed_is_int32 = detect_speed_type(&session); + + log::info!( + "Detected: tokens_input='{}', speed_is_int32={}", + tokens_input_name, + speed_is_int32 + ); + + // Load voices + let voices_path = model_dir.join("voices-v1.0.bin"); + if !voices_path.exists() { + return Err(KokoroError::Io(std::io::Error::new( + std::io::ErrorKind::NotFound, + format!( + "Voice file not found at 
{}. Download it from the Kokoro model repository.", + voices_path.display() + ), + ))); + } + let voice_store = VoiceStore::load(&voices_path)?; + + // Load vocabulary + let config_path = model_dir.join("config.json"); + let vocab = if config_path.exists() { + log::info!("Loading vocab from config.json"); + super::vocab::load_vocab(&config_path)? + } else { + log::warn!("config.json not found, using hardcoded vocab"); + super::vocab::hardcoded_vocab() + }; + + Ok(Self { + session, + voice_store, + vocab, + tokens_input_name, + speed_is_int32, + }) + } + + /// Synthesize audio from text using the given voice and speed. + pub fn synthesize_text( + &mut self, + text: &str, + voice_name: &str, + speed: f32, + style_idx_override: Option, + espeak: &EspeakConfig, + ) -> Result, KokoroError> { + let lang = voice_lang(voice_name); + let ids = phonemize(text, lang, &self.vocab, espeak)?; + + if ids.is_empty() { + log::warn!("No phoneme tokens produced for text: {text:?}"); + return Ok(vec![]); + } + + // Split into chunks if needed. Keep a stable style index so adjacent chunks + // don't change style/prosody based on chunk length. 
+ let style_idx = style_idx_override.unwrap_or(ids.len()); + let estimated_samples = ids.len() * 300; + let chunks = if ids.len() > MAX_PHONEME_LEN { + log::debug!( + "Kokoro phoneme sequence exceeded limit ({} > {}), chunking", + ids.len(), + MAX_PHONEME_LEN + ); + // Compute punctuation IDs from the vocab instead of hardcoding them + let punct_ids: Vec = [';', ':', ',', '.', '!', '?'] + .iter() + .filter_map(|c| self.vocab.get(c).copied()) + .collect(); + split_chunks_with_punct(&ids, &punct_ids) + } else { + vec![ids] + }; + + let mut combined = Vec::with_capacity(estimated_samples); + + for chunk_ids in chunks.iter() { + let style = self.voice_store.get_style(voice_name, style_idx)?; + let audio = self.synthesize_chunk(chunk_ids, &style, speed)?; + if audio.is_empty() { + continue; + } + + if combined.is_empty() { + combined.extend_from_slice(&audio); + } else { + append_with_crossfade(&mut combined, &audio, CHUNK_CROSSFADE_SAMPLES); + } + } + + Ok(combined) + } + + /// Run ONNX inference on a single chunk of phoneme token IDs. + fn synthesize_chunk( + &mut self, + tokens: &[i64], + style: &[f32; STYLE_DIM], + speed: f32, + ) -> Result, KokoroError> { + let seq_len = tokens.len() + 2; // +2 for padding tokens + + // Build tokens tensor: [[0, t1..tN, 0]] + let mut padded = vec![0i64; seq_len]; + padded[1..seq_len - 1].copy_from_slice(tokens); + let tokens_arr = Array2::from_shape_vec((1, seq_len), padded)?; + + // Build style tensor: [[s0..s255]] — use a view to avoid copying the 256-float array + let style_view = ndarray::ArrayView2::from_shape((1, STYLE_DIM), style.as_slice())?; + + // Run session + let output = if self.speed_is_int32 { + let speed_arr = ndarray::arr1(&[speed as i32]); + let inputs = inputs![ + self.tokens_input_name.as_str() => TensorRef::from_array_view(tokens_arr.view())?, + "style" => TensorRef::from_array_view(style_view)?, + "speed" => TensorRef::from_array_view(speed_arr.view())?, + ]; + self.session.run(inputs)? 
+ } else { + let speed_arr = ndarray::arr1(&[speed]); + let inputs = inputs![ + self.tokens_input_name.as_str() => TensorRef::from_array_view(tokens_arr.view())?, + "style" => TensorRef::from_array_view(style_view)?, + "speed" => TensorRef::from_array_view(speed_arr.view())?, + ]; + self.session.run(inputs)? + }; + + // Extract first output as waveform + let first_output = output + .iter() + .next() + .ok_or_else(|| KokoroError::Ort(ort::Error::new("No output from model")))?; + let waveform = first_output.1.try_extract_array::()?; + + Ok(waveform.as_slice().unwrap_or(&[]).to_vec()) + } + + /// List all available voice names. + pub fn list_voices(&self) -> Vec<&str> { + self.voice_store.list_voices() + } +} + +/// Find the ONNX model file in the given directory. +/// +/// Prefers `kokoro-quant-convinteger.onnx`, then falls back to the first `.onnx` file found. +fn find_onnx_file(model_dir: &Path) -> Result { + let preferred = model_dir.join("kokoro-quant-convinteger.onnx"); + if preferred.exists() { + return Ok(preferred); + } + + // Scan for any .onnx file + for entry in std::fs::read_dir(model_dir)? { + let entry = entry?; + let path = entry.path(); + if path.extension().and_then(|e| e.to_str()) == Some("onnx") { + log::info!("Using ONNX file: {}", path.display()); + return Ok(path); + } + } + + Err(KokoroError::Io(std::io::Error::new( + std::io::ErrorKind::NotFound, + format!("No .onnx file found in {}", model_dir.display()), + ))) +} + +/// Initialize an ONNX session with optional on-disk graph caching. +/// +/// The first time a model is loaded, ORT runs Level3 graph optimization (5–10 s) +/// and serialises the result to `optimized_cache_path`. Every subsequent load +/// reads the pre-optimized file directly at `Disable` optimization level, cutting +/// cold-start time to under one second. +/// +/// If `optimized_cache_path` is `None` the original behaviour (always Level3) is +/// preserved, which is useful for unit-testing or read-only deployments. 
+fn init_session( + onnx_path: &Path, + num_threads: Option, + optimized_cache_path: Option<&Path>, +) -> Result { + let providers = vec![CPUExecutionProvider::default().build()]; + + // Choose load path and optimization level depending on cache state. + let (load_path, opt_level, write_cache) = match optimized_cache_path { + // Pre-optimized graph already on disk → load it directly, skip optimization. + Some(cache) if cache.exists() => { + log::info!( + "Loading pre-optimized Kokoro graph ({:.1} MB) from {:?} — skipping Level3", + cache + .metadata() + .map(|m| m.len() as f64 / 1_048_576.0) + .unwrap_or(0.0), + cache + ); + (cache, GraphOptimizationLevel::Disable, false) + } + // Cache path given but file does not exist yet → build + persist. + Some(cache) => { + log::info!( + "First load: running Level3 optimization; saving graph to {:?}", + cache + ); + (onnx_path, GraphOptimizationLevel::Level3, true) + } + // No cache path → original behaviour. + None => (onnx_path, GraphOptimizationLevel::Level3, false), + }; + + let mut builder = Session::builder()? + .with_optimization_level(opt_level)? + .with_execution_providers(providers)? + .with_parallel_execution(true)?; + + if write_cache { + // Serialise the optimized graph so the next launch can skip optimization. + let cache = optimized_cache_path.unwrap(); + builder = builder.with_optimized_model_path(cache)?; + } + + if let Some(threads) = num_threads { + builder = builder + .with_intra_threads(threads)? + .with_inter_threads(threads)?; + } + + Ok(builder.commit_from_file(load_path)?) +} + +/// Detect the token input name ("input_ids" or "tokens") from session inputs. +fn detect_tokens_input(session: &Session) -> String { + for input in session.inputs() { + if input.name() == "input_ids" || input.name() == "tokens" { + return input.name().to_string(); + } + } + // Default to "input_ids" if neither is found + "input_ids".to_string() +} + +/// Detect whether the speed input expects int32 (true) or float32 (false). 
+fn detect_speed_type(session: &Session) -> bool { + for input in session.inputs() { + if input.name() == "speed" { + // Check the type description + let type_str = format!("{:?}", input.dtype()); + return type_str.contains("Int32") || type_str.contains("int32"); + } + } + // Default: modern Kokoro models use int32 + true +} + +/// Split phoneme IDs into chunks of at most `MAX_PHONEME_LEN`, preferring punctuation. +/// Takes an explicit set of punctuation IDs instead of hardcoding them. +fn split_chunks_with_punct(ids: &[i64], punct_ids: &[i64]) -> Vec> { + let mut chunks = Vec::new(); + let mut start = 0; + + while start < ids.len() { + let end = (start + MAX_PHONEME_LEN).min(ids.len()); + if end == ids.len() { + chunks.push(ids[start..end].to_vec()); + break; + } + + // Try to find a good split point (last punctuation before `end`). + let split = ids[start..end] + .iter() + .enumerate() + .rev() + .find(|(_, &id)| punct_ids.contains(&id)) + .map(|(i, _)| start + i + 1) + .unwrap_or(end); + + chunks.push(ids[start..split].to_vec()); + start = split; + } + + chunks +} + +fn append_with_crossfade(dst: &mut Vec, src: &[f32], crossfade_samples: usize) { + let overlap = crossfade_samples.min(dst.len()).min(src.len()); + if overlap == 0 { + dst.extend_from_slice(src); + return; + } + + let dst_start = dst.len() - overlap; + for i in 0..overlap { + let t = (i + 1) as f32 / (overlap as f32 + 1.0); + let left = dst[dst_start + i] * (1.0 - t); + let right = src[i] * t; + dst[dst_start + i] = left + right; + } + + dst.extend_from_slice(&src[overlap..]); +} \ No newline at end of file diff --git a/vendor/tts-rs/src/engines/kokoro/phonemizer.rs b/vendor/tts-rs/src/engines/kokoro/phonemizer.rs new file mode 100644 index 0000000..2c85a09 --- /dev/null +++ b/vendor/tts-rs/src/engines/kokoro/phonemizer.rs @@ -0,0 +1,462 @@ +use std::borrow::Cow; +use std::collections::HashMap; +use std::io::Write; +#[cfg(target_os = "windows")] +use std::os::windows::process::CommandExt; +use 
std::path::PathBuf; +use std::process::{Command, Stdio}; +use std::time::Duration; + +use super::model::KokoroError; + +#[cfg(target_os = "windows")] +const CREATE_NO_WINDOW: u32 = 0x0800_0000; + +/// Configuration for locating the espeak-ng binary and its data directory. +/// +/// When paths are `None`, falls back to `"espeak-ng"` on PATH with its +/// compiled-in default data directory. +#[derive(Debug, Clone, Default)] +pub struct EspeakConfig { + /// Path to the espeak-ng binary. Falls back to `"espeak-ng"` on PATH. + pub bin_path: Option, + /// Path to the espeak-ng-data directory. When set, passed via `--path`. + pub data_path: Option, +} + +/// Map a voice name prefix to an espeak-ng language code. +/// +/// Voice names follow the pattern `{prefix}_{name}` where the two-character +/// prefix encodes the language. +pub fn voice_lang(voice: &str) -> &'static str { + let prefix: String = voice.chars().take(2).collect(); + match prefix.as_str() { + "af" | "am" => "en-us", + "bf" | "bm" => "en-gb", + "ef" | "em" => "es", + "ff" => "fr", + "hf" | "hm" => "hi", + "if" | "im" => "it", + "jf" | "jm" => "ja", + "pf" | "pm" => "pt-br", + "zf" | "zm" => "cmn", + _ => "en-us", + } +} + +/// Convert text to Kokoro phoneme token IDs via espeak-ng. +/// +/// # Arguments +/// - `text`: The input text to phonemize +/// - `lang`: espeak-ng language code (e.g. `"en-us"`, `"fr"`, `"ja"`, `"cmn"`) +/// - `vocab`: Mapping from IPA characters to token IDs +/// +/// # Returns +/// A `Vec` of token IDs. Characters not in the vocab are silently dropped, +/// matching the behavior of the Python reference implementation. 
+pub fn phonemize( + text: &str, + lang: &str, + vocab: &HashMap, + espeak: &EspeakConfig, +) -> Result, KokoroError> { + let parts = split_text_parts(text); + if parts.is_empty() { + return Ok(Vec::new()); + } + + let text_segments: Vec<&str> = parts + .iter() + .filter_map(|part| match part { + TextPart::Text(segment) => Some(segment.as_str()), + TextPart::Punct(_) => None, + }) + .collect(); + + let segment_ids = if text_segments.is_empty() { + Vec::new() + } else { + phonemize_segments_batch(&text_segments, lang, vocab, espeak)? + }; + + let mut ids = Vec::new(); + let mut segment_index = 0usize; + for part in parts { + match part { + TextPart::Text(_) => { + if let Some(chunk) = segment_ids.get(segment_index) { + ids.extend_from_slice(chunk); + } + segment_index += 1; + } + TextPart::Punct(ch) => { + if let Some(&id) = vocab.get(&ch) { + ids.push(id); + } + } + } + } + + Ok(ids) +} + +#[derive(Debug, Clone, PartialEq, Eq)] +enum TextPart { + Text(String), + Punct(char), +} + +fn split_text_parts(text: &str) -> Vec { + let mut parts = Vec::new(); + let mut current = String::new(); + let chars: Vec = text.chars().collect(); + let mut i = 0; + + while i < chars.len() { + let ch = chars[i]; + // Calculate byte index for this character + let idx: usize = text.char_indices().nth(i).map(|(pos, _)| pos).unwrap_or(text.len()); + let ch_len = ch.len_utf8(); + + // Handle CRLF as a single boundary token + if ch == '\r' && i + 1 < chars.len() && chars[i + 1] == '\n' { + // Treat CRLF as a single period token + flush_text_part(&mut parts, &mut current); + parts.push(TextPart::Punct('.')); + i += 2; // Skip both \r and \n + continue; + } + + if let Some(punct) = map_boundary_punctuation(ch) { + if !is_numeric_connector_between_digits(text, idx, ch_len, ch) { + flush_text_part(&mut parts, &mut current); + parts.push(TextPart::Punct(punct)); + i += 1; + continue; + } + } + + if ch.is_whitespace() { + if !current.is_empty() && !current.ends_with(' ') { + current.push(' '); + } 
+ i += 1; + continue; + } + + current.push(ch); + i += 1; + } + + flush_text_part(&mut parts, &mut current); + parts +} + +fn flush_text_part(parts: &mut Vec, current: &mut String) { + let trimmed = current.trim(); + if trimmed.is_empty() { + current.clear(); + return; + } + parts.push(TextPart::Text(trimmed.to_string())); + current.clear(); +} + +fn map_boundary_punctuation(ch: char) -> Option { + match ch { + '.' | '!' | '?' | ',' | ';' | ':' | '—' | '…' | '"' | '(' | ')' | '\u{201c}' + | '\u{201d}' => Some(ch), + '\n' | '\r' => Some('.'), + _ => None, + } +} + +fn is_numeric_connector_between_digits(text: &str, idx: usize, ch_len: usize, ch: char) -> bool { + if !matches!(ch, '.' | ',') { + return false; + } + + let prev = text[..idx].chars().next_back(); + let next = text[idx + ch_len..].chars().next(); + + matches!( + (prev, next), + (Some(left), Some(right)) if left.is_ascii_digit() && right.is_ascii_digit() + ) +} + +fn phonemize_segments_batch( + segments: &[&str], + lang: &str, + vocab: &HashMap, + espeak: &EspeakConfig, +) -> Result>, KokoroError> { + let batched_input = segments.join("\n"); + let output = run_espeak(&batched_input, lang, espeak)?; + let lines: Vec<&str> = output.lines().collect(); + + // espeak-ng should emit one line per input line for stdin mode. + // If this assumption breaks, fall back to per-segment invocation. 
+ if lines.len() != segments.len() { + return segments + .iter() + .map(|segment| { + let output = run_espeak(segment, lang, espeak)?; + Ok(ipa_to_ids(&output, vocab)) + }) + .collect(); + } + + Ok(lines.iter().map(|line| ipa_to_ids(line, vocab)).collect()) +} + +fn run_espeak(input: &str, lang: &str, espeak: &EspeakConfig) -> Result { + let bin = espeak + .bin_path + .as_deref() + .map(|p| p.as_os_str().to_owned()) + .unwrap_or_else(|| std::ffi::OsString::from("espeak-ng")); + let mut cmd = Command::new(&bin); + cmd.args(["--ipa", "--stdin", "-q", "-v", lang]); + if let Some(data_path) = espeak.data_path.as_deref() { + cmd.arg("--path").arg(data_path); + } + // When using a bundled binary, shared libraries (libespeak-ng.so, + // libpcaudio.so) are placed next to the binary. On Linux, the dynamic + // linker needs LD_LIBRARY_PATH to find them (RPATH may not be set). + #[cfg(target_os = "linux")] + if let Some(bin_dir) = espeak.bin_path.as_deref().and_then(|p| p.parent()) { + let new_ld_library_path = if let Some(existing) = std::env::var_os("LD_LIBRARY_PATH") { + let mut path = bin_dir.as_os_str().to_owned(); + path.push(":"); + path.push(&existing); + path + } else { + bin_dir.as_os_str().to_owned() + }; + cmd.env("LD_LIBRARY_PATH", new_ld_library_path); + } + #[cfg(target_os = "windows")] + { + // espeak-ng.exe is a console subsystem binary. When Parrot runs without + // an attached console, Windows can create a visible console window for + // every phonemizer child unless we suppress it explicitly. + cmd.creation_flags(CREATE_NO_WINDOW); + } + let mut child = cmd + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .map_err(|e| { + if e.kind() == std::io::ErrorKind::NotFound { + KokoroError::EspeakNotFound + } else { + KokoroError::Io(e) + } + })?; + + if let Some(mut stdin) = child.stdin.take() { + // espeak-ng treats stdin as line-oriented input. Without a final line terminator, + // the last token can be under-processed. 
Enforce a canonical, newline-terminated + // payload as part of this I/O contract. + let stdin_payload = canonicalize_espeak_stdin_payload(input); + stdin + .write_all(stdin_payload.as_bytes()) + .map_err(KokoroError::Io)?; + // Explicitly drop stdin to close the pipe before waiting + drop(stdin); + } + + // Hard timeout for espeak-ng child process (30 seconds) + let timeout = Duration::from_secs(30); + let output = wait_with_timeout(child, timeout)?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(KokoroError::PhonemizerFailed(format!( + "espeak-ng exited with code {:?}: {stderr}", + output.status.code() + ))); + } + + Ok(String::from_utf8_lossy(&output.stdout).into_owned()) +} + +/// Wait for a child process with a timeout. If the timeout expires, kill the child. +fn wait_with_timeout( + mut child: std::process::Child, + timeout: Duration, +) -> Result { + use std::sync::{mpsc, Arc, Mutex}; + use std::thread; + + let child_id = child.id(); + let child_arc = Arc::new(Mutex::new(Some(child))); + let child_clone = Arc::clone(&child_arc); + + let (tx, rx) = mpsc::channel(); + + // Spawn a thread to wait for the child process + thread::spawn(move || { + let mut child_guard = child_clone.lock().unwrap(); + if let Some(mut child) = child_guard.take() { + let result = child.wait_with_output(); + let _ = tx.send(result); + } + }); + + // Wait for the result with a timeout + match rx.recv_timeout(timeout) { + Ok(Ok(output)) => Ok(output), + Ok(Err(e)) => Err(KokoroError::Io(e)), + Err(mpsc::RecvTimeoutError::Timeout) => { + // Timeout expired - try to kill the child process + let mut child_guard = child_arc.lock().unwrap(); + if let Some(mut child) = child_guard.take() { + let _ = child.kill(); + let _ = child.wait(); + } + Err(KokoroError::Timeout(timeout)) + } + Err(mpsc::RecvTimeoutError::Disconnected) => Err(KokoroError::Io( + std::io::Error::new( + std::io::ErrorKind::Other, + format!("espeak-ng process (PID 
{child_id}) channel disconnected"), + ), + )), + } +} + +fn canonicalize_espeak_stdin_payload(input: &str) -> Cow<'_, str> { + if input.ends_with('\n') { + Cow::Borrowed(input) + } else { + Cow::Owned(format!("{input}\n")) + } +} + +fn ipa_to_ids(ipa: &str, vocab: &HashMap) -> Vec { + let mut ids = Vec::new(); + for line in ipa.lines() { + let line = line.trim(); + if line.is_empty() { + continue; + } + for ch in line.chars() { + if ch == '_' { + continue; + } + if let Some(&id) = vocab.get(&ch) { + ids.push(id); + } + } + } + ids +} + +#[cfg(test)] +mod tests { + use super::{ + canonicalize_espeak_stdin_payload, phonemize, run_espeak, split_text_parts, EspeakConfig, + TextPart, + }; + use crate::engines::kokoro::vocab::hardcoded_vocab; + use std::process::Command; + + fn espeak_available() -> bool { + Command::new("espeak-ng") + .arg("--version") + .output() + .is_ok() + } + + #[test] + fn splits_text_and_punctuation_parts() { + let parts = split_text_parts("Hello, world. Testing!"); + assert_eq!( + parts, + vec![ + TextPart::Text("Hello".to_string()), + TextPart::Punct(','), + TextPart::Text("world".to_string()), + TextPart::Punct('.'), + TextPart::Text("Testing".to_string()), + TextPart::Punct('!'), + ] + ); + } + + #[test] + fn keeps_decimal_and_thousands_separators_inside_text() { + let parts = split_text_parts("Version 2.0 reached 1,000 users."); + assert_eq!( + parts, + vec![ + TextPart::Text("Version 2.0 reached 1,000 users".to_string()), + TextPart::Punct('.'), + ] + ); + } + + #[test] + fn still_splits_comma_when_not_between_digits() { + let parts = split_text_parts("Value 2, next"); + assert_eq!( + parts, + vec![ + TextPart::Text("Value 2".to_string()), + TextPart::Punct(','), + TextPart::Text("next".to_string()), + ] + ); + } + + #[test] + fn appends_trailing_newline_for_espeak_stdin() { + assert_eq!(canonicalize_espeak_stdin_payload("America"), "America\n"); + } + + #[test] + fn keeps_single_trailing_newline_for_espeak_stdin() { + 
assert_eq!(canonicalize_espeak_stdin_payload("America\n"), "America\n"); + } + + #[test] + fn espeak_output_is_stable_with_or_without_trailing_newline() { + if !espeak_available() { + return; + } + + let cfg = EspeakConfig::default(); + let without_newline = + run_espeak("America", "en-us", &cfg).expect("espeak should succeed"); + let with_newline = + run_espeak("America\n", "en-us", &cfg).expect("espeak should succeed"); + assert_eq!( + without_newline.trim(), + with_newline.trim(), + "stdin canonicalization must prevent final-token truncation" + ); + } + + #[test] + fn phonemize_keeps_terminal_schwa_for_america() { + if !espeak_available() { + return; + } + + let vocab = hardcoded_vocab(); + let cfg = EspeakConfig::default(); + let ids = + phonemize("America", "en-us", &vocab, &cfg).expect("phonemize should succeed"); + let schwa_id = *vocab + .get(&'ə') + .expect("hardcoded vocab should include schwa"); + assert_eq!( + ids.last(), + Some(&schwa_id), + "terminal schwa should be preserved for 'America'" + ); + } +} \ No newline at end of file diff --git a/vendor/tts-rs/src/engines/kokoro/vocab.rs b/vendor/tts-rs/src/engines/kokoro/vocab.rs new file mode 100644 index 0000000..890b9fe --- /dev/null +++ b/vendor/tts-rs/src/engines/kokoro/vocab.rs @@ -0,0 +1,163 @@ +use std::collections::HashMap; +use std::path::Path; + +use super::model::KokoroError; + +/// Load the Kokoro vocabulary from a config.json file. +/// +/// The config.json must contain a `"vocab"` field mapping single-character +/// strings to integer token IDs. +pub fn load_vocab(config_path: &Path) -> Result, KokoroError> { + let content = std::fs::read_to_string(config_path)?; + let json: serde_json::Value = serde_json::from_str(&content) + .map_err(|e| KokoroError::Config(format!("Failed to parse JSON: {e}")))?; + + let vocab_obj = json + .get("vocab") + .ok_or_else(|| KokoroError::Config("Missing 'vocab' field".to_string()))? 
+ .as_object() + .ok_or_else(|| KokoroError::Config("'vocab' must be an object".to_string()))?; + + let mut map = HashMap::new(); + for (k, v) in vocab_obj { + if k.is_empty() { + return Err(KokoroError::Config(format!("Empty key in vocab: {k:?}"))); + } + if k.chars().count() != 1 { + return Err(KokoroError::Config(format!( + "Vocab key must be a single character, got: {k:?}" + ))); + } + let ch = k.chars().next().unwrap(); + let id = v + .as_i64() + .ok_or_else(|| KokoroError::Config(format!("Non-integer vocab value for key {k:?}")))?; + map.insert(ch, id); + } + + Ok(map) +} + +/// Hardcoded Kokoro vocabulary (from config.json, model-version-agnostic). +/// +/// Only used as a fallback when config.json is not present. Prefer loading +/// from config.json via `load_vocab()`. +pub fn hardcoded_vocab() -> HashMap { + let entries: &[(char, i64)] = &[ + (';', 1), + (':', 2), + (',', 3), + ('.', 4), + ('!', 5), + ('?', 6), + ('—', 9), + ('…', 10), + ('"', 11), + ('(', 12), + (')', 13), + ('\u{201c}', 14), + ('\u{201d}', 15), + (' ', 16), + ('\u{0303}', 17), + ('ʣ', 18), + ('ʥ', 19), + ('ʦ', 20), + ('ʨ', 21), + ('ᵝ', 22), + ('ꭧ', 23), + ('A', 24), + ('I', 25), + ('O', 31), + ('Q', 33), + ('S', 35), + ('T', 36), + ('W', 39), + ('Y', 41), + ('ᵊ', 42), + ('a', 43), + ('b', 44), + ('c', 45), + ('d', 46), + ('e', 47), + ('f', 48), + ('h', 50), + ('i', 51), + ('j', 52), + ('k', 53), + ('l', 54), + ('m', 55), + ('n', 56), + ('o', 57), + ('p', 58), + ('q', 59), + ('r', 60), + ('s', 61), + ('t', 62), + ('u', 63), + ('v', 64), + ('w', 65), + ('x', 66), + ('y', 67), + ('z', 68), + ('ɑ', 69), + ('ɐ', 70), + ('ɒ', 71), + ('æ', 72), + ('β', 75), + ('ɔ', 76), + ('ɕ', 77), + ('ç', 78), + ('ɖ', 80), + ('ð', 81), + ('ʤ', 82), + ('ə', 83), + ('ɚ', 85), + ('ɛ', 86), + ('ɜ', 87), + ('ɟ', 90), + ('ɡ', 92), + ('ɥ', 99), + ('ɨ', 101), + ('ɪ', 102), + ('ʝ', 103), + ('ɯ', 110), + ('ɰ', 111), + ('ŋ', 112), + ('ɳ', 113), + ('ɲ', 114), + ('ɴ', 115), + ('ø', 116), + ('ɸ', 118), + ('θ', 119), + 
('œ', 120), + ('ɹ', 123), + ('ɾ', 125), + ('ɻ', 126), + ('ʁ', 128), + ('ɽ', 129), + ('ʂ', 130), + ('ʃ', 131), + ('ʈ', 132), + ('ʧ', 133), + ('ʊ', 135), + ('ʋ', 136), + ('ʌ', 138), + ('ɣ', 139), + ('ɤ', 140), + ('χ', 142), + ('ʎ', 143), + ('ʒ', 147), + ('ʔ', 148), + ('ˈ', 156), + ('ˌ', 157), + ('ː', 158), + ('ʰ', 162), + ('ʲ', 164), + ('↓', 169), + ('→', 171), + ('↗', 172), + ('↘', 173), + ('ᵻ', 177), + ]; + entries.iter().copied().collect() +} \ No newline at end of file diff --git a/vendor/tts-rs/src/engines/kokoro/voices.rs b/vendor/tts-rs/src/engines/kokoro/voices.rs new file mode 100644 index 0000000..22b5a20 --- /dev/null +++ b/vendor/tts-rs/src/engines/kokoro/voices.rs @@ -0,0 +1,181 @@ +use std::collections::HashMap; +use std::ffi::OsStr; +use std::fs::File; +use std::io::Read; +use std::path::Path; + +use super::model::KokoroError; + +/// Storage for all loaded voice style vectors. +/// +/// Each voice is stored as a flat list of style vectors, where each vector +/// has 256 floats. The index into the list corresponds to the phoneme token +/// count, enabling prosody-consistent synthesis. +pub struct VoiceStore { + voices: HashMap>, +} + +impl VoiceStore { + /// Load all voices from a .npz (numpy zip) file. + /// + /// The file should be a standard .npz archive where each entry is a + /// .npy file named after the voice (e.g., `af_heart.npy`). 
+ pub fn load(path: &Path) -> Result { + let file = File::open(path)?; + let mut zip = zip::ZipArchive::new(file) + .map_err(|e| KokoroError::VoiceParse(format!("Failed to open zip archive: {e}")))?; + + let mut voices = HashMap::new(); + + for i in 0..zip.len() { + let mut entry = zip.by_index(i).map_err(|e| { + KokoroError::VoiceParse(format!("Failed to read zip entry {i}: {e}")) + })?; + + let raw_name = entry.name().to_string(); + + // Skip directory entries + if raw_name.ends_with('/') { + continue; + } + + // Voice name is the basename without the .npy extension + let voice_name = Path::new(&raw_name) + .file_name() + .and_then(OsStr::to_str) + .map(|name| name.trim_end_matches(".npy")) + .filter(|name| !name.is_empty()) + .map(str::to_string); + + let Some(voice_name) = voice_name else { + continue; + }; + + let mut data = Vec::new(); + entry + .read_to_end(&mut data) + .map_err(|e| KokoroError::VoiceParse(format!("Failed to read {raw_name}: {e}")))?; + + let style_vectors = parse_npy(&data, &raw_name)?; + voices.insert(voice_name, style_vectors); + } + + log::info!("Loaded {} voices", voices.len()); + Ok(Self { voices }) + } + + /// Get the style vector for a voice at the given index. + /// + /// The index is clamped to the valid range, so any index is safe. + pub fn get_style(&self, voice: &str, idx: usize) -> Result<[f32; 256], KokoroError> { + let styles = self + .voices + .get(voice) + .ok_or_else(|| KokoroError::VoiceNotFound(voice.to_string()))?; + + if styles.is_empty() { + return Err(KokoroError::VoiceParse(format!( + "Voice {voice} has no style vectors" + ))); + } + + let clamped = idx.min(styles.len().saturating_sub(1)); + Ok(styles[clamped]) + } + + /// List all available voice names in sorted order. + pub fn list_voices(&self) -> Vec<&str> { + let mut names: Vec<&str> = self.voices.keys().map(|s| s.as_str()).collect(); + names.sort_unstable(); + names + } +} + +/// Parse a numpy .npy file into a list of style vectors. 
+/// +/// Expects a 2D float32 array of shape `[N, 256]` in little-endian format. +fn parse_npy(data: &[u8], name: &str) -> Result, KokoroError> { + // Verify numpy magic bytes: \x93NUMPY + if data.len() < 10 { + return Err(KokoroError::VoiceParse(format!( + "{name}: file too short ({} bytes)", + data.len() + ))); + } + + if &data[0..6] != b"\x93NUMPY" { + return Err(KokoroError::VoiceParse(format!( + "{name}: invalid numpy magic bytes" + ))); + } + + // major version at [6], minor at [7] + let major = data[6]; + let minor = data[7]; + + // Read header_len based on numpy version + let (header_len, data_offset) = match major { + 1 => { + // numpy 1.0: 2-byte little-endian u16 header_len at [8..10] + let header_len = u16::from_le_bytes([data[8], data[9]]) as usize; + (header_len, 10 + header_len) + } + 2 => { + // numpy 2.0: 4-byte little-endian u32 header_len at [8..12] + if data.len() < 12 { + return Err(KokoroError::VoiceParse(format!( + "{name}: file too short for numpy 2.0 header ({} bytes)", + data.len() + ))); + } + let header_len = u32::from_le_bytes([data[8], data[9], data[10], data[11]]) as usize; + (header_len, 12 + header_len) + } + _ => { + return Err(KokoroError::VoiceParse(format!( + "{name}: unsupported numpy version {major}.{minor}" + ))); + } + }; + + if data.len() < data_offset { + return Err(KokoroError::VoiceParse(format!( + "{name}: header truncated (need {data_offset} bytes, got {})", + data.len() + ))); + } + + let float_data = &data[data_offset..]; + if !float_data.len().is_multiple_of(4) { + return Err(KokoroError::VoiceParse(format!( + "{name}: float data length {} is not a multiple of 4", + float_data.len() + ))); + } + + let n_floats = float_data.len() / 4; + if !n_floats.is_multiple_of(256) { + return Err(KokoroError::VoiceParse(format!( + "{name}: float count {n_floats} is not a multiple of 256 (style vector dim)" + ))); + } + + let n_styles = n_floats / 256; + let mut result = Vec::with_capacity(n_styles); + + for i in 0..n_styles { + 
let mut vec = [0f32; 256]; + for (j, slot) in vec.iter_mut().enumerate() { + let offset = (i * 256 + j) * 4; + *slot = f32::from_le_bytes([ + float_data[offset], + float_data[offset + 1], + float_data[offset + 2], + float_data[offset + 3], + ]); + } + result.push(vec); + } + + Ok(result) +} \ No newline at end of file diff --git a/vendor/tts-rs/src/engines/mod.rs b/vendor/tts-rs/src/engines/mod.rs new file mode 100644 index 0000000..b0f399a --- /dev/null +++ b/vendor/tts-rs/src/engines/mod.rs @@ -0,0 +1,11 @@ +//! Speech synthesis engines. +//! +//! This module contains implementations of text-to-speech engines. +//! +//! # Available Engines +//! +//! Enable engines via Cargo features: +//! - `kokoro` - Kokoro TTS (ONNX format, espeak-ng required) + +#[cfg(feature = "kokoro")] +pub mod kokoro; diff --git a/vendor/tts-rs/src/lib.rs b/vendor/tts-rs/src/lib.rs new file mode 100644 index 0000000..763e07f --- /dev/null +++ b/vendor/tts-rs/src/lib.rs @@ -0,0 +1,114 @@ +//! # transcribe-rs +//! +//! A Rust library providing text-to-speech synthesis using the Kokoro engine. +//! +//! ## Features +//! +//! - **Kokoro TTS**: High-quality text-to-speech with multiple voices and languages +//! - **Flexible Model Loading**: Load models with custom parameters +//! - **Multiple Voices**: Support for 9 languages with various voice styles +//! +//! ## Quick Start +//! +//! ```toml +//! [dependencies] +//! transcribe-rs = { version = "0.2", features = ["kokoro"] } +//! ``` +//! +//! ```ignore +//! use std::path::PathBuf; +//! use transcribe_rs::{engines::kokoro::KokoroEngine, SynthesisEngine}; +//! +//! let mut engine = KokoroEngine::new(); +//! engine.load_model(&PathBuf::from("models/kokoro-v1.0"))?; +//! +//! let result = engine.synthesize("Hello, world!", None)?; +//! result.write_wav(&PathBuf::from("output.wav"))?; +//! # Ok::<(), Box>(()) +//! ``` + +pub mod engines; + +use std::path::Path; + +/// The result of a synthesis (text-to-speech) operation. 
+/// +/// Contains raw f32 audio samples and the sample rate of the output audio. +#[derive(Debug)] +pub struct SynthesisResult { + /// Raw audio samples as f32 values + pub samples: Vec, + /// Sample rate of the audio (24000 for Kokoro) + pub sample_rate: u32, +} + +impl SynthesisResult { + /// Write the audio to a 32-bit float WAV file. + pub fn write_wav(&self, path: &Path) -> Result<(), Box> { + let spec = hound::WavSpec { + channels: 1, + sample_rate: self.sample_rate, + bits_per_sample: 32, + sample_format: hound::SampleFormat::Float, + }; + let mut writer = hound::WavWriter::create(path, spec)?; + for &sample in &self.samples { + writer.write_sample(sample)?; + } + writer.finalize()?; + Ok(()) + } + + /// Duration of the audio in seconds. + pub fn duration_secs(&self) -> f64 { + if self.sample_rate == 0 { + return 0.0; + } + self.samples.len() as f64 / self.sample_rate as f64 + } +} + +/// Common interface for text-to-speech synthesis engines. +/// +/// This trait defines the standard operations that all synthesis engines must support. +/// Each engine may have different parameter types for model loading and inference configuration. +pub trait SynthesisEngine { + /// Parameters for configuring inference behavior (voice, speed, etc.) + type SynthesisParams; + /// Parameters for configuring model loading (threads, etc.) + type ModelParams: Default; + + /// Load a model from the specified path using default parameters. + fn load_model(&mut self, model_path: &Path) -> Result<(), Box> { + self.load_model_with_params(model_path, Self::ModelParams::default()) + } + + /// Load a model from the specified path with custom parameters. + fn load_model_with_params( + &mut self, + model_path: &Path, + params: Self::ModelParams, + ) -> Result<(), Box>; + + /// Unload the currently loaded model and free associated resources. + fn unload_model(&mut self); + + /// Synthesize speech from the given text. 
+ fn synthesize( + &mut self, + text: &str, + params: Option, + ) -> Result>; + + /// Synthesize speech from the given text and write to a WAV file. + /// + /// Default implementation calls `synthesize()` then `SynthesisResult::write_wav()`. + fn synthesize_to_file( + &mut self, + text: &str, + wav_path: &Path, + params: Option, + ) -> Result<(), Box> { + self.synthesize(text, params)?.write_wav(wav_path) + } +} \ No newline at end of file