From e581e3fe446005cdaf1fd4d9d74d887c0b77f1b2 Mon Sep 17 00:00:00 2001
From: Sam <peter.samuel.anttila@gmail.com>
Date: Tue, 11 Mar 2025 18:10:42 -0700
Subject: [PATCH] Fix: voice cloning with 'audio prompt' and associated text.

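Voice cloning with an audio prompt and its associated text is broken because the closing `<|end_semantic_token|>` token is missing from the input sequence assembled in `process_prompt`. This commit appends that token after the prompt's semantic tokens so that voice cloning works as expected. It also adds the missing newline at the end of cli/SparkTTS.py.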
---
 cli/SparkTTS.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cli/SparkTTS.py b/cli/SparkTTS.py
index bc86ce3..9f26733 100644
--- a/cli/SparkTTS.py
+++ b/cli/SparkTTS.py
@@ -91,6 +91,7 @@ def process_prompt(
                 "<|end_global_token|>",
                 "<|start_semantic_token|>",
                 semantic_tokens,
+                "<|end_semantic_token|>",
             ]
         else:
             inputs = [
@@ -233,4 +234,4 @@ def inference(
             pred_semantic_ids.to(self.device),
         )
 
-        return wav
\ No newline at end of file
+        return wav