From e581e3fe446005cdaf1fd4d9d74d887c0b77f1b2 Mon Sep 17 00:00:00 2001 From: Sam Date: Tue, 11 Mar 2025 18:10:42 -0700 Subject: [PATCH] Fix: voice cloning with 'audio prompt' and associated text. Voice cloning with text is broken because the prompt built in process_prompt is missing the closing "<|end_semantic_token|>" after the semantic tokens. This commit appends that token so voice cloning works as expected, and also adds the missing newline at the end of cli/SparkTTS.py. --- cli/SparkTTS.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cli/SparkTTS.py b/cli/SparkTTS.py index bc86ce3..9f26733 100644 --- a/cli/SparkTTS.py +++ b/cli/SparkTTS.py @@ -91,6 +91,7 @@ def process_prompt( "<|end_global_token|>", "<|start_semantic_token|>", semantic_tokens, + "<|end_semantic_token|>", ] else: inputs = [ @@ -233,4 +234,4 @@ def inference( pred_semantic_ids.to(self.device), ) - return wav \ No newline at end of file + return wav