From 4b16080f4b3d45fc7132fc69a6b1e5f23d229719 Mon Sep 17 00:00:00 2001
From: PR Bot <pr-bot@minimaxi.com>
Date: Tue, 17 Mar 2026 18:17:54 +0800
Subject: [PATCH] feat: add MiniMax TTS as a new dubbing provider

Add MiniMax Cloud TTS (speech-2.8-hd / speech-2.8-turbo) as a new TTS
provider for video dubbing, with 12 built-in voices, Streamlit sidebar
integration, full i18n support (7 locales), and unit + integration tests.

- New provider: core/tts_backend/minimax_tts.py
- Config: minimax_tts section in config.yaml
- UI: voice/model selectors in sidebar settings
- Docs: TTS comparison tables updated (EN + ZH)
- Tests: 8 unit tests + 3 integration tests

Co-Authored-By: Octopus <liyuan851277048@icloud.com>
---
 config.yaml                      |   8 +-
 core/st_utils/sidebar_setting.py |  37 ++++-
 core/tts_backend/minimax_tts.py  |  79 +++++++++
 core/tts_backend/tts_main.py     |   3 +
 docs/pages/docs/start.en-US.md   |   2 +
 docs/pages/docs/start.zh-CN.md   |   2 +
 tests/__init__.py                |   0
 tests/test_minimax_tts.py        | 272 +++++++++++++++++++++++++++++++
 translations/en.json             |  17 +-
 translations/es.json             |  17 +-
 translations/fr.json             |  17 +-
 translations/ja.json             |  17 +-
 translations/ru.json             |  17 +-
 translations/zh-CN.json          |  17 +-
 translations/zh-HK.json          |  17 +-
 15 files changed, 513 insertions(+), 9 deletions(-)
 create mode 100644 core/tts_backend/minimax_tts.py
 create mode 100644 tests/__init__.py
 create mode 100644 tests/test_minimax_tts.py

diff --git a/config.yaml b/config.yaml
index 099b4bdb..bf4ea2a6 100644
--- a/config.yaml
+++ b/config.yaml
@@ -70,7 +70,7 @@ reflect_translate: true
 pause_before_translate: false
 
 ## ======================== Dubbing Settings ======================== ##
-# TTS selection [sf_fish_tts, openai_tts, gpt_sovits, azure_tts, fish_tts, edge_tts, custom_tts]
+# TTS selection [sf_fish_tts, openai_tts, gpt_sovits, azure_tts, fish_tts, edge_tts, custom_tts, sf_cosyvoice2, f5tts, minimax_tts]
 tts_method: 'azure_tts'
 
 # SiliconFlow FishTTS
@@ -119,6 +119,12 @@ gpt_sovits:
 f5tts:
   302_api: 'YOUR_302_API_KEY'
 
+# MiniMax TTS configuration, get API key from https://www.minimax.io
+minimax_tts:
+  api_key: 'YOUR_MINIMAX_API_KEY'
+  model: 'speech-2.8-hd'
+  voice: 'English_Graceful_Lady'
+
 # *Audio speed range
 speed_factor:
   min: 1
diff --git a/core/st_utils/sidebar_setting.py b/core/st_utils/sidebar_setting.py
index 5c5d24aa..42f13dae 100644
--- a/core/st_utils/sidebar_setting.py
+++ b/core/st_utils/sidebar_setting.py
@@ -84,7 +84,7 @@ def page_setting():
             update_key("burn_subtitles", burn_subtitles)
             st.rerun()
     with st.expander(t("Dubbing Settings"), expanded=True):
-        tts_methods = ["azure_tts", "openai_tts", "fish_tts", "sf_fish_tts", "edge_tts", "gpt_sovits", "custom_tts", "sf_cosyvoice2", "f5tts"]
+        tts_methods = ["azure_tts", "openai_tts", "fish_tts", "sf_fish_tts", "edge_tts", "gpt_sovits", "custom_tts", "sf_cosyvoice2", "f5tts", "minimax_tts"]
         select_tts = st.selectbox(t("TTS Method"), options=tts_methods, index=tts_methods.index(load_key("tts_method")))
         if select_tts != load_key("tts_method"):
             update_key("tts_method", select_tts)
@@ -151,6 +151,41 @@ def page_setting():
         
         elif select_tts == "f5tts":
             config_input("302ai API", "f5tts.302_api")
+
+        elif select_tts == "minimax_tts":
+            config_input(t("MiniMax API Key"), "minimax_tts.api_key")
+            minimax_voices = {
+                "English_Graceful_Lady": t("English Female, Graceful"),
+                "English_Insightful_Speaker": t("English Male, Insightful"),
+                "English_radiant_girl": t("English Female, Radiant"),
+                "English_Persuasive_Man": t("English Male, Persuasive"),
+                "English_Lucky_Robot": t("English, Robot"),
+                "Wise_Woman": t("Female, Wise"),
+                "Friendly_Person": t("Friendly"),
+                "Inspirational_girl": t("Female, Inspirational"),
+                "Deep_Voice_Man": t("Male, Deep Voice"),
+                "sweet_girl": t("Female, Sweet"),
+                "cute_boy": t("Male, Cute"),
+                "lovely_girl": t("Female, Lovely"),
+            }
+            selected_voice = st.selectbox(
+                t("MiniMax Voice"),
+                options=list(minimax_voices.keys()),
+                format_func=lambda x: minimax_voices[x],
+                index=list(minimax_voices.keys()).index(load_key("minimax_tts.voice")) if load_key("minimax_tts.voice") in minimax_voices else 0
+            )
+            if selected_voice != load_key("minimax_tts.voice"):
+                update_key("minimax_tts.voice", selected_voice)
+                st.rerun()
+            minimax_models = ["speech-2.8-hd", "speech-2.8-turbo"]
+            selected_model = st.selectbox(
+                t("MiniMax TTS Model"),
+                options=minimax_models,
+                index=minimax_models.index(load_key("minimax_tts.model")) if load_key("minimax_tts.model") in minimax_models else 0
+            )
+            if selected_model != load_key("minimax_tts.model"):
+                update_key("minimax_tts.model", selected_model)
+                st.rerun()
         
 def check_api():
     try:
diff --git a/core/tts_backend/minimax_tts.py b/core/tts_backend/minimax_tts.py
new file mode 100644
index 00000000..892a0a5f
--- /dev/null
+++ b/core/tts_backend/minimax_tts.py
@@ -0,0 +1,79 @@
+import io
+import requests
+from pathlib import Path
+from pydub import AudioSegment
+from core.utils import load_key, except_handler
+
+BASE_URL = "https://api.minimax.io/v1/t2a_v2"  # endpoint that minimax_tts() posts to
+BASE_URL_CN = "https://api.minimaxi.com/v1/t2a_v2"  # NOTE(review): defined but not referenced in this module
+
+VOICE_LIST = [  # voice IDs minimax_tts() accepts; any other configured voice raises ValueError
+    "English_Graceful_Lady",
+    "English_Insightful_Speaker",
+    "English_radiant_girl",
+    "English_Persuasive_Man",
+    "English_Lucky_Robot",
+    "Wise_Woman",
+    "Friendly_Person",
+    "Inspirational_girl",
+    "Deep_Voice_Man",
+    "sweet_girl",
+    "cute_boy",
+    "lovely_girl",
+]
+
+MODEL_LIST = ["speech-2.8-hd", "speech-2.8-turbo"]  # NOTE(review): not checked by minimax_tts(); the configured model is sent as-is
+
+# API reference: https://platform.minimax.io/docs/api-reference/speech-t2a-http
+
+@except_handler("Failed to generate audio using MiniMax TTS", retry=3, delay=1)
+def minimax_tts(text, save_path):
+    """Synthesize `text` with the MiniMax T2A v2 API and save it to `save_path` as WAV.
+
+    Reads api_key/voice/model from the `minimax_tts` config section. Raises ValueError for a voice
+    outside VOICE_LIST or a response with no audio (the decorator is configured with retry=3).
+    """
+    API_KEY = load_key("minimax_tts.api_key")
+    voice = load_key("minimax_tts.voice")
+    model = load_key("minimax_tts.model")
+    if voice not in VOICE_LIST:
+        raise ValueError(f"Invalid voice: {voice}. Please choose from {VOICE_LIST}")
+
+    payload = {
+        "model": model,
+        "text": text,
+        "stream": False,
+        "voice_setting": {
+            "voice_id": voice,
+            "speed": 1.0,
+            "vol": 1.0,
+            "pitch": 0,
+        },
+        "audio_setting": {
+            "format": "mp3",
+            "sample_rate": 32000,
+        },
+    }
+    headers = {
+        "Authorization": f"Bearer {API_KEY}",
+        "Content-Type": "application/json",
+    }
+
+    response = requests.post(BASE_URL, headers=headers, json=payload, timeout=60)
+    response.raise_for_status()
+    result = response.json()
+    # "data" may be missing or null when the API reports an error with HTTP 200; guard before indexing.
+    data = result.get("data") if isinstance(result, dict) else None
+    audio_hex = data.get("audio") if isinstance(data, dict) else None
+    if not audio_hex:
+        raise ValueError(f"Unexpected API response: {result}")
+
+    # The API returns hex-encoded MP3 bytes; decode them, then convert to WAV with pydub.
+    speech_file_path = Path(save_path)
+    speech_file_path.parent.mkdir(parents=True, exist_ok=True)
+    AudioSegment.from_mp3(io.BytesIO(bytes.fromhex(audio_hex))).export(save_path, format="wav")
+    print(f"Audio saved to {speech_file_path}")
+
+
+if __name__ == "__main__":  # manual smoke test: synthesizes one sentence to test.wav with the configured key
+    minimax_tts("Hi! Welcome to VideoLingo!", "test.wav")
diff --git a/core/tts_backend/tts_main.py b/core/tts_backend/tts_main.py
index 49cc0c29..bcf56515 100644
--- a/core/tts_backend/tts_main.py
+++ b/core/tts_backend/tts_main.py
@@ -11,6 +11,7 @@
 from core.tts_backend.edge_tts import edge_tts
 from core.tts_backend.sf_cosyvoice2 import cosyvoice_tts_for_videolingo
 from core.tts_backend.custom_tts import custom_tts
+from core.tts_backend.minimax_tts import minimax_tts
 from core.prompts import get_correct_text_prompt
 from core.tts_backend._302_f5tts import f5_tts_for_videolingo
 from core.utils import *
@@ -64,6 +65,8 @@ def tts_main(text, save_as, number, task_df):
                 cosyvoice_tts_for_videolingo(text, save_as, number, task_df)
             elif TTS_METHOD == 'f5tts':
                 f5_tts_for_videolingo(text, save_as, number, task_df)
+            elif TTS_METHOD == 'minimax_tts':
+                minimax_tts(text, save_as)
                 
             # Check generated audio duration
             duration = get_audio_duration(save_as)
diff --git a/docs/pages/docs/start.en-US.md b/docs/pages/docs/start.en-US.md
index 8de34745..7b0a1835 100644
--- a/docs/pages/docs/start.en-US.md
+++ b/docs/pages/docs/start.en-US.md
@@ -34,9 +34,11 @@ VideoLingo provides multiple TTS integration methods. Here's a comparison (skip
 | 🎙️ SiliconFlow FishTTS | [SiliconFlow](https://cloud.siliconflow.cn/i/ttKDEsxE) | Voice Clone | Unstable cloning effect | 😃 | 😃 |
 | 🗣 Edge TTS | Local | Completely free | Average effect | 😐 | 😐 |
 | 🗣️ GPT-SoVITS | Local | Best voice cloning | Only supports Chinese/English, requires local inference, complex setup | 🏆 | 🚫 |
+| 🔊 MiniMax TTS | [MiniMax](https://www.minimax.io) | HD quality, many voices | Requires API key | 😃 | 🤩 |
 
 - For SiliconFlow FishTTS, get key from [SiliconFlow](https://cloud.siliconflow.cn/i/ttKDEsxE), note that cloning feature requires paid credits;
 - For OpenAI TTS, Azure TTS, and Fish TTS, use [302AI](https://gpt302.saaslink.net/C2oHR9) - one API key provides access to all three services
+- For MiniMax TTS, get an API key from [MiniMax](https://www.minimax.io); it supports the `speech-2.8-hd` (recommended) and `speech-2.8-turbo` models
 > Wanna use your own TTS? Modify in `core/all_tts_functions/custom_tts.py`!
 
 <details>
diff --git a/docs/pages/docs/start.zh-CN.md b/docs/pages/docs/start.zh-CN.md
index 972c84e7..cf29b7a5 100644
--- a/docs/pages/docs/start.zh-CN.md
+++ b/docs/pages/docs/start.zh-CN.md
@@ -34,9 +34,11 @@ VideoLingo提供了多种 tts 接入方式，以下是对比（如不使用配
 | 🎙️ SiliconFlow FishTTS | [硅基流动](https://cloud.siliconflow.cn/i/ttKDEsxE) | 语音克隆 | 克隆效果不稳定 | 😃 | 😃 |
 | 🗣 Edge TTS | 本地 | 完全免费 | 效果一般 | 😐 | 😐 |
 | 🗣️ GPT-SoVITS | 本地 | 最强语音克隆 | 只支持中英文，需要本地训练推理，配置麻烦 | 🏆 | 🚫 |
+| 🔊 MiniMax TTS | [MiniMax](https://www.minimax.io) | 高清音质，多种音色 | 需要API密钥 | 😃 | 🤩 |
 
 - SiliconFlow FishTTS 请在 [硅基流动](https://cloud.siliconflow.cn/i/ttKDEsxE) 获取key，注意克隆功能需要付费充值积分；
 - OpenAI TTS、Azure TTS 和 Fish TTS，仅支持 [302AI](https://gpt302.saaslink.net/C2oHR9) - 一个 API key 即可使用所有服务
+- MiniMax TTS 请在 [MiniMax](https://www.minimax.io) 获取API密钥，支持 `speech-2.8-hd`（推荐）和 `speech-2.8-turbo` 模型
 > 现在还可以在 `core/all_tts_functions/custom_tts.py` 里自定义tts渠道！
 
 <details>
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_minimax_tts.py b/tests/test_minimax_tts.py
new file mode 100644
index 00000000..b7895643
--- /dev/null
+++ b/tests/test_minimax_tts.py
@@ -0,0 +1,272 @@
+"""Unit and integration tests for MiniMax TTS provider."""
+
+import io
+import json
+import os
+import tempfile
+import unittest
+from unittest.mock import patch, MagicMock
+
+from pydub import AudioSegment
+
+
+class TestMiniMaxTTSUnit(unittest.TestCase):
+    """Unit tests for the MiniMax TTS provider (load_key is patched; requests.post is mocked where a request is made)."""
+
+    def _make_fake_mp3_hex(self):
+        """Encode 100 ms of silence as MP3 and return the bytes as a hex string."""
+        silence = AudioSegment.silent(duration=100)  # 100ms
+        buf = io.BytesIO()
+        silence.export(buf, format="mp3")
+        return buf.getvalue().hex()
+
+    @patch("core.tts_backend.minimax_tts.requests.post")
+    @patch("core.tts_backend.minimax_tts.load_key")
+    def test_successful_tts_generation(self, mock_load_key, mock_post):
+        """Test that minimax_tts writes a non-empty, loadable WAV file on a successful API response."""
+        from core.tts_backend.minimax_tts import minimax_tts
+
+        mock_load_key.side_effect = lambda key: {
+            "minimax_tts.api_key": "test-key",
+            "minimax_tts.voice": "English_Graceful_Lady",
+            "minimax_tts.model": "speech-2.8-hd",
+        }[key]
+
+        fake_hex = self._make_fake_mp3_hex()
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {"data": {"audio": fake_hex}}
+        mock_response.raise_for_status = MagicMock()
+        mock_post.return_value = mock_response
+
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:  # reserve a path; removed in finally
+            save_path = f.name
+
+        try:
+            minimax_tts("Hello world", save_path)
+            self.assertTrue(os.path.exists(save_path))
+            self.assertGreater(os.path.getsize(save_path), 0)
+
+            # Verify pydub can load the output as WAV and that it has non-zero length
+            audio = AudioSegment.from_wav(save_path)
+            self.assertGreater(len(audio), 0)
+        finally:
+            if os.path.exists(save_path):
+                os.remove(save_path)
+
+    @patch("core.tts_backend.minimax_tts.requests.post")
+    @patch("core.tts_backend.minimax_tts.load_key")
+    def test_correct_api_request_params(self, mock_load_key, mock_post):
+        """Test that the request carries the configured key, model, voice and text, with streaming off."""
+        from core.tts_backend.minimax_tts import minimax_tts
+
+        mock_load_key.side_effect = lambda key: {
+            "minimax_tts.api_key": "sk-test-123",
+            "minimax_tts.voice": "English_Insightful_Speaker",
+            "minimax_tts.model": "speech-2.8-turbo",
+        }[key]
+
+        fake_hex = self._make_fake_mp3_hex()
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {"data": {"audio": fake_hex}}
+        mock_response.raise_for_status = MagicMock()
+        mock_post.return_value = mock_response
+
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+            save_path = f.name
+
+        try:
+            minimax_tts("Test text", save_path)
+
+            mock_post.assert_called_once()
+            call_kwargs = mock_post.call_args
+            self.assertEqual(call_kwargs.kwargs["headers"]["Authorization"], "Bearer sk-test-123")
+            payload = call_kwargs.kwargs["json"]
+            self.assertEqual(payload["model"], "speech-2.8-turbo")
+            self.assertEqual(payload["text"], "Test text")
+            self.assertEqual(payload["voice_setting"]["voice_id"], "English_Insightful_Speaker")
+            self.assertFalse(payload["stream"])
+        finally:
+            if os.path.exists(save_path):
+                os.remove(save_path)
+
+    @patch("core.tts_backend.minimax_tts.load_key")
+    def test_invalid_voice_raises_error(self, mock_load_key):
+        """Test that a voice outside VOICE_LIST makes minimax_tts raise (asserted as a generic Exception)."""
+        from core.tts_backend.minimax_tts import minimax_tts
+
+        mock_load_key.side_effect = lambda key: {
+            "minimax_tts.api_key": "test-key",
+            "minimax_tts.voice": "NonExistent_Voice",
+            "minimax_tts.model": "speech-2.8-hd",
+        }[key]
+
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+            save_path = f.name
+
+        try:
+            with self.assertRaises(Exception):
+                minimax_tts("Hello", save_path)
+        finally:
+            if os.path.exists(save_path):
+                os.remove(save_path)
+
+    @patch("core.tts_backend.minimax_tts.requests.post")
+    @patch("core.tts_backend.minimax_tts.load_key")
+    def test_missing_audio_data_raises_error(self, mock_load_key, mock_post):
+        """Test that a response without data.audio makes minimax_tts raise (asserted as a generic Exception)."""
+        from core.tts_backend.minimax_tts import minimax_tts
+
+        mock_load_key.side_effect = lambda key: {
+            "minimax_tts.api_key": "test-key",
+            "minimax_tts.voice": "English_Graceful_Lady",
+            "minimax_tts.model": "speech-2.8-hd",
+        }[key]
+
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {"error": "some error"}
+        mock_response.raise_for_status = MagicMock()
+        mock_post.return_value = mock_response
+
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+            save_path = f.name
+
+        try:
+            with self.assertRaises(Exception):
+                minimax_tts("Hello", save_path)
+        finally:
+            if os.path.exists(save_path):
+                os.remove(save_path)
+
+    @patch("core.tts_backend.minimax_tts.requests.post")
+    @patch("core.tts_backend.minimax_tts.load_key")
+    def test_creates_parent_directories(self, mock_load_key, mock_post):
+        """Test that missing parent directories of save_path are created before the file is written."""
+        from core.tts_backend.minimax_tts import minimax_tts
+
+        mock_load_key.side_effect = lambda key: {
+            "minimax_tts.api_key": "test-key",
+            "minimax_tts.voice": "English_Graceful_Lady",
+            "minimax_tts.model": "speech-2.8-hd",
+        }[key]
+
+        fake_hex = self._make_fake_mp3_hex()
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {"data": {"audio": fake_hex}}
+        mock_response.raise_for_status = MagicMock()
+        mock_post.return_value = mock_response
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            save_path = os.path.join(tmpdir, "nested", "dir", "output.wav")
+            minimax_tts("Hello", save_path)
+            self.assertTrue(os.path.exists(save_path))
+
+    def test_voice_list_not_empty(self):
+        """Test that VOICE_LIST is non-empty and contains two known voice IDs."""
+        from core.tts_backend.minimax_tts import VOICE_LIST
+        self.assertGreater(len(VOICE_LIST), 0)
+        self.assertIn("English_Graceful_Lady", VOICE_LIST)
+        self.assertIn("Wise_Woman", VOICE_LIST)
+
+    def test_model_list_not_empty(self):
+        """Test that MODEL_LIST contains both speech-2.8 model names."""
+        from core.tts_backend.minimax_tts import MODEL_LIST
+        self.assertIn("speech-2.8-hd", MODEL_LIST)
+        self.assertIn("speech-2.8-turbo", MODEL_LIST)
+
+    def test_base_url_uses_minimax_io(self):
+        """Test that default base URL points to api.minimax.io."""
+        from core.tts_backend.minimax_tts import BASE_URL
+        self.assertTrue(BASE_URL.startswith("https://api.minimax.io"))
+
+
+class TestMiniMaxTTSIntegration(unittest.TestCase):
+    """Integration tests that call the real MiniMax TTS API.
+
+    Requires the MINIMAX_API_KEY environment variable; the whole class is skipped when it is unset.
+    """
+
+    @classmethod
+    def setUpClass(cls):
+        cls.api_key = os.environ.get("MINIMAX_API_KEY")
+        if not cls.api_key:
+            raise unittest.SkipTest("MINIMAX_API_KEY not set, skipping integration tests")
+
+    @patch("core.tts_backend.minimax_tts.load_key")  # only config lookup is patched; requests.post is real
+    def test_real_tts_generation(self, mock_load_key):
+        """Test TTS generation with a real API call."""
+        from core.tts_backend.minimax_tts import minimax_tts
+
+        mock_load_key.side_effect = lambda key: {
+            "minimax_tts.api_key": self.api_key,
+            "minimax_tts.voice": "English_Graceful_Lady",
+            "minimax_tts.model": "speech-2.8-hd",
+        }[key]
+
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+            save_path = f.name
+
+        try:
+            minimax_tts("Hello! Welcome to VideoLingo.", save_path)
+            self.assertTrue(os.path.exists(save_path))
+            self.assertGreater(os.path.getsize(save_path), 1000)
+
+            # Verify the output loads as WAV and its duration is plausible for one short sentence
+            audio = AudioSegment.from_wav(save_path)
+            duration_ms = len(audio)
+            self.assertGreater(duration_ms, 500)  # At least 0.5 seconds
+            self.assertLess(duration_ms, 30000)  # Less than 30 seconds
+        finally:
+            if os.path.exists(save_path):
+                os.remove(save_path)
+
+    @patch("core.tts_backend.minimax_tts.load_key")
+    def test_real_tts_chinese_text(self, mock_load_key):
+        """Test TTS generation with Chinese text using Wise_Woman voice."""
+        from core.tts_backend.minimax_tts import minimax_tts
+
+        mock_load_key.side_effect = lambda key: {
+            "minimax_tts.api_key": self.api_key,
+            "minimax_tts.voice": "Wise_Woman",
+            "minimax_tts.model": "speech-2.8-hd",
+        }[key]
+
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+            save_path = f.name
+
+        try:
+            minimax_tts("你好！欢迎使用VideoLingo。", save_path)
+            self.assertTrue(os.path.exists(save_path))
+            self.assertGreater(os.path.getsize(save_path), 1000)
+        finally:
+            if os.path.exists(save_path):
+                os.remove(save_path)
+
+    @patch("core.tts_backend.minimax_tts.load_key")
+    def test_real_tts_turbo_model(self, mock_load_key):
+        """Test TTS generation with speech-2.8-turbo model."""
+        from core.tts_backend.minimax_tts import minimax_tts
+
+        mock_load_key.side_effect = lambda key: {
+            "minimax_tts.api_key": self.api_key,
+            "minimax_tts.voice": "English_Insightful_Speaker",
+            "minimax_tts.model": "speech-2.8-turbo",
+        }[key]
+
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+            save_path = f.name
+
+        try:
+            minimax_tts("This is a test with the turbo model.", save_path)
+            self.assertTrue(os.path.exists(save_path))
+            self.assertGreater(os.path.getsize(save_path), 1000)
+        finally:
+            if os.path.exists(save_path):
+                os.remove(save_path)
+
+
+if __name__ == "__main__":  # allow running this test module directly
+    unittest.main()
diff --git a/translations/en.json b/translations/en.json
index fb216bce..f808a52c 100644
--- a/translations/en.json
+++ b/translations/en.json
@@ -106,5 +106,20 @@
     "No NVIDIA GPU detected": "No NVIDIA GPU detected",
     "No NVIDIA GPU detected or NVIDIA drivers not properly installed": "No NVIDIA GPU detected or NVIDIA drivers not properly installed",
     "LLM JSON Format Support": "LLM JSON Format Support",
-    "Enable if your LLM supports JSON mode output": "Enable if your LLM supports JSON mode output"
+    "Enable if your LLM supports JSON mode output": "Enable if your LLM supports JSON mode output",
+    "MiniMax API Key": "MiniMax API Key",
+    "MiniMax Voice": "MiniMax Voice",
+    "MiniMax TTS Model": "MiniMax TTS Model",
+    "English Female, Graceful": "English Female, Graceful",
+    "English Male, Insightful": "English Male, Insightful",
+    "English Female, Radiant": "English Female, Radiant",
+    "English Male, Persuasive": "English Male, Persuasive",
+    "English, Robot": "English, Robot",
+    "Female, Wise": "Female, Wise",
+    "Friendly": "Friendly",
+    "Female, Inspirational": "Female, Inspirational",
+    "Male, Deep Voice": "Male, Deep Voice",
+    "Female, Sweet": "Female, Sweet",
+    "Male, Cute": "Male, Cute",
+    "Female, Lovely": "Female, Lovely"
 }
diff --git a/translations/es.json b/translations/es.json
index 15fadf5f..ffbc9994 100644
--- a/translations/es.json
+++ b/translations/es.json
@@ -106,5 +106,20 @@
     "No NVIDIA GPU detected": "No se detectó GPU NVIDIA",
     "No NVIDIA GPU detected or NVIDIA drivers not properly installed": "No se detectó GPU NVIDIA o los controladores NVIDIA no están instalados correctamente",
     "LLM JSON Format Support": "Soporte de formato JSON para LLM",
-    "Enable if your LLM supports JSON mode output": "Activar si su LLM admite salida en modo JSON"
+    "Enable if your LLM supports JSON mode output": "Activar si su LLM admite salida en modo JSON",
+    "MiniMax API Key": "Clave API de MiniMax",
+    "MiniMax Voice": "Voz de MiniMax",
+    "MiniMax TTS Model": "Modelo TTS de MiniMax",
+    "English Female, Graceful": "Femenina inglesa, elegante",
+    "English Male, Insightful": "Masculino inglés, perspicaz",
+    "English Female, Radiant": "Femenina inglesa, radiante",
+    "English Male, Persuasive": "Masculino inglés, persuasivo",
+    "English, Robot": "Inglés, robot",
+    "Female, Wise": "Femenina, sabia",
+    "Friendly": "Amigable",
+    "Female, Inspirational": "Femenina, inspiradora",
+    "Male, Deep Voice": "Masculino, voz profunda",
+    "Female, Sweet": "Femenina, dulce",
+    "Male, Cute": "Masculino, adorable",
+    "Female, Lovely": "Femenina, encantadora"
 }
diff --git a/translations/fr.json b/translations/fr.json
index 446f3bbe..eeeba646 100644
--- a/translations/fr.json
+++ b/translations/fr.json
@@ -106,5 +106,20 @@
     "No NVIDIA GPU detected": "Aucun GPU NVIDIA détecté",
     "No NVIDIA GPU detected or NVIDIA drivers not properly installed": "Aucun GPU NVIDIA détecté ou pilotes NVIDIA mal installés",
     "LLM JSON Format Support": "Support du format JSON pour LLM",
-    "Enable if your LLM supports JSON mode output": "Activer si votre LLM prend en charge la sortie en mode JSON"
+    "Enable if your LLM supports JSON mode output": "Activer si votre LLM prend en charge la sortie en mode JSON",
+    "MiniMax API Key": "Clé API MiniMax",
+    "MiniMax Voice": "Voix MiniMax",
+    "MiniMax TTS Model": "Modèle TTS MiniMax",
+    "English Female, Graceful": "Femme anglaise, gracieuse",
+    "English Male, Insightful": "Homme anglais, perspicace",
+    "English Female, Radiant": "Femme anglaise, radieuse",
+    "English Male, Persuasive": "Homme anglais, persuasif",
+    "English, Robot": "Anglais, robot",
+    "Female, Wise": "Femme, sage",
+    "Friendly": "Amical",
+    "Female, Inspirational": "Femme, inspirante",
+    "Male, Deep Voice": "Homme, voix grave",
+    "Female, Sweet": "Femme, douce",
+    "Male, Cute": "Homme, mignon",
+    "Female, Lovely": "Femme, charmante"
 }
diff --git a/translations/ja.json b/translations/ja.json
index 1bce1783..2a16ec4b 100644
--- a/translations/ja.json
+++ b/translations/ja.json
@@ -106,5 +106,20 @@
     "No NVIDIA GPU detected": "NVIDIA GPUが検出されません",
     "No NVIDIA GPU detected or NVIDIA drivers not properly installed": "NVIDIA GPUが検出されないか、NVIDIAドライバーが正しくインストールされていません",
     "LLM JSON Format Support": "LLM JSON形式サポート",
-    "Enable if your LLM supports JSON mode output": "LLMがJSON出力モードをサポートしている場合に有効化"
+    "Enable if your LLM supports JSON mode output": "LLMがJSON出力モードをサポートしている場合に有効化",
+    "MiniMax API Key": "MiniMax APIキー",
+    "MiniMax Voice": "MiniMax 音声",
+    "MiniMax TTS Model": "MiniMax TTSモデル",
+    "English Female, Graceful": "英語女性、優雅",
+    "English Male, Insightful": "英語男性、洞察力",
+    "English Female, Radiant": "英語女性、明るい",
+    "English Male, Persuasive": "英語男性、説得力",
+    "English, Robot": "英語、ロボット",
+    "Female, Wise": "女性、賢い",
+    "Friendly": "フレンドリー",
+    "Female, Inspirational": "女性、インスピレーション",
+    "Male, Deep Voice": "男性、深い声",
+    "Female, Sweet": "女性、甘い",
+    "Male, Cute": "男性、可愛い",
+    "Female, Lovely": "女性、愛らしい"
 }
diff --git a/translations/ru.json b/translations/ru.json
index 07ba21cb..8f02a57c 100644
--- a/translations/ru.json
+++ b/translations/ru.json
@@ -106,5 +106,20 @@
     "No NVIDIA GPU detected": "GPU NVIDIA не обнаружен",
     "No NVIDIA GPU detected or NVIDIA drivers not properly installed": "GPU NVIDIA не обнаружен или драйверы NVIDIA установлены неправильно",
     "LLM JSON Format Support": "Поддержка формата JSON для LLM",
-    "Enable if your LLM supports JSON mode output": "Включите, если ваш LLM поддерживает вывод в формате JSON"
+    "Enable if your LLM supports JSON mode output": "Включите, если ваш LLM поддерживает вывод в формате JSON",
+    "MiniMax API Key": "API-ключ MiniMax",
+    "MiniMax Voice": "Голос MiniMax",
+    "MiniMax TTS Model": "Модель TTS MiniMax",
+    "English Female, Graceful": "Английский женский, грациозный",
+    "English Male, Insightful": "Английский мужской, проницательный",
+    "English Female, Radiant": "Английский женский, сияющий",
+    "English Male, Persuasive": "Английский мужской, убедительный",
+    "English, Robot": "Английский, робот",
+    "Female, Wise": "Женский, мудрый",
+    "Friendly": "Дружелюбный",
+    "Female, Inspirational": "Женский, вдохновляющий",
+    "Male, Deep Voice": "Мужской, глубокий голос",
+    "Female, Sweet": "Женский, сладкий",
+    "Male, Cute": "Мужской, милый",
+    "Female, Lovely": "Женский, прелестный"
 }
diff --git a/translations/zh-CN.json b/translations/zh-CN.json
index 04596db6..2abf5470 100644
--- a/translations/zh-CN.json
+++ b/translations/zh-CN.json
@@ -106,6 +106,21 @@
     "No NVIDIA GPU detected": "未检测到NVIDIA GPU",
     "No NVIDIA GPU detected or NVIDIA drivers not properly installed": "未检测到NVIDIA GPU或NVIDIA驱动未正确安装",
     "LLM JSON Format Support": "LLM JSON格式支持",
-    "Enable if your LLM supports JSON mode output": "如果选用的LLM支持JSON模式输出，请启用"
+    "Enable if your LLM supports JSON mode output": "如果选用的LLM支持JSON模式输出，请启用",
+    "MiniMax API Key": "MiniMax API密钥",
+    "MiniMax Voice": "MiniMax 语音",
+    "MiniMax TTS Model": "MiniMax TTS模型",
+    "English Female, Graceful": "英语女声，优雅",
+    "English Male, Insightful": "英语男声，沉稳",
+    "English Female, Radiant": "英语女声，活泼",
+    "English Male, Persuasive": "英语男声，有说服力",
+    "English, Robot": "英语，机器人",
+    "Female, Wise": "女声，智慧",
+    "Friendly": "友善",
+    "Female, Inspirational": "女声，励志",
+    "Male, Deep Voice": "男声，低沉",
+    "Female, Sweet": "女声，甜美",
+    "Male, Cute": "男声，可爱",
+    "Female, Lovely": "女声，可爱"
 }
 
diff --git a/translations/zh-HK.json b/translations/zh-HK.json
index cedc73f6..053c859a 100644
--- a/translations/zh-HK.json
+++ b/translations/zh-HK.json
@@ -106,5 +106,20 @@
     "No NVIDIA GPU detected": "未檢測到NVIDIA GPU",
     "No NVIDIA GPU detected or NVIDIA drivers not properly installed": "未檢測到NVIDIA GPU或NVIDIA驅動未正確安裝",
     "LLM JSON Format Support": "LLM JSON格式支持",
-    "Enable if your LLM supports JSON mode output": "如果選用的LLM支持JSON模式輸出，請啟用"
+    "Enable if your LLM supports JSON mode output": "如果選用的LLM支持JSON模式輸出，請啟用",
+    "MiniMax API Key": "MiniMax API密鑰",
+    "MiniMax Voice": "MiniMax 語音",
+    "MiniMax TTS Model": "MiniMax TTS模型",
+    "English Female, Graceful": "英語女聲，優雅",
+    "English Male, Insightful": "英語男聲，沉穩",
+    "English Female, Radiant": "英語女聲，活潑",
+    "English Male, Persuasive": "英語男聲，有說服力",
+    "English, Robot": "英語，機器人",
+    "Female, Wise": "女聲，智慧",
+    "Friendly": "友善",
+    "Female, Inspirational": "女聲，勵志",
+    "Male, Deep Voice": "男聲，低沉",
+    "Female, Sweet": "女聲，甜美",
+    "Male, Cute": "男聲，可愛",
+    "Female, Lovely": "女聲，可愛"
 }