Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ reflect_translate: true
pause_before_translate: false

## ======================== Dubbing Settings ======================== ##
# TTS selection [sf_fish_tts, openai_tts, gpt_sovits, azure_tts, fish_tts, edge_tts, custom_tts]
# TTS selection [sf_fish_tts, openai_tts, gpt_sovits, azure_tts, fish_tts, edge_tts, custom_tts, sf_cosyvoice2, f5tts, minimax_tts]
tts_method: 'azure_tts'

# SiliconFlow FishTTS
Expand Down Expand Up @@ -119,6 +119,12 @@ gpt_sovits:
f5tts:
302_api: 'YOUR_302_API_KEY'

# MiniMax TTS configuration, get API key from https://www.minimax.io
minimax_tts:
api_key: 'YOUR_MINIMAX_API_KEY'
model: 'speech-2.8-hd'
voice: 'English_Graceful_Lady'

# *Audio speed range
speed_factor:
min: 1
Expand Down
37 changes: 36 additions & 1 deletion core/st_utils/sidebar_setting.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def page_setting():
update_key("burn_subtitles", burn_subtitles)
st.rerun()
with st.expander(t("Dubbing Settings"), expanded=True):
tts_methods = ["azure_tts", "openai_tts", "fish_tts", "sf_fish_tts", "edge_tts", "gpt_sovits", "custom_tts", "sf_cosyvoice2", "f5tts"]
tts_methods = ["azure_tts", "openai_tts", "fish_tts", "sf_fish_tts", "edge_tts", "gpt_sovits", "custom_tts", "sf_cosyvoice2", "f5tts", "minimax_tts"]
select_tts = st.selectbox(t("TTS Method"), options=tts_methods, index=tts_methods.index(load_key("tts_method")))
if select_tts != load_key("tts_method"):
update_key("tts_method", select_tts)
Expand Down Expand Up @@ -151,6 +151,41 @@ def page_setting():

elif select_tts == "f5tts":
config_input("302ai API", "f5tts.302_api")

elif select_tts == "minimax_tts":
config_input(t("MiniMax API Key"), "minimax_tts.api_key")
minimax_voices = {
"English_Graceful_Lady": t("English Female, Graceful"),
"English_Insightful_Speaker": t("English Male, Insightful"),
"English_radiant_girl": t("English Female, Radiant"),
"English_Persuasive_Man": t("English Male, Persuasive"),
"English_Lucky_Robot": t("English, Robot"),
"Wise_Woman": t("Female, Wise"),
"Friendly_Person": t("Friendly"),
"Inspirational_girl": t("Female, Inspirational"),
"Deep_Voice_Man": t("Male, Deep Voice"),
"sweet_girl": t("Female, Sweet"),
"cute_boy": t("Male, Cute"),
"lovely_girl": t("Female, Lovely"),
}
selected_voice = st.selectbox(
t("MiniMax Voice"),
options=list(minimax_voices.keys()),
format_func=lambda x: minimax_voices[x],
index=list(minimax_voices.keys()).index(load_key("minimax_tts.voice")) if load_key("minimax_tts.voice") in minimax_voices else 0
)
if selected_voice != load_key("minimax_tts.voice"):
update_key("minimax_tts.voice", selected_voice)
st.rerun()
minimax_models = ["speech-2.8-hd", "speech-2.8-turbo"]
selected_model = st.selectbox(
t("MiniMax TTS Model"),
options=minimax_models,
index=minimax_models.index(load_key("minimax_tts.model")) if load_key("minimax_tts.model") in minimax_models else 0
)
if selected_model != load_key("minimax_tts.model"):
update_key("minimax_tts.model", selected_model)
st.rerun()

def check_api():
try:
Expand Down
79 changes: 79 additions & 0 deletions core/tts_backend/minimax_tts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import io
import requests
from pathlib import Path
from pydub import AudioSegment
from core.utils import load_key, except_handler

# MiniMax text-to-audio (T2A v2) HTTP endpoint; this is the URL minimax_tts posts to.
BASE_URL = "https://api.minimax.io/v1/t2a_v2"
# Same API path on the minimaxi.com host. Not referenced by minimax_tts in this
# module -- presumably the mainland-China endpoint (per the _CN suffix); TODO confirm.
BASE_URL_CN = "https://api.minimaxi.com/v1/t2a_v2"

# Voice IDs accepted by minimax_tts; any other configured voice is rejected
# with a ValueError before a request is sent.
VOICE_LIST = [
    "English_Graceful_Lady",
    "English_Insightful_Speaker",
    "English_radiant_girl",
    "English_Persuasive_Man",
    "English_Lucky_Robot",
    "Wise_Woman",
    "Friendly_Person",
    "Inspirational_girl",
    "Deep_Voice_Man",
    "sweet_girl",
    "cute_boy",
    "lovely_girl",
]

# Known MiniMax speech model names.
# NOTE(review): minimax_tts does not check the configured model against this list.
MODEL_LIST = ["speech-2.8-hd", "speech-2.8-turbo"]

# API reference: https://platform.minimax.io/docs/api-reference/speech-t2a-http

@except_handler("Failed to generate audio using MiniMax TTS", retry=3, delay=1)
def minimax_tts(text, save_path):
    """Synthesize ``text`` with the MiniMax T2A v2 HTTP API and save it as WAV.

    The API key, voice ID and model name are read from the ``minimax_tts``
    config section via ``load_key``. The service is asked for MP3 audio, which
    arrives hex-encoded in the JSON reply; it is decoded and re-exported to
    ``save_path`` in WAV format with pydub.

    Args:
        text: The text to synthesize.
        save_path: Destination path of the WAV file. Missing parent
            directories are created.

    Raises:
        ValueError: If the configured voice is not in ``VOICE_LIST``, or the
            reply carries no audio payload.
        requests.HTTPError: If the service answers with an HTTP error status.

    NOTE(review): the function is wrapped by ``except_handler`` (retry=3,
    delay=1), which may retry and/or intercept these exceptions -- confirm
    against its implementation.
    """
    api_key = load_key("minimax_tts.api_key")
    voice = load_key("minimax_tts.voice")
    model = load_key("minimax_tts.model")

    # Fail fast on a bad voice ID instead of spending a request on it.
    if voice not in VOICE_LIST:
        raise ValueError(f"Invalid voice: {voice}. Please choose from {VOICE_LIST}")

    payload = {
        "model": model,
        "text": text,
        "stream": False,
        "voice_setting": {
            "voice_id": voice,
            "speed": 1.0,
            "vol": 1.0,
            "pitch": 0,
        },
        "audio_setting": {
            "format": "mp3",
            "sample_rate": 32000,
        },
    }

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    response = requests.post(BASE_URL, headers=headers, json=payload, timeout=60)
    response.raise_for_status()

    result = response.json()
    # A reply can parse as JSON yet carry no usable audio: ``data`` may be
    # missing, null, or lack a non-empty ``audio`` field. Reject every such
    # shape with the same ValueError (which echoes the whole reply for
    # diagnosis) rather than letting a null ``data`` surface as a TypeError
    # or an empty payload reach the MP3 decoder.
    data = result.get("data") if isinstance(result, dict) else None
    audio_hex = data.get("audio") if isinstance(data, dict) else None
    if not audio_hex:
        raise ValueError(f"Unexpected API response: {result}")

    # The audio payload is a hex string, not base64.
    audio_bytes = bytes.fromhex(audio_hex)

    # Convert mp3 to wav using pydub
    speech_file_path = Path(save_path)
    speech_file_path.parent.mkdir(parents=True, exist_ok=True)

    audio = AudioSegment.from_mp3(io.BytesIO(audio_bytes))
    audio.export(save_path, format="wav")
    print(f"Audio saved to {speech_file_path}")


# Manual smoke test: running this module directly synthesizes one sample
# sentence and writes it to test.wav (relative to the working directory).
if __name__ == "__main__":
    minimax_tts("Hi! Welcome to VideoLingo!", "test.wav")
3 changes: 3 additions & 0 deletions core/tts_backend/tts_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from core.tts_backend.edge_tts import edge_tts
from core.tts_backend.sf_cosyvoice2 import cosyvoice_tts_for_videolingo
from core.tts_backend.custom_tts import custom_tts
from core.tts_backend.minimax_tts import minimax_tts
from core.prompts import get_correct_text_prompt
from core.tts_backend._302_f5tts import f5_tts_for_videolingo
from core.utils import *
Expand Down Expand Up @@ -64,6 +65,8 @@ def tts_main(text, save_as, number, task_df):
cosyvoice_tts_for_videolingo(text, save_as, number, task_df)
elif TTS_METHOD == 'f5tts':
f5_tts_for_videolingo(text, save_as, number, task_df)
elif TTS_METHOD == 'minimax_tts':
minimax_tts(text, save_as)

# Check generated audio duration
duration = get_audio_duration(save_as)
Expand Down
2 changes: 2 additions & 0 deletions docs/pages/docs/start.en-US.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,11 @@ VideoLingo provides multiple TTS integration methods. Here's a comparison (skip
| 🎙️ SiliconFlow FishTTS | [SiliconFlow](https://cloud.siliconflow.cn/i/ttKDEsxE) | Voice Clone | Unstable cloning effect | 😃 | 😃 |
| 🗣 Edge TTS | Local | Completely free | Average effect | 😐 | 😐 |
| 🗣️ GPT-SoVITS | Local | Best voice cloning | Only supports Chinese/English, requires local inference, complex setup | 🏆 | 🚫 |
| 🔊 MiniMax TTS | [MiniMax](https://www.minimax.io) | HD quality, many voices | Requires API key | 😃 | 🤩 |

- For SiliconFlow FishTTS, get key from [SiliconFlow](https://cloud.siliconflow.cn/i/ttKDEsxE), note that cloning feature requires paid credits;
- For OpenAI TTS, Azure TTS, and Fish TTS, use [302AI](https://gpt302.saaslink.net/C2oHR9) - one API key provides access to all three services
- For MiniMax TTS, get an API key from [MiniMax](https://www.minimax.io); the `speech-2.8-hd` (recommended) and `speech-2.8-turbo` models are supported
> Want to use your own TTS? Modify `core/tts_backend/custom_tts.py`!

<details>
Expand Down
2 changes: 2 additions & 0 deletions docs/pages/docs/start.zh-CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,11 @@ VideoLingo提供了多种 tts 接入方式,以下是对比(如不使用配
| 🎙️ SiliconFlow FishTTS | [硅基流动](https://cloud.siliconflow.cn/i/ttKDEsxE) | 语音克隆 | 克隆效果不稳定 | 😃 | 😃 |
| 🗣 Edge TTS | 本地 | 完全免费 | 效果一般 | 😐 | 😐 |
| 🗣️ GPT-SoVITS | 本地 | 最强语音克隆 | 只支持中英文,需要本地训练推理,配置麻烦 | 🏆 | 🚫 |
| 🔊 MiniMax TTS | [MiniMax](https://www.minimax.io) | 高清音质,多种音色 | 需要API密钥 | 😃 | 🤩 |

- SiliconFlow FishTTS 请在 [硅基流动](https://cloud.siliconflow.cn/i/ttKDEsxE) 获取key,注意克隆功能需要付费充值积分;
- OpenAI TTS、Azure TTS 和 Fish TTS,仅支持 [302AI](https://gpt302.saaslink.net/C2oHR9) - 一个 API key 即可使用所有服务
- MiniMax TTS 请在 [MiniMax](https://www.minimax.io) 获取API密钥,支持 `speech-2.8-hd`(推荐)和 `speech-2.8-turbo` 模型
> 现在还可以在 `core/tts_backend/custom_tts.py` 里自定义tts渠道!

<details>
Expand Down
Empty file added tests/__init__.py
Empty file.
Loading