"""CAMB AI end-to-end dubbing client: submit a task, poll it, download the result.

The API key is read through ``load_key("camb_dubbing.api_key")``; the project
configuration also carries ``camb_dubbing.source_language`` and
``camb_dubbing.target_language`` entries, which this module does not read.
"""
import time
import requests
from pathlib import Path
from rich import print as rprint
from core.utils import load_key

CAMB_API_BASE = "https://client.camb.ai/apis"

# (connect, read) timeout in seconds applied to every HTTP call. ``requests``
# has no default timeout, so without it a stalled connection blocks forever and
# the polling deadline below could never fire.
REQUEST_TIMEOUT = (10, 60)

# Chunk size used when streaming the dubbed video to disk.
DOWNLOAD_CHUNK_SIZE = 1024 * 1024

# Numeric language IDs for dubbing/translation endpoints
CAMB_LANGUAGE_IDS = {
    "English (US)": 1,
    "Spanish (Spain)": 54,
    "French (France)": 76,
    "German": 31,
    "Japanese": 88,
    "Hindi": 81,
    "Portuguese (Brazil)": 111,
    "Chinese (Mandarin)": 139,
    "Korean": 94,
    "Italian": 86,
    "Dutch": 115,
    "Russian": 46,
    "Arabic": 50,
}


def _headers():
    """Build the request headers carrying the configured CAMB dubbing API key."""
    api_key = load_key("camb_dubbing.api_key")
    return {
        "x-api-key": api_key,
        "Content-Type": "application/json",
    }


def create_dubbing_task(video_url, source_language_id, target_language_ids):
    """Submit a dubbing task to CAMB AI.

    Args:
        video_url: URL of the video to dub, sent as ``video_url``.
        source_language_id: Language ID sent as ``source_language``.
        target_language_ids: List of language IDs sent as ``target_languages``.

    Returns:
        The ``task_id`` reported by the API.

    Raises:
        RuntimeError: If the API answers with a non-200 status, or with a
            body that carries no ``task_id``.
    """
    payload = {
        "video_url": video_url,
        "source_language": source_language_id,
        "target_languages": target_language_ids,
    }
    response = requests.post(
        f"{CAMB_API_BASE}/dub",
        headers=_headers(),
        json=payload,
        timeout=REQUEST_TIMEOUT,
    )
    if response.status_code != 200:
        raise RuntimeError(f"CAMB dubbing task creation failed: {response.status_code} - {response.text}")

    # Report a missing task_id with the server's answer attached instead of
    # letting a bare KeyError escape.
    task_id = response.json().get("task_id")
    if task_id is None:
        raise RuntimeError(f"CAMB dubbing task creation returned no task_id: {response.text}")
    rprint(f"[green]CAMB dubbing task created: {task_id}[/green]")
    return task_id


def poll_dubbing_status(task_id, poll_interval=5, timeout=600):
    """Poll a dubbing task until it finishes.

    Args:
        task_id: Identifier returned by ``create_dubbing_task``.
        poll_interval: Seconds to wait between two status requests.
        timeout: Overall deadline in seconds, measured on a monotonic clock
            so that time spent inside the HTTP requests counts as well.

    Returns:
        The ``run_id`` of the finished task.

    Raises:
        RuntimeError: If a status request fails, the task reports ``ERROR``,
            ``FAILED`` or ``TIMEOUT``, or it succeeds without a ``run_id``.
        TimeoutError: If the task is still unfinished after ``timeout`` seconds.
    """
    started = time.monotonic()
    elapsed = 0
    while elapsed < timeout:
        response = requests.get(
            f"{CAMB_API_BASE}/dub/{task_id}",
            headers=_headers(),
            timeout=REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            raise RuntimeError(f"Failed to get dubbing status: {response.status_code} - {response.text}")
        data = response.json()
        status = data.get("status", "UNKNOWN")
        rprint(f"[cyan]Dubbing status: {status} ({elapsed}s elapsed)[/cyan]")

        if status == "SUCCESS":
            run_id = data.get("run_id")
            if run_id is None:
                # Returning None here would make the caller request
                # ".../dub-result/None"; fail where the cause is visible.
                raise RuntimeError(f"CAMB dubbing succeeded but returned no run_id: {data}")
            return run_id
        if status in ("ERROR", "FAILED", "TIMEOUT"):
            raise RuntimeError(f"CAMB dubbing failed with status: {status}. Full response: {data}")

        remaining = timeout - (time.monotonic() - started)
        if remaining <= 0:
            break
        # Never sleep past the deadline.
        time.sleep(min(poll_interval, remaining))
        elapsed = int(time.monotonic() - started)

    raise TimeoutError(f"CAMB dubbing timed out after {timeout}s")


def download_dubbing_result(run_id, save_path):
    """Download the dubbed video of a finished run.

    The video is streamed into ``<save_path>.part`` and renamed onto
    ``save_path`` only once the download is complete, so ``save_path`` never
    holds a truncated file.

    Args:
        run_id: Identifier returned by ``poll_dubbing_status``.
        save_path: Destination file; missing parent directories are created.

    Returns:
        ``save_path`` as a string.

    Raises:
        RuntimeError: If the result lookup or the video download answers with
            a non-200 status, or the result carries no ``video_url``.
    """
    response = requests.get(
        f"{CAMB_API_BASE}/dub-result/{run_id}",
        headers=_headers(),
        timeout=REQUEST_TIMEOUT,
    )
    if response.status_code != 200:
        raise RuntimeError(f"Failed to get dubbing result: {response.status_code} - {response.text}")

    data = response.json()
    video_url = data.get("video_url")
    if not video_url:
        raise RuntimeError(f"No video_url in dubbing result: {data}")

    save_path = Path(save_path)
    partial_path = save_path.with_name(save_path.name + ".part")

    # Download the actual video file from the signed URL
    rprint("[cyan]Downloading dubbed video...[/cyan]")
    total_bytes = 0
    try:
        with requests.get(video_url, stream=True, timeout=REQUEST_TIMEOUT) as video_response:
            if video_response.status_code != 200:
                raise RuntimeError(f"Failed to download dubbed video: {video_response.status_code}")

            save_path.parent.mkdir(parents=True, exist_ok=True)
            with open(partial_path, "wb") as f:
                # Stream in chunks so the whole video is never held in memory.
                for chunk in video_response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
                    f.write(chunk)
                    total_bytes += len(chunk)
        partial_path.replace(save_path)
    except BaseException:
        # Leave no half-written file behind, whatever interrupted the download.
        if partial_path.exists():
            partial_path.unlink()
        raise

    rprint(f"[green]Dubbed video saved to {save_path} ({total_bytes:,} bytes)[/green]")
    return str(save_path)


def camb_dub(video_url, source_language_id, target_language_ids, save_path, poll_interval=5, timeout=600):
    """End-to-end dubbing: submit, poll, download.

    Args:
        video_url: URL of the video to dub.
        source_language_id: Language ID of the source audio.
        target_language_ids: List of language IDs to dub into.
        save_path: Where the dubbed video is written.
        poll_interval: Seconds between status requests.
        timeout: Polling deadline in seconds.

    Returns:
        The path of the saved video as a string.
    """
    task_id = create_dubbing_task(video_url, source_language_id, target_language_ids)
    run_id = poll_dubbing_status(task_id, poll_interval=poll_interval, timeout=timeout)
    return download_dubbing_result(run_id, save_path)


if __name__ == "__main__":
    # Quick test - requires a publicly accessible video URL
    result = camb_dub(
        video_url="https://www.youtube.com/watch?v=dQw4w9WgXcQ",
        source_language_id=1,
        target_language_ids=[54],
        save_path="test_dubbed.mp4",
    )
    print(f"Result: {result}")
b/core/st_utils/sidebar_setting.py @@ -84,7 +84,7 @@ def page_setting(): update_key("burn_subtitles", burn_subtitles) st.rerun() with st.expander(t("Dubbing Settings"), expanded=True): - tts_methods = ["azure_tts", "openai_tts", "fish_tts", "sf_fish_tts", "edge_tts", "gpt_sovits", "custom_tts", "sf_cosyvoice2", "f5tts"] + tts_methods = ["azure_tts", "openai_tts", "fish_tts", "sf_fish_tts", "edge_tts", "gpt_sovits", "custom_tts", "sf_cosyvoice2", "f5tts", "camb_tts"] select_tts = st.selectbox(t("TTS Method"), options=tts_methods, index=tts_methods.index(load_key("tts_method"))) if select_tts != load_key("tts_method"): update_key("tts_method", select_tts) @@ -151,7 +151,22 @@ def page_setting(): elif select_tts == "f5tts": config_input("302ai API", "f5tts.302_api") - + + elif select_tts == "camb_tts": + config_input("CAMB AI API Key", "camb_tts.api_key") + config_input(t("CAMB Voice ID"), "camb_tts.voice_id") + camb_lang_options = ["en-us", "es-es", "fr-fr", "de-de", "ja-jp", "hi-in", "pt-br", "zh-cn", "ko-kr", "it-it", "nl-nl", "ru-ru", "ar-sa"] + camb_lang = st.selectbox("CAMB Language", options=camb_lang_options, index=camb_lang_options.index(load_key("camb_tts.language")) if load_key("camb_tts.language") in camb_lang_options else 0) + if camb_lang != load_key("camb_tts.language"): + update_key("camb_tts.language", camb_lang) + st.rerun() + camb_models = ["mars-flash", "mars-pro", "mars-instruct", "mars-nano"] + camb_model = st.selectbox("CAMB Model", options=camb_models, index=camb_models.index(load_key("camb_tts.model")) if load_key("camb_tts.model") in camb_models else 0) + if camb_model != load_key("camb_tts.model"): + update_key("camb_tts.model", camb_model) + st.rerun() + + def check_api(): try: resp = ask_gpt("This is a test, response 'message':'success' in json format.", diff --git a/core/tts_backend/camb_tts.py b/core/tts_backend/camb_tts.py new file mode 100644 index 00000000..d556336f --- /dev/null +++ b/core/tts_backend/camb_tts.py @@ -0,0 +1,51 @@ 
from pathlib import Path
import requests
import json
from core.utils import load_key, except_handler

CAMB_API_BASE = "https://client.camb.ai/apis"

# (connect, read) timeout in seconds. ``requests`` has no default timeout, so
# without it one stalled connection would block the caller indefinitely.
REQUEST_TIMEOUT = (10, 60)


def _api_key():
    """Return the API key stored under ``camb_tts.api_key``."""
    return load_key("camb_tts.api_key")


def _headers(content_type="application/json"):
    """Build the request headers.

    Args:
        content_type: Value for the ``Content-Type`` header; a falsy value
            leaves the header out.

    Returns:
        A dict holding ``x-api-key`` and, optionally, ``Content-Type``.
    """
    h = {"x-api-key": _api_key()}
    if content_type:
        h["Content-Type"] = content_type
    return h


@except_handler("Failed to generate audio using CAMB AI TTS", retry=3, delay=1)
def camb_tts(text, save_path):
    """Synthesize ``text`` with CAMB AI TTS and write the audio to ``save_path``.

    Voice, language and speech model come from the ``camb_tts.voice_id``,
    ``camb_tts.language`` and ``camb_tts.model`` configuration entries; the
    request asks for ``wav`` output.

    Args:
        text: Text to synthesize.
        save_path: Destination audio file; missing parent directories are
            created.

    Raises:
        RuntimeError: If the API answers with a non-200 status or with an
            empty body.
    """
    voice_id = int(load_key("camb_tts.voice_id"))
    language = load_key("camb_tts.language")
    model = load_key("camb_tts.model")

    payload = json.dumps({
        "text": text,
        "voice_id": voice_id,
        "language": language,
        "speech_model": model,
        "output_configuration": {"format": "wav"},
    })

    speech_file_path = Path(save_path)
    speech_file_path.parent.mkdir(parents=True, exist_ok=True)

    response = requests.post(
        f"{CAMB_API_BASE}/tts-stream",
        headers=_headers(),
        data=payload,
        timeout=REQUEST_TIMEOUT,
    )

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        print(response.text)
        # Carry the server's explanation in the exception, not only on stdout.
        raise RuntimeError(f"CAMB AI TTS failed: {response.status_code} - {response.text}")

    if not response.content:
        # A 200 with no payload would otherwise be saved as a zero-byte wav
        # and reported as a success.
        raise RuntimeError("CAMB AI TTS returned an empty audio response")

    with open(speech_file_path, "wb") as f:
        f.write(response.content)
    print(f"Audio saved to {speech_file_path}")


if __name__ == "__main__":
    camb_tts("Hi! Welcome to VideoLingo!", "test_camb.wav")
def camb_dubbing_section():
    """Render the CAMB AI one-step dubbing UI.

    While ``output/output_camb_dub.mp4`` does not exist, the section shows the
    API key field, a video URL field and source/target language pickers, and
    starts ``camb_dub`` when the button is pressed. Once the file exists, the
    section shows the video and a button that deletes it.
    """
    intro = t('Dub your video in one step using CAMB AI. Handles transcription, translation, and dubbing automatically.')
    st.markdown(f"\n\n{intro}\n\n", unsafe_allow_html=True)

    camb_dub_output = "output/output_camb_dub.mp4"

    # API key (masked so it is not readable on screen or in screenshots)
    saved_key = load_key("camb_dubbing.api_key")
    api_key = st.text_input("CAMB AI API Key", value=saved_key, type="password", key="camb_dub_api_key")
    if api_key != saved_key:
        update_key("camb_dubbing.api_key", api_key)

    if os.path.exists(camb_dub_output):
        st.success(t("CAMB AI dubbing is complete!"))
        st.video(camb_dub_output)
        if st.button(t("Delete CAMB dubbed video"), key="delete_camb_dub"):
            try:
                os.remove(camb_dub_output)
            except FileNotFoundError:
                # Already gone (e.g. removed by hand); the goal is reached.
                pass
            st.rerun()
        return

    video_url = st.text_input(t("Video URL (YouTube, Google Drive, or direct link)"), key="camb_dub_url")

    lang_names = list(CAMB_LANGUAGE_IDS.keys())
    c1, c2 = st.columns(2)
    with c1:
        source_lang = st.selectbox(t("Source Language"), options=lang_names, index=0, key="camb_dub_source_main")
    with c2:
        target_lang = st.selectbox(t("Target Language"), options=lang_names, index=1, key="camb_dub_target_main")

    if not st.button(t("Start CAMB Dubbing"), key="camb_dubbing_button"):
        return

    # A whitespace-only URL is as unusable as an empty one.
    video_url = (video_url or "").strip()
    if not video_url:
        st.error(t("Please enter a video URL"))
        return
    if source_lang == target_lang:
        # Dubbing a video into its own language would only burn a paid task.
        st.error(t("Source and target languages must be different"))
        return

    source_id = CAMB_LANGUAGE_IDS[source_lang]
    target_id = CAMB_LANGUAGE_IDS[target_lang]
    with st.spinner(t("CAMB AI is dubbing your video... This may take several minutes.")):
        try:
            camb_dub(video_url, source_id, [target_id], camb_dub_output)
        except Exception as e:
            # UI boundary: show any failure to the user instead of crashing the page.
            st.error(f"CAMB dubbing failed: {e}")
            return

    # Only the dubbing call is guarded above; the rerun stays outside the try.
    st.success(t("CAMB dubbing complete!"))
    st.rerun()