Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

## ======================== Basic Settings ======================== ##

display_language: "zh-CN"
display_language: "en"

# API settings
api:
Expand Down Expand Up @@ -71,7 +71,7 @@ pause_before_translate: false

## ======================== Dubbing Settings ======================== ##
# TTS selection [sf_fish_tts, openai_tts, gpt_sovits, azure_tts, fish_tts, edge_tts, custom_tts, sf_cosyvoice2, f5tts, camb_tts]
tts_method: 'azure_tts'
tts_method: 'camb_tts'

# SiliconFlow FishTTS
sf_fish_tts:
Expand Down Expand Up @@ -119,6 +119,19 @@ gpt_sovits:
f5tts:
302_api: 'YOUR_302_API_KEY'

# CAMB AI TTS configuration
camb_tts:
api_key: 'YOUR_CAMB_API_KEY'
voice_id: 147320
language: 'en-us'
model: 'mars-flash'

# CAMB AI End-to-End Dubbing configuration
camb_dubbing:
api_key: 'YOUR_CAMB_API_KEY'
source_language: 'English (US)'
target_language: 'Spanish (Spain)'

# *Audio speed range
speed_factor:
min: 1
Expand Down
107 changes: 107 additions & 0 deletions core/dubbing_backend/camb_dubbing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import time
import requests
from pathlib import Path
from rich import print as rprint
from core.utils import load_key

CAMB_API_BASE = "https://client.camb.ai/apis"

# Language display name -> numeric language ID used by the dubbing/translation
# endpoints. The pairs are listed in the order the resulting dict iterates in.
_CAMB_LANGUAGE_PAIRS = (
    ("English (US)", 1),
    ("Spanish (Spain)", 54),
    ("French (France)", 76),
    ("German", 31),
    ("Japanese", 88),
    ("Hindi", 81),
    ("Portuguese (Brazil)", 111),
    ("Chinese (Mandarin)", 139),
    ("Korean", 94),
    ("Italian", 86),
    ("Dutch", 115),
    ("Russian", 46),
    ("Arabic", 50),
)
CAMB_LANGUAGE_IDS = dict(_CAMB_LANGUAGE_PAIRS)

def _headers():
    """Build the HTTP headers sent with every CAMB AI dubbing request.

    The API key is read via ``load_key("camb_dubbing.api_key")`` on each call.
    """
    headers = {"x-api-key": load_key("camb_dubbing.api_key")}
    headers["Content-Type"] = "application/json"
    return headers

def create_dubbing_task(video_url, source_language_id, target_language_ids, request_timeout=30):
    """Submit a dubbing task to CAMB AI and return its task ID.

    Args:
        video_url: URL of the video to dub; sent as ``video_url``.
        source_language_id: Sent as ``source_language``.
        target_language_ids: Sent as ``target_languages``.
        request_timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        The ``task_id`` value from the JSON response.

    Raises:
        RuntimeError: If the response status is not 200, or the response
            body carries no ``task_id``.
    """
    payload = {
        "video_url": video_url,
        "source_language": source_language_id,
        "target_languages": target_language_ids,
    }
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection.
    response = requests.post(
        f"{CAMB_API_BASE}/dub",
        headers=_headers(),
        json=payload,
        timeout=request_timeout,
    )
    if response.status_code != 200:
        raise RuntimeError(f"CAMB dubbing task creation failed: {response.status_code} - {response.text}")

    task_id = response.json().get("task_id")
    if task_id is None:
        # Report the actual response instead of failing with a bare KeyError.
        raise RuntimeError(f"CAMB dubbing response did not include a task_id: {response.text}")

    rprint(f"[green]CAMB dubbing task created: {task_id}[/green]")
    return task_id

def poll_dubbing_status(task_id, poll_interval=5, timeout=600, request_timeout=30):
    """Poll a dubbing task until it finishes and return its run ID.

    Args:
        task_id: Task identifier appended to the ``/dub/`` status URL.
        poll_interval: Seconds to sleep between status requests.
        timeout: Overall wall-clock budget in seconds for the whole wait.
        request_timeout: Seconds to wait on each individual status request.

    Returns:
        The ``run_id`` value from the response whose status is ``SUCCESS``.

    Raises:
        RuntimeError: If a status request does not return 200, the task
            reports ``ERROR``/``FAILED``/``TIMEOUT``, or a ``SUCCESS``
            response carries no ``run_id``.
        TimeoutError: If the task has not finished within ``timeout`` seconds.
    """
    # Measure real elapsed time: summing poll_interval ignores the time spent
    # inside each HTTP request and never advances when poll_interval is 0.
    started = time.monotonic()
    elapsed = 0.0
    while elapsed < timeout:
        response = requests.get(
            f"{CAMB_API_BASE}/dub/{task_id}",
            headers=_headers(),
            timeout=request_timeout,
        )
        if response.status_code != 200:
            raise RuntimeError(f"Failed to get dubbing status: {response.status_code} - {response.text}")
        data = response.json()
        status = data.get("status", "UNKNOWN")
        rprint(f"[cyan]Dubbing status: {status} ({int(elapsed)}s elapsed)[/cyan]")

        if status == "SUCCESS":
            run_id = data.get("run_id")
            if run_id is None:
                # Fail here rather than hand None to the caller.
                raise RuntimeError(f"CAMB dubbing succeeded but returned no run_id: {data}")
            return run_id
        if status in ("ERROR", "FAILED", "TIMEOUT"):
            raise RuntimeError(f"CAMB dubbing failed with status: {status}. Full response: {data}")

        remaining = timeout - (time.monotonic() - started)
        if remaining <= 0:
            break
        # Never sleep past the overall deadline.
        time.sleep(min(poll_interval, remaining))
        elapsed = time.monotonic() - started

    raise TimeoutError(f"CAMB dubbing timed out after {timeout}s")

def download_dubbing_result(run_id, save_path, request_timeout=30):
    """Fetch the dubbed video for ``run_id`` and store it at ``save_path``.

    The result endpoint returns JSON with a ``video_url``; the file behind
    that URL is streamed to disk in chunks instead of being held in memory.
    Data is written to a sibling ``.part`` file and only renamed to
    ``save_path`` after the download completes, so a failed transfer never
    leaves a truncated file at the final location.

    Args:
        run_id: Run identifier appended to the ``/dub-result/`` URL.
        save_path: Destination file path; parent directories are created.
        request_timeout: Seconds to wait on each HTTP request (applies to
            both the metadata call and the file download).

    Returns:
        ``save_path`` as a string.

    Raises:
        RuntimeError: If either request does not return 200, or the result
            JSON has no ``video_url``.
    """
    response = requests.get(
        f"{CAMB_API_BASE}/dub-result/{run_id}",
        headers=_headers(),
        timeout=request_timeout,
    )
    if response.status_code != 200:
        raise RuntimeError(f"Failed to get dubbing result: {response.status_code} - {response.text}")

    data = response.json()
    video_url = data.get("video_url")
    if not video_url:
        raise RuntimeError(f"No video_url in dubbing result: {data}")

    save_path = Path(save_path)
    partial_path = save_path.with_name(save_path.name + ".part")

    # Download the actual video file from the signed URL
    rprint("[cyan]Downloading dubbed video...[/cyan]")
    bytes_written = 0
    with requests.get(video_url, stream=True, timeout=request_timeout) as video_response:
        if video_response.status_code != 200:
            raise RuntimeError(f"Failed to download dubbed video: {video_response.status_code}")

        save_path.parent.mkdir(parents=True, exist_ok=True)
        try:
            with open(partial_path, "wb") as f:
                for chunk in video_response.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        f.write(chunk)
                        bytes_written += len(chunk)
        except Exception:
            # Drop the incomplete file so a retry starts clean.
            if partial_path.exists():
                partial_path.unlink()
            raise

    # Publish the finished file in a single rename.
    partial_path.replace(save_path)
    rprint(f"[green]Dubbed video saved to {save_path} ({bytes_written:,} bytes)[/green]")
    return str(save_path)

def camb_dub(video_url, source_language_id, target_language_ids, save_path, poll_interval=5, timeout=600):
    """Run the full CAMB AI dubbing flow: create the task, wait for it, download the file.

    Returns whatever ``download_dubbing_result`` returns for ``save_path``.
    """
    run_id = poll_dubbing_status(
        create_dubbing_task(video_url, source_language_id, target_language_ids),
        poll_interval=poll_interval,
        timeout=timeout,
    )
    return download_dubbing_result(run_id, save_path)

if __name__ == "__main__":
    # Manual smoke test; the video URL must be publicly accessible.
    demo_args = {
        "video_url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
        "source_language_id": 1,
        "target_language_ids": [54],
        "save_path": "test_dubbed.mp4",
    }
    result = camb_dub(**demo_args)
    print(f"Result: {result}")
19 changes: 17 additions & 2 deletions core/st_utils/sidebar_setting.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def page_setting():
update_key("burn_subtitles", burn_subtitles)
st.rerun()
with st.expander(t("Dubbing Settings"), expanded=True):
tts_methods = ["azure_tts", "openai_tts", "fish_tts", "sf_fish_tts", "edge_tts", "gpt_sovits", "custom_tts", "sf_cosyvoice2", "f5tts"]
tts_methods = ["azure_tts", "openai_tts", "fish_tts", "sf_fish_tts", "edge_tts", "gpt_sovits", "custom_tts", "sf_cosyvoice2", "f5tts", "camb_tts"]
select_tts = st.selectbox(t("TTS Method"), options=tts_methods, index=tts_methods.index(load_key("tts_method")))
if select_tts != load_key("tts_method"):
update_key("tts_method", select_tts)
Expand Down Expand Up @@ -151,7 +151,22 @@ def page_setting():

elif select_tts == "f5tts":
config_input("302ai API", "f5tts.302_api")


elif select_tts == "camb_tts":
config_input("CAMB AI API Key", "camb_tts.api_key")
config_input(t("CAMB Voice ID"), "camb_tts.voice_id")
camb_lang_options = ["en-us", "es-es", "fr-fr", "de-de", "ja-jp", "hi-in", "pt-br", "zh-cn", "ko-kr", "it-it", "nl-nl", "ru-ru", "ar-sa"]
camb_lang = st.selectbox("CAMB Language", options=camb_lang_options, index=camb_lang_options.index(load_key("camb_tts.language")) if load_key("camb_tts.language") in camb_lang_options else 0)
if camb_lang != load_key("camb_tts.language"):
update_key("camb_tts.language", camb_lang)
st.rerun()
camb_models = ["mars-flash", "mars-pro", "mars-instruct", "mars-nano"]
camb_model = st.selectbox("CAMB Model", options=camb_models, index=camb_models.index(load_key("camb_tts.model")) if load_key("camb_tts.model") in camb_models else 0)
if camb_model != load_key("camb_tts.model"):
update_key("camb_tts.model", camb_model)
st.rerun()


def check_api():
try:
resp = ask_gpt("This is a test, response 'message':'success' in json format.",
Expand Down
51 changes: 51 additions & 0 deletions core/tts_backend/camb_tts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from pathlib import Path
import requests
import json
from core.utils import load_key, except_handler

CAMB_API_BASE = "https://client.camb.ai/apis"

def _api_key():
    """Return the value stored under the ``camb_tts.api_key`` config key."""
    key = load_key("camb_tts.api_key")
    return key

def _headers(content_type="application/json"):
    """Build request headers for CAMB AI TTS calls.

    ``x-api-key`` is always present; ``Content-Type`` is added only when
    ``content_type`` is truthy.
    """
    if not content_type:
        return {"x-api-key": _api_key()}
    return {"x-api-key": _api_key(), "Content-Type": content_type}

@except_handler("Failed to generate audio using CAMB AI TTS", retry=3, delay=1)
def camb_tts(text, save_path, request_timeout=60):
    """Synthesize ``text`` with CAMB AI TTS and write the audio to ``save_path``.

    Voice ID, language and model are read from the ``camb_tts.*`` config
    keys. The request asks for ``wav`` output from the ``/tts-stream``
    endpoint.

    Args:
        text: Text to synthesize.
        save_path: Destination audio file; parent directories are created.
        request_timeout: Seconds to wait on the HTTP request before giving up.

    Raises:
        RuntimeError: If the response status is not 200, or the response
            body is empty.
    """
    voice_id = int(load_key("camb_tts.voice_id"))
    language = load_key("camb_tts.language")
    model = load_key("camb_tts.model")

    payload = json.dumps({
        "text": text,
        "voice_id": voice_id,
        "language": language,
        "speech_model": model,
        "output_configuration": {"format": "wav"},
    })

    speech_file_path = Path(save_path)
    speech_file_path.parent.mkdir(parents=True, exist_ok=True)

    # Without a timeout a stalled connection blocks forever and never raises,
    # so the surrounding error handler gets no chance to react.
    response = requests.post(
        f"{CAMB_API_BASE}/tts-stream",
        headers=_headers(),
        data=payload,
        timeout=request_timeout,
    )

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        print(response.text)
        raise RuntimeError(f"CAMB AI TTS failed: {response.status_code}")

    if not response.content:
        # Fail here instead of saving a zero-byte file as if it were audio.
        raise RuntimeError("CAMB AI TTS returned an empty audio body")

    with open(speech_file_path, "wb") as f:
        f.write(response.content)
    print(f"Audio saved to {speech_file_path}")


if __name__ == "__main__":
    # Manual smoke test: synthesize one sentence into a local wav file.
    sample_text = "Hi! Welcome to VideoLingo!"
    sample_output = "test_camb.wav"
    camb_tts(sample_text, sample_output)
3 changes: 3 additions & 0 deletions core/tts_backend/tts_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from core.tts_backend.edge_tts import edge_tts
from core.tts_backend.sf_cosyvoice2 import cosyvoice_tts_for_videolingo
from core.tts_backend.custom_tts import custom_tts
from core.tts_backend.camb_tts import camb_tts
from core.prompts import get_correct_text_prompt
from core.tts_backend._302_f5tts import f5_tts_for_videolingo
from core.utils import *
Expand Down Expand Up @@ -64,6 +65,8 @@ def tts_main(text, save_as, number, task_df):
cosyvoice_tts_for_videolingo(text, save_as, number, task_df)
elif TTS_METHOD == 'f5tts':
f5_tts_for_videolingo(text, save_as, number, task_df)
elif TTS_METHOD == 'camb_tts':
camb_tts(text, save_as)

# Check generated audio duration
duration = get_audio_duration(save_as)
Expand Down
54 changes: 51 additions & 3 deletions st.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os, sys
from core.st_utils.imports_and_utils import *
from core import *
from core.dubbing_backend.camb_dubbing import camb_dub, CAMB_LANGUAGE_IDS

# SET PATH
current_dir = os.path.dirname(os.path.abspath(__file__))
Expand Down Expand Up @@ -105,6 +106,46 @@ def process_audio():
st.success(t("Audio processing complete! πŸŽ‡"))
st.balloons()

def camb_dubbing_section():
    """Render the CAMB AI one-step dubbing tab.

    Shows an API-key field, then either the dubbing form (video URL plus
    source/target language pickers) or, when the output file already exists,
    the finished video with a delete button.
    """
    st.markdown(f"<p style='font-size: 20px;'>{t('Dub your video in one step using CAMB AI. Handles transcription, translation, and dubbing automatically.')}</p>", unsafe_allow_html=True)

    camb_dub_output = "output/output_camb_dub.mp4"

    # API key: masked so the secret is not displayed in clear text.
    saved_api_key = load_key("camb_dubbing.api_key")
    api_key = st.text_input("CAMB AI API Key", value=saved_api_key, type="password", key="camb_dub_api_key")
    if api_key != saved_api_key:
        update_key("camb_dubbing.api_key", api_key)

    if os.path.exists(camb_dub_output):
        st.success(t("CAMB AI dubbing is complete!"))
        st.video(camb_dub_output)
        if st.button(t("Delete CAMB dubbed video"), key="delete_camb_dub"):
            os.remove(camb_dub_output)
            st.rerun()
        return

    video_url = st.text_input(t("Video URL (YouTube, Google Drive, or direct link)"), key="camb_dub_url")

    lang_names = list(CAMB_LANGUAGE_IDS.keys())
    c1, c2 = st.columns(2)
    with c1:
        source_lang = st.selectbox(t("Source Language"), options=lang_names, index=0, key="camb_dub_source_main")
    with c2:
        target_lang = st.selectbox(t("Target Language"), options=lang_names, index=1, key="camb_dub_target_main")

    if not st.button(t("Start CAMB Dubbing"), key="camb_dubbing_button"):
        return

    # Treat a whitespace-only entry the same as an empty one.
    video_url = (video_url or "").strip()
    if not video_url:
        st.error(t("Please enter a video URL"))
        return

    source_id = CAMB_LANGUAGE_IDS[source_lang]
    target_id = CAMB_LANGUAGE_IDS[target_lang]
    with st.spinner(t("CAMB AI is dubbing your video... This may take several minutes.")):
        try:
            camb_dub(video_url, source_id, [target_id], camb_dub_output)
        except Exception as e:
            st.error(f"CAMB dubbing failed: {e}")
            return

    # Rerun outside the try block so the broad handler above can only ever
    # see errors raised by the dubbing call itself.
    st.success(t("CAMB dubbing complete!"))
    st.rerun()

def main():
logo_col, _ = st.columns([1,1])
with logo_col:
Expand All @@ -116,9 +157,16 @@ def main():
with st.sidebar:
page_setting()
st.markdown(give_star_button, unsafe_allow_html=True)
download_video_section()
text_processing_section()
audio_processing_section()

tab_videolingo, tab_camb = st.tabs([t("VideoLingo Pipeline"), t("CAMB AI Dubbing")])

with tab_videolingo:
download_video_section()
text_processing_section()
audio_processing_section()

with tab_camb:
camb_dubbing_section()

# Run the app only when this file is executed as a script.
if __name__ == "__main__":
    main()