Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions frontend/src/components/UrlInput.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -72,14 +72,14 @@ export function UrlInput({ onSubmit, disabled }: UrlInputProps) {
return;
}

const youtubePattern = /^https?:\/\/(www\.)?(youtube\.com\/watch\?v=|youtu\.be\/)/;
if (!youtubePattern.test(url)) {
const urlPattern = /^https?:\/\/.+/;
if (!urlPattern.test(url)) {
setError(t('error.invalid_url'));
return;
}

const request: JobCreateRequest = {
youtube_url: url,
source_url: url,
};
if (startSeconds !== undefined) request.start_time = startSeconds;
if (endSeconds !== undefined) request.end_time = endSeconds;
Expand Down
9 changes: 4 additions & 5 deletions frontend/src/i18n/en.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"app": {
"title": "BilingualSub",
"subtitle": "YouTube Bilingual Subtitle Generator",
"subtitle": "Bilingual Subtitle Generator for YouTube, X, TikTok & More",
"processing_title": "Processing Video",
"processing_desc": "Please wait while we transcribe and translate.",
"error_title": "Something went wrong",
Expand All @@ -17,8 +17,7 @@
"download_audio": "Download Audio"
},
"form": {
"url_placeholder": "Paste YouTube video URL",
"paste_placeholder": "Paste YouTube URL...",
"paste_placeholder": "Paste video URL (YouTube / X / Twitter / TikTok…)",
"source_lang": "Source Language",
"target_lang": "Target Language",
"label_translate": "Translate",
Expand All @@ -33,7 +32,7 @@
"enable_range": "Set Range",
"disable_range": "Disable Range",
"clear_range": "Clear",
"inputModeUrl": "YouTube URL",
"inputModeUrl": "Video URL",
"inputModeFile": "Upload File",
"filePlaceholder": "Choose a video or audio file",
"fileSelected": "Selected: {{filename}}"
Expand Down Expand Up @@ -66,7 +65,7 @@
"unsupported": "Your browser does not support video playback"
},
"error": {
"invalid_url": "Please enter a valid YouTube URL",
"invalid_url": "Please enter a valid video URL",
"job_not_found": "Job not found",
"download_failed": "Video download failed",
"transcription_failed": "Transcription failed",
Expand Down
9 changes: 4 additions & 5 deletions frontend/src/i18n/zh-TW.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"app": {
"title": "BilingualSub",
"subtitle": "YouTube 雙語字幕自動生成工具",
"subtitle": "多平台雙語字幕自動生成工具",
"processing_title": "影片處理中",
"processing_desc": "請稍候,我們正在進行語音辨識與翻譯。",
"error_title": "發生錯誤",
Expand All @@ -17,8 +17,7 @@
"download_audio": "下載音訊"
},
"form": {
"url_placeholder": "貼上 YouTube 影片網址",
"paste_placeholder": "貼上 YouTube 網址...",
"paste_placeholder": "貼上影片網址(YouTube / X / Twitter / TikTok…)",
"source_lang": "原始語言",
"target_lang": "翻譯語言",
"label_translate": "翻譯語言",
Expand All @@ -33,7 +32,7 @@
"enable_range": "設定範圍",
"disable_range": "取消範圍",
"clear_range": "清除",
"inputModeUrl": "YouTube 網址",
"inputModeUrl": "影片網址",
"inputModeFile": "上傳檔案",
"filePlaceholder": "選擇影片或音訊檔案",
"fileSelected": "已選擇:{{filename}}"
Expand Down Expand Up @@ -66,7 +65,7 @@
"unsupported": "您的瀏覽器不支援影片播放"
},
"error": {
"invalid_url": "請輸入有效的 YouTube 網址",
"invalid_url": "請輸入有效的影片網址",
"job_not_found": "找不到此任務",
"download_failed": "影片下載失敗",
"transcription_failed": "語音辨識失敗",
Expand Down
2 changes: 1 addition & 1 deletion frontend/src/types.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import type { FileType, JobStatus } from './constants';

export interface JobCreateRequest {
youtube_url: string;
source_url: string;
source_lang?: string;
target_lang?: string;
start_time?: number; // seconds
Expand Down
8 changes: 4 additions & 4 deletions src/bilingualsub/api/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class Job:
"""Represents a subtitle generation job."""

id: str
youtube_url: str = ""
source_url: str = ""
source_lang: str = ""
target_lang: str = ""
local_video_path: Path | None = None
Expand Down Expand Up @@ -59,7 +59,7 @@ def __init__(self) -> None:

def create_job(
self,
youtube_url: str = "",
source_url: str = "",
source_lang: str = "",
target_lang: str = "",
start_time: float | None = None,
Expand All @@ -70,15 +70,15 @@ def create_job(
job_id = uuid.uuid4().hex[:12]
job = Job(
id=job_id,
youtube_url=youtube_url,
source_url=source_url,
source_lang=source_lang,
target_lang=target_lang,
local_video_path=local_video_path,
start_time=start_time,
end_time=end_time,
)
self._jobs[job_id] = job
logger.info("job_created", job_id=job_id, youtube_url=youtube_url)
logger.info("job_created", job_id=job_id, source_url=source_url)
return job

def get_job(self, job_id: str) -> Job | None:
Expand Down
112 changes: 3 additions & 109 deletions src/bilingualsub/api/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
TranscriptionError,
TranslationError,
VideoMetadata,
download_youtube_video,
download_video,
merge_subtitles,
transcribe_audio,
translate_subtitle,
Expand Down Expand Up @@ -236,8 +236,8 @@ def _put_event() -> None:
loop.call_soon_threadsafe(_put_event)

metadata = await asyncio.to_thread(
download_youtube_video,
job.youtube_url,
download_video,
job.source_url,
video_path,
on_progress=_on_download_progress,
start_time=job.start_time,
Expand Down Expand Up @@ -385,112 +385,6 @@ async def run_subtitle(job: Job) -> None:
)


async def run_pipeline(job: Job) -> None:
    """Execute the subtitle generation pipeline for a job.

    Steps: acquire video -> extract audio -> transcribe -> translate ->
    merge/serialize -> register the source video and signal completion.
    No burn step is performed in this function.

    The transcribe, translate and merge calls run via asyncio.to_thread();
    SRT/ASS serialization and the file writes run inline.
    Progress is reported through _send_progress() before each of the
    transcribe, translate and merge steps.

    Error handling: a PipelineError is re-raised unchanged. Any other
    exception is converted with _to_pipeline_error(), reported through
    _send_error() and logged; it is not re-raised.
    """
    log = logger.bind(job_id=job.id)
    # NOTE(review): work_dir is never removed in this function — presumably
    # because job.output_files keeps pointing into it; confirm cleanup
    # happens elsewhere.
    work_dir = Path(tempfile.mkdtemp(prefix=f"bilingualsub_{job.id}_"))

    try:
        # --- Step 1: Download or use local file ---
        video_path, metadata = await _acquire_video(job, work_dir, log)
        # Trimming is now handled during download via start_time/end_time

        # --- Step 1.5: Extract audio ---
        audio_path = await _extract_audio_step(job, video_path, work_dir, log)

        # --- Step 2: Transcribe ---
        _send_progress(
            job, JobStatus.TRANSCRIBING, 20.0, "transcribe", "Transcribing audio"
        )
        # t0 is reset before each step; it feeds the duration_ms log field.
        t0 = time.monotonic()
        original_sub = await asyncio.to_thread(
            transcribe_audio, audio_path, language=job.source_lang
        )
        log.info(
            "step_done",
            step="transcribe",
            duration_ms=int((time.monotonic() - t0) * 1000),
        )

        # --- Step 3: Translate ---
        _send_progress(
            job, JobStatus.TRANSLATING, 50.0, "translate", "Translating subtitles"
        )
        t0 = time.monotonic()

        # Callbacks built per job and handed to translate_subtitle below.
        _on_translate_progress = _make_translate_progress_cb(job)
        _on_rate_limit = _make_rate_limit_cb(job)

        translated_sub = await asyncio.to_thread(
            translate_subtitle,
            original_sub,
            source_lang=job.source_lang,
            target_lang=job.target_lang,
            video_title=metadata.title,
            video_description=metadata.description,
            on_progress=_on_translate_progress,
            on_rate_limit=_on_rate_limit,
        )
        log.info(
            "step_done",
            step="translate",
            duration_ms=int((time.monotonic() - t0) * 1000),
        )

        # --- Step 4: Merge & Serialize ---
        _send_progress(
            job, JobStatus.MERGING, 70.0, "merge", "Merging bilingual subtitles"
        )
        t0 = time.monotonic()

        merged_entries = await asyncio.to_thread(
            merge_subtitles, original_sub.entries, translated_sub.entries
        )
        merged_sub = Subtitle(entries=merged_entries)

        # SRT is serialized from the merged entries.
        srt_content = serialize_srt(merged_sub)
        srt_path = work_dir / "subtitle.srt"
        srt_path.write_text(srt_content, encoding="utf-8")
        job.output_files[FileType.SRT] = srt_path

        # ASS is built from the two separate subtitle tracks (not from
        # merged_sub) plus the video dimensions.
        ass_content = serialize_bilingual_ass(
            original_sub,
            translated_sub,
            video_width=metadata.width,
            video_height=metadata.height,
        )
        ass_path = work_dir / "subtitle.ass"
        ass_path.write_text(ass_content, encoding="utf-8")
        job.output_files[FileType.ASS] = ass_path

        log.info(
            "step_done", step="merge", duration_ms=int((time.monotonic() - t0) * 1000)
        )

        # --- Step 5: Save source video & complete ---
        job.output_files[FileType.SOURCE_VIDEO] = video_path
        _send_complete(job)
        log.info("pipeline_complete", job_id=job.id)

    except PipelineError:
        # Already a pipeline-level error: propagate it untouched.
        raise
    except Exception as exc:
        # Anything else is normalized, reported and logged, then swallowed.
        pipeline_err = _to_pipeline_error(exc)
        _send_error(
            job, pipeline_err.code, pipeline_err.message, pipeline_err.detail or ""
        )
        log.error(
            "pipeline_failed",
            error_code=pipeline_err.code,
            error=str(exc),
        )


async def run_burn(job: Job, srt_content: str) -> None:
"""Burn user-edited SRT into the source video."""
log = logger.bind(job_id=job.id)
Expand Down
75 changes: 52 additions & 23 deletions src/bilingualsub/api/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@

import asyncio
import json
import re
import tempfile
from pathlib import Path
from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Any, NamedTuple

import structlog
from fastapi import APIRouter, Form, Request, UploadFile
Expand Down Expand Up @@ -53,6 +54,52 @@
".webm",
}

# Base name used when a job has no video title or the title sanitizes to "".
_DEFAULT_FILENAME = "video"
# Suffix for files with no translation pair in their name: source video,
# audio, or jobs whose languages are missing or identical.
_SUFFIX_ORIGINAL = "(original)"
# Joins source and target language in the download filename suffix.
_LANG_SEPARATOR = "_to_"


class _FileMeta(NamedTuple):
    """Per-file-type download metadata: filename extension and media type."""

    # Filename extension without the leading dot, e.g. "srt".
    ext: str
    # Media type passed to the download response, e.g. "text/plain".
    media_type: str


# Lookup table: output file type -> (filename extension, response media type).
# Read when building the download filename and the file response.
_FILE_META: dict[FileType, _FileMeta] = {
    FileType.SRT: _FileMeta("srt", "text/plain"),
    FileType.ASS: _FileMeta("ass", "text/plain"),
    FileType.VIDEO: _FileMeta("mp4", "video/mp4"),
    FileType.AUDIO: _FileMeta("mp3", "audio/mpeg"),
    FileType.SOURCE_VIDEO: _FileMeta("mp4", "video/mp4"),
}

# Characters stripped from download filenames: the Windows-reserved set
# < > : " / \ | ? * plus control characters 0x00-0x1f
# (intended to be NTFS/FAT32/ext4 safe).
_FILENAME_BAD_CHARS = re.compile(r'[<>:"/\\|?*\x00-\x1f]')

# Upload size cap in bytes: 500 * 1024 * 1024 (500 MiB).
_MAX_UPLOAD_BYTES = 500 * 1024 * 1024


def _sanitize_filename(name: str) -> str:
    """Return *name* reduced to a filesystem-safe base name of at most 120 chars.

    Characters matched by ``_FILENAME_BAD_CHARS`` are dropped, and spaces and
    dots are trimmed from both ends. ``_DEFAULT_FILENAME`` is returned whenever
    nothing usable is left.
    """
    stripped = _FILENAME_BAD_CHARS.sub("", name).strip(" .")
    if not stripped:
        stripped = _DEFAULT_FILENAME
    # Cutting at 120 characters can expose a new trailing space or dot,
    # so the tail is trimmed once more after slicing.
    shortened = stripped[:120].rstrip(" .")
    return shortened if shortened else _DEFAULT_FILENAME


def _build_download_filename(job: Job, file_type: FileType) -> str:
    """Return the download filename for one of a job's output files.

    The name has the shape ``"<title> <suffix>.<ext>"``:

    * ``<title>`` is the sanitized video title, or the default name when the
      job has no title.
    * ``<suffix>`` is ``(<source>_to_<target>)`` for translated outputs. It is
      the "original" marker for the source video and audio, and for jobs
      whose source/target languages are missing or identical.
    * ``<ext>`` comes from ``_FILE_META`` for the requested file type.
    """
    title = _sanitize_filename(job.video_title or _DEFAULT_FILENAME)
    src, dst = job.source_lang, job.target_lang

    if file_type in (FileType.SOURCE_VIDEO, FileType.AUDIO):
        # These files carry no translation, whatever languages the job has.
        tag = _SUFFIX_ORIGINAL
    elif src and dst and src != dst:
        tag = f"({src}{_LANG_SEPARATOR}{dst})"
    else:
        tag = _SUFFIX_ORIGINAL

    return f"{title} {tag}.{_FILE_META[file_type].ext}"


def _get_job_manager(request: Request) -> JobManager:
"""Get the JobManager from app state."""
Expand Down Expand Up @@ -83,7 +130,7 @@ async def create_job(body: JobCreateRequest, request: Request) -> JobCreateRespo
"""Create a new subtitle generation job."""
manager = _get_job_manager(request)
job = manager.create_job(
youtube_url=str(body.youtube_url),
source_url=str(body.source_url),
source_lang=body.source_lang,
target_lang=body.target_lang,
start_time=body.start_time,
Expand All @@ -104,7 +151,6 @@ async def create_job_from_upload(
request: Request,
) -> JobCreateResponse:
"""Create a subtitle generation job from an uploaded file."""
# Validate file extension
filename = file.filename or ""
suffix = Path(filename).suffix.lower()
if suffix not in _ALLOWED_UPLOAD_EXTENSIONS:
Expand All @@ -113,11 +159,9 @@ async def create_job_from_upload(
detail=f"Allowed: {', '.join(sorted(_ALLOWED_UPLOAD_EXTENSIONS))}",
)

# Sanitize filename to prevent path traversal
safe_name = Path(filename).name or f"upload{suffix}"

# Save uploaded file to temp directory with size limit
max_size = 500 * 1024 * 1024 # 500 MB
max_size = _MAX_UPLOAD_BYTES
tmp_dir = Path(tempfile.mkdtemp(prefix="bilingualsub_upload_"))
saved_path = tmp_dir / safe_name
bytes_written = 0
Expand Down Expand Up @@ -212,25 +256,10 @@ async def download_file(job_id: str, file_type: str, request: Request) -> FileRe
detail="Job may not have completed this step",
)

# Set appropriate media type and filename
media_types = {
FileType.SRT: "text/plain",
FileType.ASS: "text/plain",
FileType.VIDEO: "video/mp4",
FileType.AUDIO: "audio/mpeg",
FileType.SOURCE_VIDEO: "video/mp4",
}
extensions = {
FileType.SRT: "srt",
FileType.ASS: "ass",
FileType.VIDEO: "mp4",
FileType.AUDIO: "mp3",
FileType.SOURCE_VIDEO: "mp4",
}
return FileResponse(
path=path,
media_type=media_types[ft],
filename=f"bilingualsub.{extensions[ft]}",
media_type=_FILE_META[ft].media_type,
filename=_build_download_filename(job, ft),
)


Expand Down
Loading
Loading