Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion src/bilingualsub/api/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ async def _acquire_video(
width=int(meta_dict["width"]),
height=int(meta_dict["height"]),
fps=float(meta_dict["fps"]),
has_audio=bool(meta_dict.get("has_audio", True)),
)
log.info("step_done", step="upload", source=str(video_path))
return video_path, metadata
Expand Down Expand Up @@ -288,7 +289,14 @@ async def run_download(job: Job) -> None:
try:
video_path, metadata = await _acquire_video(job, work_dir, log)
if job.processing_mode != ProcessingMode.VISUAL_DESCRIPTION:
await _extract_audio_step(job, video_path, work_dir, log)
if not metadata.has_audio:
log.info(
"no_audio_stream_detected",
msg="Auto-switching to visual description mode",
)
job.processing_mode = ProcessingMode.VISUAL_DESCRIPTION
else:
await _extract_audio_step(job, video_path, work_dir, log)

# Save metadata for subtitle phase
job.video_width = metadata.width
Expand Down
15 changes: 15 additions & 0 deletions src/bilingualsub/core/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class VideoMetadata:
height: int
fps: float
description: str = ""
has_audio: bool = True

def __post_init__(self) -> None:
"""Validate metadata constraints."""
Expand Down Expand Up @@ -279,13 +280,24 @@ def _extract_metadata_from_info_dict(
if fps is None or fps <= 0:
fps = 30.0

# Detect audio: check acodec field and requested_formats
acodec = info_dict.get("acodec", "none")
has_audio = acodec not in ("none", None)
if not has_audio:
# Also check requested_formats for separate audio streams
requested_formats = info_dict.get("requested_formats") or []
has_audio = any(
fmt.get("acodec", "none") not in ("none", None) for fmt in requested_formats
)

return VideoMetadata(
title=title,
duration=float(duration),
width=int(width),
height=int(height),
fps=float(fps),
description=_sanitize_description(info_dict.get("description", "")),
has_audio=has_audio,
)


Expand Down Expand Up @@ -320,6 +332,8 @@ def _extract_metadata_with_ffprobe(video_path: Path) -> VideoMetadata:
if not video_stream:
raise DownloadError("No video stream found in file")

has_audio = any(s.get("codec_type") == "audio" for s in data.get("streams", []))

# Extract metadata
try:
title = data.get("format", {}).get("tags", {}).get("title", video_path.stem)
Expand All @@ -341,4 +355,5 @@ def _extract_metadata_with_ffprobe(video_path: Path) -> VideoMetadata:
width=width,
height=height,
fps=fps,
has_audio=has_audio,
)
5 changes: 4 additions & 1 deletion src/bilingualsub/utils/ffmpeg.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ def extract_video_metadata(video_path: Path) -> dict[str, str | float | int]:
video_path: Path to the video file

Returns:
Dict with keys: title, duration, width, height, fps
Dict with keys: title, duration, width, height, fps, has_audio

Raises:
FFmpegError: If ffprobe fails or no video stream found
Expand Down Expand Up @@ -298,6 +298,8 @@ def extract_video_metadata(video_path: Path) -> dict[str, str | float | int]:
if not video_stream:
raise FFmpegError(f"No video stream found in {video_path}")

has_audio = any(s.get("codec_type") == "audio" for s in data.get("streams", []))

try:
title = data.get("format", {}).get("tags", {}).get("title", video_path.stem)
duration = float(data.get("format", {}).get("duration", 0))
Expand All @@ -317,6 +319,7 @@ def extract_video_metadata(video_path: Path) -> dict[str, str | float | int]:
"width": width,
"height": height,
"fps": fps,
"has_audio": has_audio,
}


Expand Down
48 changes: 47 additions & 1 deletion tests/unit/api/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import pytest

from bilingualsub.api.constants import FileType, JobStatus, SSEEvent
from bilingualsub.api.constants import FileType, JobStatus, ProcessingMode, SSEEvent
from bilingualsub.api.jobs import Job
from bilingualsub.api.pipeline import run_download, run_subtitle
from bilingualsub.core.downloader import DownloadError, VideoMetadata
Expand Down Expand Up @@ -216,6 +216,52 @@ async def test_run_download_extract_audio_failure_sends_error(
assert "ffmpeg segfault" in error_events[0]["data"]["detail"]
assert job.status == JobStatus.FAILED

@patch("bilingualsub.api.pipeline.extract_audio")
@patch("bilingualsub.api.pipeline.download_video")
async def test_run_download_no_audio_switches_to_visual_description(
    self, mock_download, mock_extract_audio
) -> None:
    """When video has no audio stream, auto-switch to visual description mode.

    ``extract_audio`` is patched in addition to ``download_video`` so the
    test can assert that the audio-extraction step is skipped outright for a
    silent video, not merely that the job ends up in the right mode.
    """
    metadata = VideoMetadata(
        title="Silent Video",
        duration=60.0,
        width=1920,
        height=1080,
        fps=30.0,
        has_audio=False,
    )
    mock_download.return_value = metadata

    job = _make_job()
    # Precondition: the job starts in subtitle mode, so any change observed
    # below was made by run_download itself.
    assert job.processing_mode == ProcessingMode.SUBTITLE

    await run_download(job)

    assert job.processing_mode == ProcessingMode.VISUAL_DESCRIPTION
    assert job.status == JobStatus.DOWNLOAD_COMPLETE
    # With no audio stream there is nothing to extract; the step must not run.
    mock_extract_audio.assert_not_called()

@patch("bilingualsub.api.pipeline.extract_audio")
@patch("bilingualsub.api.pipeline.download_video")
async def test_run_download_with_audio_keeps_subtitle_mode(
    self, mock_download, mock_extract_audio
) -> None:
    """A video that reports an audio stream leaves the job in SUBTITLE mode."""
    # The mocked download hands back metadata that advertises an audio track.
    mock_download.return_value = VideoMetadata(
        title="Normal Video",
        duration=60.0,
        width=1920,
        height=1080,
        fps=30.0,
        has_audio=True,
    )
    job = _make_job()

    await run_download(job)

    # Audio extraction ran exactly once and the mode was left untouched.
    mock_extract_audio.assert_called_once()
    assert job.status == JobStatus.DOWNLOAD_COMPLETE
    assert job.processing_mode == ProcessingMode.SUBTITLE


@pytest.mark.unit
@pytest.mark.asyncio
Expand Down
68 changes: 68 additions & 0 deletions tests/unit/utils/test_ffmpeg.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
FFmpegError,
burn_subtitles,
extract_audio,
extract_video_metadata,
get_audio_duration,
split_audio,
trim_video,
Expand Down Expand Up @@ -48,6 +49,7 @@ def mock_ffmpeg(self):
"height": 1080,
"fps": 30.0,
"title": "test video",
"has_audio": True,
}

yield {
Expand Down Expand Up @@ -804,3 +806,69 @@ def test_non_existent_file_raises_error(self, tmp_path):

with pytest.raises(ValueError, match="Audio file does not exist"):
split_audio(audio_path, output_dir=tmp_path)


def _ffprobe_json(
streams: list[dict], duration: float = 120.0, title: str = "test"
) -> str:
"""Build a minimal ffprobe JSON output."""
return json.dumps(
{
"streams": streams,
"format": {"duration": str(duration), "tags": {"title": title}},
}
)


# Minimal video stream entry for fake ffprobe output: a 1920x1080 stream
# whose frame rate is given as the rational string "30/1".
_VIDEO_STREAM = {
"codec_type": "video",
"width": 1920,
"height": 1080,
"r_frame_rate": "30/1",
}
# Minimal audio stream entry; the tests below include or omit it to toggle
# the expected has_audio result.
_AUDIO_STREAM = {"codec_type": "audio", "codec_name": "aac"}


@pytest.mark.unit
class TestExtractVideoMetadata:
    """Test cases for extract_video_metadata has_audio detection.

    Each test replaces ``subprocess.run`` in the ffmpeg utility module with a
    mock whose ``stdout`` is a hand-built ffprobe JSON document, so no real
    ffprobe binary or video file is needed.
    """

    @patch("bilingualsub.utils.ffmpeg.subprocess.run")
    def test_has_audio_true_when_audio_stream_present(self, mock_run, tmp_path):
        """Given video with audio+video streams, has_audio is True."""
        mock_run.return_value = MagicMock(
            stdout=_ffprobe_json([_VIDEO_STREAM, _AUDIO_STREAM]),
        )

        result = extract_video_metadata(tmp_path / "video.mp4")

        assert result["has_audio"] is True

    @patch("bilingualsub.utils.ffmpeg.subprocess.run")
    def test_has_audio_false_when_no_audio_stream(self, mock_run, tmp_path):
        """Given video with only video stream, has_audio is False."""
        mock_run.return_value = MagicMock(
            stdout=_ffprobe_json([_VIDEO_STREAM]),
        )

        result = extract_video_metadata(tmp_path / "video.mp4")

        assert result["has_audio"] is False

    @patch("bilingualsub.utils.ffmpeg.subprocess.run")
    def test_returns_standard_metadata_fields(self, mock_run, tmp_path):
        """Given a normal video, all standard metadata fields are returned."""
        mock_run.return_value = MagicMock(
            stdout=_ffprobe_json(
                [_VIDEO_STREAM, _AUDIO_STREAM], duration=60.0, title="My Video"
            ),
        )

        result = extract_video_metadata(tmp_path / "video.mp4")

        assert result["title"] == "My Video"
        assert result["duration"] == 60.0
        assert result["width"] == 1920
        assert result["height"] == 1080
        assert result["fps"] == 30.0
        # The fixture includes an audio stream, so pin the value rather than
        # only checking that the key exists.
        assert result["has_audio"] is True
Loading