From 85b7e30bf5dcd0576cb251edf56057345578c283 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maple=EF=BC=81?= <mapleee723@gmail.com>
Date: Fri, 24 Apr 2026 14:12:42 +0800
Subject: [PATCH 1/5] feat: add visual description mode for videos without
 speech

New pipeline mode that uses Gemini to analyze video frames and generate
translated subtitles from visual content (on-screen text, UI elements,
scene descriptions). Users toggle between speech subtitles and visual
description via a new UI switch.

Backend: core/visual_describer.py (Gemini File API), pipeline branch on
processing_mode, configurable model via VISUAL_DESCRIPTION_MODEL env var.
Frontend: Toggle in UrlInput, i18n keys, processing_mode in request types.
Tests: 7 unit tests + 3 integration tests.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 docs/arch/visual-description-mode.md          | 248 +++++++++++++
 docs/design/visual-description-mode.md        | 127 +++++++
 frontend/src/components/UrlInput.tsx          |  41 +++
 frontend/src/i18n/en.json                     |  13 +-
 frontend/src/i18n/zh-TW.json                  |  13 +-
 frontend/src/types.ts                         |   2 +
 pyproject.toml                                |  10 +
 src/bilingualsub/api/constants.py             |   1 +
 src/bilingualsub/api/jobs.py                  |   4 +
 src/bilingualsub/api/pipeline.py              |  97 +++++
 src/bilingualsub/api/routes.py                |   3 +
 src/bilingualsub/api/schemas.py               |   4 +-
 src/bilingualsub/core/__init__.py             |   6 +
 src/bilingualsub/core/visual_describer.py     | 120 +++++++
 src/bilingualsub/utils/config.py              |  20 ++
 .../test_visual_description_pipeline.py       | 339 ++++++++++++++++++
 tests/unit/core/test_visual_describer.py      | 186 ++++++++++
 uv.lock                                       | 225 ++++++++++++
 18 files changed, 1452 insertions(+), 7 deletions(-)
 create mode 100644 docs/arch/visual-description-mode.md
 create mode 100644 docs/design/visual-description-mode.md
 create mode 100644 src/bilingualsub/core/visual_describer.py
 create mode 100644 tests/integration/test_visual_description_pipeline.py
 create mode 100644 tests/unit/core/test_visual_describer.py

diff --git a/docs/arch/visual-description-mode.md b/docs/arch/visual-description-mode.md
new file mode 100644
index 0000000..8630446
--- /dev/null
+++ b/docs/arch/visual-description-mode.md
@@ -0,0 +1,248 @@
+# Architecture: Visual Description Mode
+
+## 概述
+
+在現有 download → subtitle → burn 三階段管線上，新增一條平行的字幕生成路徑：當使用者選擇「視覺描述」模式時，subtitle phase 以 `describe_video()` 取代 `transcribe_audio()`。Gemini（設計上採用 Gemini 2.5 Flash，實際模型可透過 `VISUAL_DESCRIPTION_MODEL` 環境變數設定）直接讀取影片檔（`FileType.SOURCE_VIDEO`）並回傳帶時間戳的畫面描述，再由現有 `translate_subtitle()` 翻譯成目標語言。因為視覺描述不存在「原文字幕」概念，因此跳過 merge 步驟，只序列化目標語言 SRT。
+
+## Files to Create / Modify
+
+### 新建
+
+| 路徑                                                    | 說明                                                                                                    |
+| ------------------------------------------------------- | ------------------------------------------------------------------------------------------------------- |
+| `src/bilingualsub/core/visual_describer.py`             | Gemini File API 封裝；對外唯一函數 `describe_video(video_path, *, source_lang) -> Subtitle`             |
+| `tests/unit/core/test_visual_describer.py`              | UT：mock `google.genai.Client`，驗證解析邏輯與錯誤路徑                                                  |
+| `tests/integration/test_visual_description_pipeline.py` | IT：Journey 1 端到端鏈（POST /jobs → POST /jobs/:id/subtitle 含 processing_mode → validate SRT exists） |
+
+### 修改
+
+| 路徑                                   | 修改內容                                                                                                                                                              |
+| -------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `src/bilingualsub/api/constants.py`    | `SubtitleSource` 新增 `VISUAL_DESCRIPTION = "visual_description"`                                                                                                     |
+| `src/bilingualsub/api/jobs.py`         | `Job` dataclass 新增 `processing_mode: str = "subtitle"` 和 `video_duration: float = 0.0`；`JobManager.create_job()` 新增 `processing_mode` 參數                      |
+| `src/bilingualsub/api/schemas.py`      | `JobCreateRequest` 新增 `processing_mode: Literal["subtitle", "visual_description"] = "subtitle"`；`StartSubtitleRequest` 新增同欄位                                  |
+| `src/bilingualsub/api/routes.py`       | `create_job()` 傳 `processing_mode`；`start_subtitle()` 覆寫 `job.processing_mode`                                                                                    |
+| `src/bilingualsub/api/pipeline.py`     | `run_subtitle()` 依 `job.processing_mode` 分支；新增 `_run_visual_description_subtitle()` 和 `_serialize_translated_only()`；`_ERROR_MAP` 加 `VisualDescriptionError` |
+| `src/bilingualsub/core/__init__.py`    | 匯出 `VisualDescriptionError`, `describe_video`                                                                                                                       |
+| `src/bilingualsub/utils/config.py`     | `Settings` 新增 `gemini_api_key: str = ""`；新增 `get_gemini_api_key()` guard function                                                                                |
+| `pyproject.toml`                       | `dependencies` 加 `google-genai>=1.0.0`；mypy override 加 `google.*`                                                                                                  |
+| `frontend/src/types.ts`                | `JobCreateRequest` 新增 `processing_mode?: 'subtitle' \| 'visual_description'`                                                                                        |
+| `frontend/src/components/UrlInput.tsx` | 新增 `processingMode` state 與 Toggle UI（參考 rangeEnabled 模式）                                                                                                    |
+| `frontend/src/i18n/zh-TW.json`         | `form` 加 Toggle 相關 key；`progress` 加 `describing`；`error` 加 `visual_description_failed`                                                                         |
+| `frontend/src/i18n/en.json`            | 同上英文 key                                                                                                                                                          |
+
+## Responsibility Map
+
+| 元件                                                     | 層級       | 負責                                                                | 不碰                          |
+| -------------------------------------------------------- | ---------- | ------------------------------------------------------------------- | ----------------------------- |
+| `core/visual_describer.py`                               | Core       | Gemini API 呼叫、response 解析、timestamp regex、回傳 `Subtitle`    | pipeline 進度、job 狀態、翻譯 |
+| `api/pipeline.py` — `_run_visual_description_subtitle()` | Pipeline   | 進度管理、呼叫 describe_video + translate、影片時長驗證、SRT 序列化 | Gemini API 細節、前端狀態     |
+| `api/pipeline.py` — `_serialize_translated_only()`       | Pipeline   | 單語 SRT 序列化、寫入 output_files                                  | 翻譯邏輯、merge 邏輯          |
+| `api/routes.py`                                          | Controller | schema 驗證、`processing_mode` 傳遞給 Job 和 pipeline               | pipeline 邏輯、Gemini 細節    |
+| `api/schemas.py`                                         | Schema     | request 驗證（`Literal["subtitle", "visual_description"]`）         | 業務邏輯                      |
+| `frontend/UrlInput.tsx`                                  | View       | Toggle 渲染、`processing_mode` 附加到 request                       | API 呼叫、狀態管理            |
+
+## Interface Design
+
+### `describe_video` 函數簽名
+
+```python
+def describe_video(
+    video_path: Path,
+    *,
+    source_lang: str = "en",
+) -> Subtitle:
+    """Analyze video frames with Gemini 2.5 Flash and return timestamped descriptions.
+
+    Raises:
+        VisualDescriptionError: If Gemini API fails or no segments can be parsed.
+        ValueError: If GEMINI_API_KEY is not set or video_path doesn't exist.
+    """
+```
+
+### `VisualDescriptionError`
+
+```python
+class VisualDescriptionError(Exception):
+    """Raised when Gemini visual description fails."""
+```
+
+### `Settings` 新增欄位
+
+```python
+gemini_api_key: str = ""
+```
+
+### `get_gemini_api_key()`
+
+```python
+def get_gemini_api_key() -> str:
+    settings = get_settings()
+    if not settings.gemini_api_key:
+        raise ValueError(
+            "GEMINI_API_KEY environment variable is not set. "
+            "Please set it with your Gemini API key."
+        )
+    return settings.gemini_api_key
+```
+
+### `JobCreateRequest` 更新
+
+```python
+processing_mode: Literal["subtitle", "visual_description"] = "subtitle"
+```
+
+### `StartSubtitleRequest` 更新
+
+```python
+processing_mode: Literal["subtitle", "visual_description"] | None = None
+```
+
+### 前端 `JobCreateRequest` 更新
+
+```typescript
+processing_mode?: 'subtitle' | 'visual_description';
+```
+
+## Data Flow
+
+### 視覺描述路徑（Journey 1）
+
+```
+使用者切換 Toggle → processing_mode: "visual_description"
+      │
+POST /api/jobs { source_url, processing_mode: "visual_description" }
+      │
+  JobManager.create_job(processing_mode="visual_description")
+      │
+  run_download(job)
+      ├── _acquire_video() → job.output_files[SOURCE_VIDEO], job.video_duration
+      ├── _extract_audio_step()                ← 仍執行（架構簡單，多幾秒無害）
+      └── _send_download_complete()
+      │
+前端 download_complete → 使用者點「產生字幕」
+      │
+POST /api/jobs/:id/subtitle { processing_mode: "visual_description" }
+      │
+  routes.start_subtitle() → job.processing_mode = "visual_description"
+      │
+  run_subtitle(job) → job.processing_mode == "visual_description"
+      │
+  _run_visual_description_subtitle(job)
+      ├── validate video_duration <= 5400 (90 min)
+      ├── progress 20% "describe" — "分析畫面內容中..."
+      ├── describe_video(SOURCE_VIDEO, source_lang=job.source_lang) → Subtitle
+      │       └── google-genai: files.upload → models.generate_content → parse timestamps
+      ├── job.subtitle_source = VISUAL_DESCRIPTION
+      ├── progress 50% "translate"
+      ├── translate_subtitle(described_sub, ...) → translated_sub
+      ├── progress 70% "serialize"
+      ├── _serialize_translated_only(translated_sub)
+      │       └── serialize_srt → subtitle.srt → job.output_files[SRT]
+      └── _send_complete(job)
+      │
+前端 completed → SubtitleEditor 載入 SRT（單語，只有 translated 欄位）
+      │
+POST /api/jobs/:id/burn { srt_content } → run_burn()（完全複用）
+```
+
+### 語音字幕路徑（不受影響）
+
+`job.processing_mode == "subtitle"` → 現有 `run_subtitle()` 主體邏輯不變。
+
+## Build Sequence
+
+### Phase 1：後端基礎（additive）
+
+- `pyproject.toml`：加 `google-genai>=1.0.0` 依賴；加 mypy override
+- `utils/config.py`：加 `gemini_api_key` 欄位、`get_gemini_api_key()` 函數
+- `api/constants.py`：`SubtitleSource` 加 `VISUAL_DESCRIPTION`
+- `api/jobs.py`：`Job` 加 `processing_mode`, `video_duration`；`JobManager.create_job()` 加 `processing_mode` 參數
+- `api/pipeline.py`：`run_download()` 補存 `job.video_duration`
+
+### Phase 2：Core 模組（additive）
+
+- `core/visual_describer.py`：實作 `describe_video()`，含 `DESCRIBE_PROMPT`、timestamp regex parser、`VisualDescriptionError`
+- `core/__init__.py`：匯出新符號
+
+### Phase 3：Pipeline 分支（breaking — run_subtitle 需同步改動）
+
+- `api/pipeline.py`：`_run_visual_description_subtitle()`；`_serialize_translated_only()`；`run_subtitle()` 加分支；`_ERROR_MAP` 加 `VisualDescriptionError`
+- `api/schemas.py`：`JobCreateRequest` 加 `processing_mode`；`StartSubtitleRequest` 加 `processing_mode`
+- `api/routes.py`：`create_job()` 傳 `processing_mode`；`start_subtitle()` 覆寫 `job.processing_mode`
+
+### Phase 4：前端（additive）
+
+- `frontend/src/types.ts`：`JobCreateRequest` 加 `processing_mode`
+- `frontend/src/i18n/zh-TW.json` & `en.json`：加新 i18n key
+- `frontend/src/components/UrlInput.tsx`：加 `processingMode` state 與 Toggle UI
+
+### Phase 5：測試（additive）
+
+- `tests/unit/core/test_visual_describer.py`
+- `tests/integration/test_visual_description_pipeline.py`
+
+## Infra Reuse
+
+| 現有元件                        | 視覺描述路徑如何複用                                                  |
+| ------------------------------- | --------------------------------------------------------------------- |
+| `run_download()`                | 完全複用，`SOURCE_VIDEO` 已存於 `output_files`，補存 `video_duration` |
+| `translate_subtitle()`          | 完全複用，`described_sub` 與 `original_sub` 型別相同（`Subtitle`）    |
+| `serialize_srt()`               | 複用，只呼叫一次（翻譯後字幕）                                        |
+| `run_burn()`                    | 完全複用，接受 SRT 字串即可，不感知生成路徑                           |
+| `SubtitleEditor`                | 複用，`original` 欄位在視覺描述模式下留空                             |
+| `_make_translate_progress_cb()` | 完全複用，仍映射 50-70%                                               |
+
+## Test Strategy
+
+### Unit Test 邊界
+
+**`tests/unit/core/test_visual_describer.py`**
+
+| 目標             | 測試行為                                                                       |
+| ---------------- | ------------------------------------------------------------------------------ |
+| `describe_video` | 有效 Gemini response（3 條 MM:SS 時間戳）→ `Subtitle` with 3 entries，時間正確 |
+| `describe_video` | response 無法解析出任何 entry → 拋 `VisualDescriptionError`                    |
+| `describe_video` | `generate_content` 拋 exception → 包裝成 `VisualDescriptionError`              |
+| `describe_video` | `GEMINI_API_KEY` 未設 → `ValueError`                                           |
+| `describe_video` | 傳入不存在的 `video_path` → `ValueError`                                       |
+| `describe_video` | response 中混有不符格式的行 → 只保留可解析的 entries，不拋錯                   |
+
+**`tests/unit/api/` — pipeline 視覺描述分支**
+
+| 目標                               | 測試行為                                                          |
+| ---------------------------------- | ----------------------------------------------------------------- |
+| `_run_visual_description_subtitle` | `job.video_duration = 5401.0` → `PipelineError("video_too_long")` |
+| `_serialize_translated_only`       | 只寫 `FileType.SRT`，`FileType.ASS` 不在 output_files             |
+
+### Integration Test 邊界
+
+**`tests/integration/test_visual_description_pipeline.py`**
+
+| Journey 步驟                | Test Chain                                                                                                                                                               |
+| --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| 使用者選視覺描述 → 產出 SRT | POST /jobs (processing_mode=visual_description) → inject DOWNLOAD_COMPLETE state → POST /subtitle → poll until COMPLETED → GET /download/srt → 200, SRT 非空, ASS 不存在 |
+| 影片超過 90 分鐘 → 失敗     | POST /jobs → inject video_duration=5401 → POST /subtitle → poll → status=failed, error_code="video_too_long"                                                             |
+| 缺 GEMINI_API_KEY → 失敗    | monkeypatch.delenv GEMINI_API_KEY → POST /subtitle → status=failed                                                                                                       |
+
+### Mock 決策
+
+| 對象                      | Mock / Real            | 原因                                        |
+| ------------------------- | ---------------------- | ------------------------------------------- |
+| `google.genai.Client`     | Mock                   | 外部 API，不穩定且需付費                    |
+| `translate_subtitle` (IT) | Mock                   | 避免呼叫 Groq/OpenAI，回傳固定 `Subtitle`   |
+| `describe_video` (IT)     | Mock                   | 避免呼叫 Gemini，但驗證其輸出能正確流入下游 |
+| `get_settings`            | Real + monkeypatch env | 驗證 env 讀取邏輯正確                       |
+
+### Coverage 要求
+
+- `core/visual_describer.py` ≥ 80%
+- `api/pipeline.py` 視覺描述分支被 IT 覆蓋
+- 整體 ≥ 80%
+
+## 開放問題
+
+1. **Gemini 時間戳格式**：實際輸出格式（`MM:SS` vs `HH:MM:SS`）需 API 測試確認。timestamp regex 應寬鬆設計，覆蓋兩種格式。
+2. **audio extraction 是否跳過**：視覺描述不需要音訊，但 `run_download()` 不感知 `processing_mode`。選擇維持現狀（多幾秒但架構簡單）。
+3. **SubtitleEditor 對空 `original` 的處理**：需確認渲染邏輯對空字串的容忍度（可能顯示單行而非雙行）。
+4. **Gemini 上傳檔案清理**：`client.files.upload()` 上傳的檔案預設 TTL 48 小時。第一版不主動清理。
diff --git a/docs/design/visual-description-mode.md b/docs/design/visual-description-mode.md
new file mode 100644
index 0000000..8b677b7
--- /dev/null
+++ b/docs/design/visual-description-mode.md
@@ -0,0 +1,127 @@
+# 視覺描述模式（Visual Description Mode）
+
+## 背景與問題
+
+現有系統僅支援語音轉字幕（Whisper ASR → LLM 翻譯），遇到無語音的影片（品牌形象影片、純音樂 MV、產品展示動畫等）時，Whisper 回傳空 segments 直接拋出 `TranscriptionError`，使用者只會看到一個模糊的「轉錄失敗」錯誤訊息。
+
+這類影片的畫面上往往有大量有價值的視覺資訊——標題文字、產品名稱、UI 介面文字、場景說明——但系統完全無法處理，使用者只能放棄。
+
+不做的後果：整個工具的適用範圍被限縮在「有人說話的影片」，大量品牌內容、教學動畫、產品 Demo 無法使用。
+
+## 使用者角色
+
+**一般觀眾**：想理解外語品牌影片或產品介紹的內容，貼入 URL 後期望系統能產出翻譯後的說明字幕。
+
+## 需求情境
+
+- 一般觀眾：When 我看到一支外語品牌形象影片，畫面上有文字但沒有旁白，I want to 讓系統分析畫面內容並翻譯成我的語言，so I can 理解影片在傳達什麼。
+
+## 設計意圖
+
+- **手動切換而非自動偵測** → 自動偵測需要先跑 Whisper 才能判斷有無語音，浪費時間且判斷邊界模糊（幾句話算「有語音」？）。手動切換讓使用者掌控意圖，流程更直覺。
+- **只產出翻譯後的單語字幕** → 視覺描述的「原文」是畫面內容而非語言文字，雙語對照在此場景沒有意義。
+- **第一版不做混合模式** → 混合模式需要時間軸對齊和內容類型判斷，複雜度高。先做純模式，驗證價值後再擴展。
+- **使用 Gemini 2.5 Flash** → 目前唯一支援原生影片輸入的主流模型，可直接吃整段影片（最長 90 分鐘），同時處理視覺和音訊，不需自行抽 frame。成本低、速度快，適合生產環境。
+
+## User Journey
+
+### Journey 1：觀眾 — 取得品牌影片的視覺描述字幕
+
+前置條件：使用者已開啟 BilingualSub 網頁
+
+1. 使用者看到 URL 輸入框上方的 Toggle，預設為「語音字幕」模式
+2. 使用者將 Toggle 切換到「視覺描述」模式
+   → 頁面提示文字變更，說明此模式會分析畫面內容而非語音
+3. 使用者貼入影片 URL，選擇目標語言，點擊「開始處理」
+   → 系統開始下載影片
+4. 下載完成後，系統將影片送入 Gemini 2.5 Flash 分析
+   → 進度條顯示「分析畫面內容中...」
+5. Gemini 回傳帶時間戳的畫面描述（英文或原始語言）
+   → 系統將描述翻譯成目標語言
+6. 翻譯完成，使用者看到字幕預覽
+   → 字幕以時間軸格式顯示，每條字幕對應一個畫面片段
+7. 使用者可選擇「下載字幕檔」（SRT）或「燒錄進影片」
+   → 與現有語音字幕流程一致的輸出選項
+
+### Journey 2：觀眾 — 切換回語音字幕模式
+
+前置條件：使用者目前在「視覺描述」模式
+
+1. 使用者將 Toggle 切回「語音字幕」
+   → 回到原有的語音字幕流程，所有現有功能不受影響
+
+## 替代流程
+
+- **影片過長（超過 90 分鐘）**：系統提示「影片過長，視覺描述模式最長支援 90 分鐘」，建議使用者裁剪影片或使用時間範圍功能
+- **Gemini 回傳內容極少**：影片畫面資訊不足（如純黑畫面、靜態圖片），系統仍產出結果但字幕數量可能很少，不額外提示
+
+## 錯誤情境
+
+### 系統錯誤
+
+- Gemini API 呼叫失敗（網路、quota、API key 無效）：顯示明確錯誤訊息「視覺分析服務暫時無法使用，請稍後再試」
+- 影片下載失敗：與現有語音模式共用相同的下載錯誤處理
+
+### 使用者誤操作
+
+- 對有大量語音的影片使用視覺描述模式：系統正常執行，只是產出的字幕是畫面描述而非語音轉錄。不阻擋，因為使用者可能確實想要畫面描述
+- 未設定 Gemini API key 就使用視覺描述模式：在開始視覺分析時檢查（而非應用程式啟動時）；任務失敗並提示「請設定 GEMINI_API_KEY 環境變數」
+
+### 惡意行為
+
+- 不適用（無額外攻擊面，影片下載的安全性由現有 yt-dlp 處理）
+
+## Out of Scope
+
+- 語音字幕 + 視覺描述混合模式
+- 自動偵測影片有無語音並切換模式
+- 雙語對照輸出（原文描述 + 翻譯）
+- 自訂 Gemini prompt / 描述風格
+- 支援 Gemini 以外的視覺模型
+
+## 整合點
+
+- **Gemini API**：新增 `GEMINI_API_KEY` 環境變數，透過 Google AI SDK 呼叫 Gemini 2.5 Flash
+- **現有 Pipeline**：視覺描述模式複用現有的 download → translate → burn 步驟，將 transcribe 步驟替換為 Gemini 視覺分析，並跳過 merge 步驟（只輸出翻譯後的單語 SRT）
+- **前端狀態**：`useJob` hook 需支援新的模式參數，Toggle 狀態影響 API 請求的 payload
+- **翻譯模組**：視覺描述的翻譯複用現有的 translator，輸入格式與語音轉錄的字幕條目相同
+
+## Acceptance Criteria
+
+- Given 使用者在首頁
+  When 頁面載入
+  Then 看到 Toggle 預設為「語音字幕」模式
+
+- Given 使用者切換到「視覺描述」模式
+  When 貼入影片 URL 並點擊開始
+  Then 系統使用 Gemini 分析畫面內容，而非 Whisper 語音辨識
+
+- Given 視覺分析完成
+  When 使用者查看結果
+  Then 看到帶時間戳的翻譯後字幕，內容描述畫面中的文字和視覺元素
+
+- Given 視覺描述字幕產出完成
+  When 使用者選擇「燒錄進影片」
+  Then 字幕被燒錄進影片，與語音字幕的燒錄效果一致
+
+- Given 視覺描述字幕產出完成
+  When 使用者選擇「下載字幕檔」
+  Then 下載到 SRT 格式的字幕檔
+
+- Given 使用者切換回「語音字幕」模式
+  When 操作流程
+  Then 所有現有功能不受影響，行為與切換前完全一致
+
+- Given 未設定 GEMINI_API_KEY
+  When 使用者嘗試使用視覺描述模式
+  Then 顯示明確提示要求設定 API key
+
+- Given 影片超過 90 分鐘
+  When 使用者以視覺描述模式處理
+  Then 顯示影片過長的提示訊息
+
+## 開放問題
+
+- Gemini 回傳的時間戳精度是否足夠產出流暢的字幕體驗？需實際測試驗證
+- 視覺描述的翻譯品質是否需要針對描述性文本調整 prompt？與語音轉錄的翻譯 prompt 可能有差異
+- 是否需要讓使用者指定「原始語言」？Gemini 可能需要知道畫面上文字的語言才能更準確辨識
diff --git a/frontend/src/components/UrlInput.tsx b/frontend/src/components/UrlInput.tsx
index 6ac15f6..b1fefb1 100644
--- a/frontend/src/components/UrlInput.tsx
+++ b/frontend/src/components/UrlInput.tsx
@@ -28,6 +28,9 @@ export function UrlInput({ onSubmit, disabled }: UrlInputProps) {
   const [url, setUrl] = useState('');
   const [selectedFile, setSelectedFile] = useState<File | null>(null);
   const fileInputRef = useRef<HTMLInputElement>(null);
+  const [processingMode, setProcessingMode] = useState<'subtitle' | 'visual_description'>(
+    'subtitle'
+  );
   const [rangeEnabled, setRangeEnabled] = useState(false);
   const [startTime, setStartTime] = useState<TimeParts>({
     hours: '00',
@@ -65,6 +68,7 @@ export function UrlInput({ onSubmit, disabled }: UrlInputProps) {
       }
       const request: JobUploadRequest = {
         file: selectedFile,
+        processing_mode: processingMode,
       };
       if (startSeconds !== undefined) request.start_time = startSeconds;
       if (endSeconds !== undefined) request.end_time = endSeconds;
@@ -80,6 +84,7 @@ export function UrlInput({ onSubmit, disabled }: UrlInputProps) {
 
     const request: JobCreateRequest = {
       source_url: url,
+      processing_mode: processingMode,
     };
     if (startSeconds !== undefined) request.start_time = startSeconds;
     if (endSeconds !== undefined) request.end_time = endSeconds;
@@ -153,6 +158,42 @@ export function UrlInput({ onSubmit, disabled }: UrlInputProps) {
         )}
       </div>
 
+      <div className="flex flex-col items-center gap-2 text-gray-400">
+        <div className="flex items-center gap-4">
+          <span className="text-sm text-gray-600 dark:text-gray-400">
+            {t('form.processingModeLabel')}
+          </span>
+          <button
+            type="button"
+            disabled={disabled}
+            onClick={() =>
+              setProcessingMode(prev => (prev === 'subtitle' ? 'visual_description' : 'subtitle'))
+            }
+            className={`relative inline-flex h-6 w-11 items-center rounded-full transition-colors disabled:opacity-50 disabled:cursor-not-allowed ${
+              processingMode === 'visual_description'
+                ? 'bg-blue-600'
+                : 'bg-gray-300 dark:bg-gray-600'
+            }`}
+          >
+            <span
+              className={`inline-block h-4 w-4 transform rounded-full bg-white transition-transform ${
+                processingMode === 'visual_description' ? 'translate-x-6' : 'translate-x-1'
+              }`}
+            />
+          </button>
+          <span className="text-sm text-gray-600 dark:text-gray-400">
+            {processingMode === 'visual_description'
+              ? t('form.processingModeVisual')
+              : t('form.processingModeSubtitle')}
+          </span>
+        </div>
+        {processingMode === 'visual_description' && (
+          <p className="text-xs text-blue-600 dark:text-blue-400">
+            {t('form.processingModeVisualHint')}
+          </p>
+        )}
+      </div>
+
       <div className="flex flex-col items-center gap-4 text-gray-400">
         <div className="flex items-center gap-4">
           <label className="text-xs uppercase tracking-widest font-bold">
diff --git a/frontend/src/i18n/en.json b/frontend/src/i18n/en.json
index 3e9584b..89d20bd 100644
--- a/frontend/src/i18n/en.json
+++ b/frontend/src/i18n/en.json
@@ -35,7 +35,11 @@
     "inputModeUrl": "Video URL",
     "inputModeFile": "Upload File",
     "filePlaceholder": "Choose a video or audio file",
-    "fileSelected": "Selected: {{filename}}"
+    "fileSelected": "Selected: {{filename}}",
+    "processingModeLabel": "Processing Mode",
+    "processingModeSubtitle": "Speech Subtitles",
+    "processingModeVisual": "Visual Description",
+    "processingModeVisualHint": "Analyze visual content for subtitles (for videos without speech)"
   },
   "lang": {
     "en": "English",
@@ -55,7 +59,8 @@
     "subtitleSource": "Source",
     "subtitleSourceYoutube": "YouTube (manual)",
     "subtitleSourceWhisper": "Whisper",
-    "nonYoutubeHint": "Non-YouTube platforms may take longer to download"
+    "nonYoutubeHint": "Non-YouTube platforms may take longer to download",
+    "describing": "Analyzing visual content..."
   },
   "download": {
     "title": "Download Results",
@@ -78,7 +83,9 @@
     "network_error": "Network connection error",
     "unknown_error": "An unknown error occurred",
     "invalidTimeFormat": "Invalid time format",
-    "invalidTimeRange": "End time must be greater than start time"
+    "invalidTimeRange": "End time must be greater than start time",
+    "visual_description_failed": "Visual analysis failed",
+    "video_too_long": "Video exceeds 90-minute limit for visual description mode"
   },
   "language_switcher": {
     "label": "Language"
diff --git a/frontend/src/i18n/zh-TW.json b/frontend/src/i18n/zh-TW.json
index 4e7d777..86a3e7d 100644
--- a/frontend/src/i18n/zh-TW.json
+++ b/frontend/src/i18n/zh-TW.json
@@ -35,7 +35,11 @@
     "inputModeUrl": "影片網址",
     "inputModeFile": "上傳檔案",
     "filePlaceholder": "選擇影片或音訊檔案",
-    "fileSelected": "已選擇：{{filename}}"
+    "fileSelected": "已選擇：{{filename}}",
+    "processingModeLabel": "處理模式",
+    "processingModeSubtitle": "語音字幕",
+    "processingModeVisual": "視覺描述",
+    "processingModeVisualHint": "分析畫面內容產生字幕（適用於無語音影片）"
   },
   "lang": {
     "en": "英文",
@@ -55,7 +59,8 @@
     "subtitleSource": "字幕來源",
     "subtitleSourceYoutube": "YouTube（手動上傳）",
     "subtitleSourceWhisper": "Whisper 語音辨識",
-    "nonYoutubeHint": "非 YouTube 平台的影片下載可能較慢"
+    "nonYoutubeHint": "非 YouTube 平台的影片下載可能較慢",
+    "describing": "分析畫面內容中..."
   },
   "download": {
     "title": "下載結果",
@@ -78,7 +83,9 @@
     "network_error": "網路連線錯誤",
     "unknown_error": "發生未知錯誤",
     "invalidTimeFormat": "時間格式不正確",
-    "invalidTimeRange": "結束時間必須大於開始時間"
+    "invalidTimeRange": "結束時間必須大於開始時間",
+    "visual_description_failed": "視覺分析失敗",
+    "video_too_long": "影片過長，視覺描述模式最長支援 90 分鐘"
   },
   "language_switcher": {
     "label": "語言"
diff --git a/frontend/src/types.ts b/frontend/src/types.ts
index 698c53b..32d146d 100644
--- a/frontend/src/types.ts
+++ b/frontend/src/types.ts
@@ -6,6 +6,7 @@ export interface JobCreateRequest {
   target_lang?: string;
   start_time?: number; // seconds
   end_time?: number; // seconds
+  processing_mode?: 'subtitle' | 'visual_description';
 }
 
 export interface JobCreateResponse {
@@ -48,6 +49,7 @@ export interface JobUploadRequest {
   target_lang?: string;
   start_time?: number;
   end_time?: number;
+  processing_mode?: 'subtitle' | 'visual_description';
 }
 
 export interface RetranslateEntryPayload {
diff --git a/pyproject.toml b/pyproject.toml
index 44ef00f..3db8bf8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,6 +33,7 @@ dependencies = [
     "ollama>=0.6.1",
     "openai>=2.17.0",
     "python-dotenv>=1.2.1",
+    "google-genai>=1.0.0",
 ]
 
 [project.optional-dependencies]
@@ -135,6 +136,7 @@ module = [
     "agno.*",
     "ffmpeg.*",
     "openai.*",
+    "google.*",
 ]
 ignore_missing_imports = true
 
@@ -198,3 +200,11 @@ sort_by_size = true
 targets = ["src/bilingualsub"]
 exclude_dirs = ["tests", ".venv"]
 skips = ["B101"]  # Skip assert warnings (used in tests)
+
+[dependency-groups]
+dev = [
+    "mypy>=1.19.1",
+    "pytest-asyncio>=1.3.0",
+    "pytest-cov>=7.0.0",
+    "ruff>=0.14.14",
+]
diff --git a/src/bilingualsub/api/constants.py b/src/bilingualsub/api/constants.py
index a349f6e..f26fb74 100644
--- a/src/bilingualsub/api/constants.py
+++ b/src/bilingualsub/api/constants.py
@@ -42,6 +42,7 @@ class SubtitleSource(StrEnum):
 
     WHISPER = "whisper"
     YOUTUBE_MANUAL = "youtube_manual"
+    VISUAL_DESCRIPTION = "visual_description"
 
 
 JOB_TTL_SECONDS = 1800
diff --git a/src/bilingualsub/api/jobs.py b/src/bilingualsub/api/jobs.py
index 4e64b85..162f7c0 100644
--- a/src/bilingualsub/api/jobs.py
+++ b/src/bilingualsub/api/jobs.py
@@ -47,6 +47,8 @@ class Job:
     video_description: str = ""
     glossary_text: str = ""
     subtitle_source: str = ""
+    processing_mode: str = "subtitle"
+    video_duration: float = 0.0
     output_files: dict[FileType, Path] = field(default_factory=dict)
     event_queue: asyncio.Queue[dict[str, object]] = field(default_factory=asyncio.Queue)
     created_at: float = field(default_factory=time.monotonic)
@@ -67,6 +69,7 @@ def create_job(
         start_time: float | None = None,
         end_time: float | None = None,
         local_video_path: Path | None = None,
+        processing_mode: str = "subtitle",
     ) -> Job:
         """Create a new job and store it."""
         job_id = uuid.uuid4().hex[:12]
@@ -78,6 +81,7 @@ def create_job(
             local_video_path=local_video_path,
             start_time=start_time,
             end_time=end_time,
+            processing_mode=processing_mode,
         )
         self._jobs[job_id] = job
         logger.info("job_created", job_id=job_id, source_url=source_url)
diff --git a/src/bilingualsub/api/pipeline.py b/src/bilingualsub/api/pipeline.py
index 8913692..37f9537 100644
--- a/src/bilingualsub/api/pipeline.py
+++ b/src/bilingualsub/api/pipeline.py
@@ -23,6 +23,8 @@
     TranscriptionError,
     TranslationError,
     VideoMetadata,
+    VisualDescriptionError,
+    describe_video,
     download_video,
     merge_subtitles,
     transcribe_audio,
@@ -46,6 +48,10 @@
     TranscriptionError: ("transcription_failed", "Failed to transcribe audio"),
     TranslationError: ("translation_failed", "Failed to translate subtitles"),
     FFmpegError: ("burn_failed", "Failed to burn subtitles into video"),
+    VisualDescriptionError: (
+        "visual_description_failed",
+        "Failed to analyze video content",
+    ),
     ValueError: ("invalid_input", "Invalid input"),
 }
 
@@ -282,6 +288,7 @@ async def run_download(job: Job) -> None:
         job.video_height = metadata.height
         job.video_title = metadata.title
         job.video_description = metadata.description
+        job.video_duration = metadata.duration
         job.output_files[FileType.SOURCE_VIDEO] = video_path
 
         _send_download_complete(job)
@@ -332,10 +339,100 @@ async def _merge_and_serialize(
     log.info("step_done", step="merge", duration_ms=int((time.monotonic() - t0) * 1000))
 
 
+async def _serialize_translated_only(job: Job, translated_sub: Subtitle) -> None:
+    """Serialize only the translated subtitle to SRT (no bilingual merge)."""
+    _send_progress(
+        job, JobStatus.MERGING, 70.0, "serialize", "Generating subtitle file..."
+    )
+    work_dir = job.output_files[FileType.SOURCE_VIDEO].parent
+
+    srt_content = serialize_srt(translated_sub)
+    srt_path = work_dir / "subtitle.srt"
+    await asyncio.to_thread(srt_path.write_text, srt_content, "utf-8")
+    job.output_files[FileType.SRT] = srt_path
+
+
+async def _run_visual_description_subtitle(job: Job) -> None:
+    """Run visual description subtitle pipeline."""
+    try:
+        video_path = job.output_files.get(FileType.SOURCE_VIDEO)
+        if not video_path:
+            raise PipelineError("visual_description_failed", "Source video not found")
+
+        if job.video_duration > 5400.0:
+            raise PipelineError(
+                "video_too_long",
+                "Video exceeds 90-minute limit for visual description mode",
+            )
+
+        # Describe video (20-50%)
+        _send_progress(
+            job,
+            JobStatus.TRANSCRIBING,
+            20.0,
+            "describe",
+            "Analyzing video content...",
+        )
+        described_sub = await asyncio.to_thread(
+            describe_video, video_path, source_lang=job.source_lang
+        )
+        job.subtitle_source = SubtitleSource.VISUAL_DESCRIPTION
+
+        # Translate (50-70%)
+        _send_progress(
+            job,
+            JobStatus.TRANSLATING,
+            50.0,
+            "translate",
+            "Translating descriptions...",
+        )
+        translated_sub = await asyncio.to_thread(
+            translate_subtitle,
+            described_sub,
+            source_lang=job.source_lang,
+            target_lang=job.target_lang,
+            video_title=job.video_title,
+            video_description=job.video_description,
+            glossary_text=job.glossary_text,
+            on_progress=_make_translate_progress_cb(job),
+            on_rate_limit=_make_rate_limit_cb(job),
+        )
+
+        # Serialize translated-only SRT (70-80%)
+        await _serialize_translated_only(job, translated_sub)
+
+        _send_complete(job)
+
+    except PipelineError as exc:
+        _send_error(job, exc.code, exc.message, exc.detail or "")
+        log = logger.bind(job_id=job.id)
+        log.error("visual_description_failed", error_code=exc.code, error=str(exc))
+        raise
+    except Exception as exc:
+        pipeline_err = _to_pipeline_error(exc)
+        log = logger.bind(job_id=job.id)
+        log.error(
+            "visual_description_failed",
+            error_code=pipeline_err.code,
+            error=str(exc),
+        )
+        _send_error(
+            job,
+            pipeline_err.code,
+            pipeline_err.message,
+            detail=str(exc),
+        )
+        raise pipeline_err from exc
+
+
 async def run_subtitle(job: Job) -> None:
     """Phase 2: Transcribe -> Translate -> Merge -> Serialize."""
     log = logger.bind(job_id=job.id)
 
+    if job.processing_mode == "visual_description":
+        await _run_visual_description_subtitle(job)
+        return
+
     try:
         audio_path = job.output_files[FileType.AUDIO]
         work_dir = audio_path.parent
diff --git a/src/bilingualsub/api/routes.py b/src/bilingualsub/api/routes.py
index 97b41ba..8382754 100644
--- a/src/bilingualsub/api/routes.py
+++ b/src/bilingualsub/api/routes.py
@@ -150,6 +150,7 @@ async def create_job(body: JobCreateRequest, request: Request) -> JobCreateRespo
         target_lang=body.target_lang,
         start_time=body.start_time,
         end_time=body.end_time,
+        processing_mode=body.processing_mode,
     )
     _start_background_task(request, run_download(job))
     return JobCreateResponse(job_id=job.id)
@@ -296,6 +297,8 @@ async def start_subtitle(
             job.source_lang = body.source_lang
         if body.target_lang:
             job.target_lang = body.target_lang
+        if body.processing_mode is not None:
+            job.processing_mode = body.processing_mode
     glossary_manager = _get_glossary_manager(request)
     job.glossary_text = glossary_manager.format_for_prompt()
     _start_background_task(request, run_subtitle(job))
diff --git a/src/bilingualsub/api/schemas.py b/src/bilingualsub/api/schemas.py
index 0ed3f0f..06fa1dd 100644
--- a/src/bilingualsub/api/schemas.py
+++ b/src/bilingualsub/api/schemas.py
@@ -1,6 +1,6 @@
 """Pydantic v2 request/response schemas."""
 
-from typing import Self
+from typing import Literal, Self
 
 from pydantic import BaseModel, ConfigDict, HttpUrl, model_validator
 
@@ -17,6 +17,7 @@ class JobCreateRequest(BaseModel):
     target_lang: str = "zh-TW"
     start_time: float | None = None
     end_time: float | None = None
+    processing_mode: Literal["subtitle", "visual_description"] = "subtitle"
 
     @model_validator(mode="after")
     def validate_time_range(self) -> Self:
@@ -71,6 +72,7 @@ class StartSubtitleRequest(BaseModel):
 
     source_lang: str | None = None
     target_lang: str | None = None
+    processing_mode: Literal["subtitle", "visual_description"] | None = None
 
 
 class PartialRetranslateEntry(BaseModel):
diff --git a/src/bilingualsub/core/__init__.py b/src/bilingualsub/core/__init__.py
index dfea0c4..e3d2e7b 100644
--- a/src/bilingualsub/core/__init__.py
+++ b/src/bilingualsub/core/__init__.py
@@ -16,6 +16,10 @@
     retranslate_entries,
     translate_subtitle,
 )
+from bilingualsub.core.visual_describer import (
+    VisualDescriptionError,
+    describe_video,
+)
 
 __all__ = [
     "DownloadError",
@@ -29,6 +33,8 @@
     "TranscriptionError",
     "TranslationError",
     "VideoMetadata",
+    "VisualDescriptionError",
+    "describe_video",
     "download_video",
     "fetch_manual_subtitle",
     "merge_subtitles",
diff --git a/src/bilingualsub/core/visual_describer.py b/src/bilingualsub/core/visual_describer.py
new file mode 100644
index 0000000..80d4e55
--- /dev/null
+++ b/src/bilingualsub/core/visual_describer.py
@@ -0,0 +1,120 @@
+"""Video visual description using Gemini."""
+
+from __future__ import annotations
+
+import re
+import time
+from datetime import timedelta
+from typing import TYPE_CHECKING
+
+from bilingualsub.core.subtitle import Subtitle, SubtitleEntry
+from bilingualsub.utils.config import get_gemini_api_key, get_settings
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+try:
+    from google import genai as _genai
+except ImportError:
+    _genai = None
+
+DESCRIBE_PROMPT = (
+    "Analyze this video and provide a timestamped visual description. "
+    "Format each segment as: MM:SS - MM:SS | Description\n"
+    "Focus on: on-screen text, titles, product names, UI elements, "
+    "and key visual scenes.\n"
+    "Provide descriptions in the video's original language. "
+    "Be concise (one sentence per segment)."
+)
+
+_TIMESTAMP_PATTERN = re.compile(
+    r"(\d{1,2}:\d{2}(?::\d{2})?)\s*[-–]\s*"  # noqa: RUF001
+    r"(\d{1,2}:\d{2}(?::\d{2})?)\s*[|｜:：]\s*(.+)"  # noqa: RUF001
+)
+
+
+class VisualDescriptionError(Exception):
+    """Raised when the Gemini visual-description step fails or yields no segments."""
+
+
+def _parse_timestamp(timestamp: str) -> timedelta:
+    """Convert an ``MM:SS`` or ``HH:MM:SS`` string into a ``timedelta``."""
+    fields = [int(part) for part in timestamp.strip().split(":")[:3]]
+    if len(fields) == 2:
+        return timedelta(minutes=fields[0], seconds=fields[1])
+    hours, minutes, seconds = fields[0], fields[1], fields[2]
+    return timedelta(hours=hours, minutes=minutes, seconds=seconds)
+
+
+def describe_video(
+    video_path: Path,
+    *,
+    source_lang: str = "en",  # noqa: ARG001
+) -> Subtitle:
+    """Upload a video to Gemini and parse its timestamped visual description.
+
+    Response lines that do not match ``start - end | text`` are skipped, as are
+    segments with an empty description or with ``start >= end``.
+
+    Raises:
+        ValueError: If ``video_path`` does not exist (the API-key lookup may
+            raise it too).
+        VisualDescriptionError: If google-genai is missing, the upload/API call
+            fails, processing does not reach ACTIVE, or no segment parses.
+    """
+    if not video_path.exists():
+        raise ValueError(f"Video file not found: {video_path}")
+
+    api_key = get_gemini_api_key()
+    if _genai is None:
+        raise VisualDescriptionError(
+            "google-genai package is not installed. Run: uv add google-genai"
+        )
+    settings = get_settings()
+    max_polls = 300  # 300 polls x 2 s sleep: stop waiting after ~10 minutes
+
+    try:
+        client = _genai.Client(api_key=api_key)
+        uploaded_file = client.files.upload(file=video_path)
+
+        # Bounded wait: a file stuck in PROCESSING must not hang the job forever.
+        for _ in range(max_polls):
+            if uploaded_file.state != "PROCESSING":
+                break
+            time.sleep(2)
+            uploaded_file = client.files.get(name=uploaded_file.name)
+        if uploaded_file.state != "ACTIVE":
+            raise VisualDescriptionError(
+                f"File processing failed with state: {uploaded_file.state}"
+            )
+
+        response = client.models.generate_content(
+            model=settings.visual_description_model,
+            contents=[uploaded_file, DESCRIBE_PROMPT],
+        )
+    except VisualDescriptionError:
+        raise
+    except Exception as exc:
+        raise VisualDescriptionError(f"Gemini API call failed: {exc}") from exc
+
+    entries: list[SubtitleEntry] = []
+    for line in (response.text or "").splitlines():
+        match = _TIMESTAMP_PATTERN.search(line)
+        if not match:
+            continue
+        start_str, end_str, raw_text = match.groups()
+        text = raw_text.strip()
+        try:
+            start = _parse_timestamp(start_str)
+            end = _parse_timestamp(end_str)
+            if start >= end or not text:
+                continue
+            entries.append(
+                SubtitleEntry(index=len(entries) + 1, start=start, end=end, text=text)
+            )
+        except (ValueError, IndexError):
+            continue
+
+    if not entries:
+        raise VisualDescriptionError("No visual description segments returned")
+    return Subtitle(entries=entries)
diff --git a/src/bilingualsub/utils/config.py b/src/bilingualsub/utils/config.py
index 1c83ad7..88977fb 100644
--- a/src/bilingualsub/utils/config.py
+++ b/src/bilingualsub/utils/config.py
@@ -24,6 +24,8 @@ class Settings(BaseSettings):
 
     translator_model: str = "groq:openai/gpt-oss-120b"
     glossary_path: str = "glossary.json"
+    gemini_api_key: str = ""
+    visual_description_model: str = "gemini-3.1-flash-lite-preview"
 
     model_config = SettingsConfigDict(
         env_file=".env",
@@ -80,3 +82,21 @@ def get_openai_api_key() -> str:
             "Please set it with your OpenAI API key."
         )
     return settings.openai_api_key
+
+
+def get_gemini_api_key() -> str:
+    """Return the configured Gemini API key.
+
+    Returns:
+        The non-empty ``gemini_api_key`` value from the application settings.
+
+    Raises:
+        ValueError: If the ``gemini_api_key`` setting is empty.
+    """
+    api_key = get_settings().gemini_api_key
+    if api_key:
+        return api_key
+    raise ValueError(
+        "GEMINI_API_KEY environment variable is not set. "
+        "Please set it with your Gemini API key."
+    )
diff --git a/tests/integration/test_visual_description_pipeline.py b/tests/integration/test_visual_description_pipeline.py
new file mode 100644
index 0000000..37bb975
--- /dev/null
+++ b/tests/integration/test_visual_description_pipeline.py
@@ -0,0 +1,339 @@
+"""Integration tests for the visual description subtitle pipeline.
+
+Covers three causal chains:
+1. Happy path: POST /api/jobs → inject state → POST subtitle → poll → COMPLETED
+2. Video too long: duration > 5400 s → job fails with "video_too_long"
+3. Missing API key: no GEMINI_API_KEY → job fails with "invalid_input"
+"""
+
+from __future__ import annotations
+
+import time
+from datetime import timedelta
+from typing import TYPE_CHECKING, Any
+from unittest.mock import patch
+
+import pytest
+from fastapi.testclient import TestClient
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+from bilingualsub.api.app import create_app
+from bilingualsub.api.constants import FileType, JobStatus
+from bilingualsub.core.subtitle import Subtitle, SubtitleEntry
+from bilingualsub.utils.config import get_settings
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+async def _noop_run_download(*_args: object, **_kwargs: object) -> None:
+    """Async no-op that prevents the real download pipeline from running."""
+
+
+# ---------------------------------------------------------------------------
+# Mock subtitle data
+# ---------------------------------------------------------------------------
+
+
+def _make_described_subtitle() -> Subtitle:
+    return Subtitle(
+        entries=[
+            SubtitleEntry(
+                index=1,
+                start=timedelta(seconds=0),
+                end=timedelta(seconds=5),
+                text="Product showcase",
+            ),
+            SubtitleEntry(
+                index=2,
+                start=timedelta(seconds=5),
+                end=timedelta(seconds=10),
+                text="Brand logo appears",
+            ),
+            SubtitleEntry(
+                index=3,
+                start=timedelta(seconds=10),
+                end=timedelta(seconds=15),
+                text="Contact information",
+            ),
+        ]
+    )
+
+
+def _make_translated_subtitle() -> Subtitle:
+    return Subtitle(
+        entries=[
+            SubtitleEntry(
+                index=1,
+                start=timedelta(seconds=0),
+                end=timedelta(seconds=5),
+                text="產品展示",
+            ),
+            SubtitleEntry(
+                index=2,
+                start=timedelta(seconds=5),
+                end=timedelta(seconds=10),
+                text="品牌標誌出現",
+            ),
+            SubtitleEntry(
+                index=3,
+                start=timedelta(seconds=10),
+                end=timedelta(seconds=15),
+                text="聯絡資訊",
+            ),
+        ]
+    )
+
+
+# ---------------------------------------------------------------------------
+# Polling helper
+# ---------------------------------------------------------------------------
+
+
+def _poll_until_terminal(
+    client: TestClient, job_id: str, deadline_seconds: float = 10.0
+) -> dict[str, Any]:
+    """Return the job status once terminal, else the last one seen at deadline."""
+    stop_at = time.monotonic() + deadline_seconds
+    latest: dict[str, Any] = {}
+    while time.monotonic() < stop_at:
+        response = client.get(f"/api/jobs/{job_id}")
+        assert response.status_code == 200
+        latest = response.json()
+        if latest["status"] in ("completed", "failed"):
+            break
+        time.sleep(0.1)
+    return latest
+
+
+# ===========================================================================
+# Tests
+# ===========================================================================
+
+
+@pytest.mark.integration
+class TestVisualDescriptionPipeline:
+    """Integration tests for the visual_description processing mode."""
+
+    def test_visual_description_journey_produces_srt(
+        self,
+        tmp_path: Path,
+    ) -> None:
+        """Happy path: visual description pipeline completes and writes SRT file.
+
+        Causal chain:
+        1. POST /api/jobs (visual_description) → 200, job_id
+        2. Inject DOWNLOAD_COMPLETE state + fake files into job_manager
+        3. Mock describe_video + translate_subtitle
+        4. POST /api/jobs/{job_id}/subtitle → 200
+        5. Poll until completed
+        6. Assert subtitle_source, SRT exists and non-empty, ASS absent
+        """
+        app = create_app()
+
+        with TestClient(app, raise_server_exceptions=False) as client:
+            # Step 1: create job — patch run_download so it never runs and can't
+            # race with the state we inject below.
+            with patch("bilingualsub.api.routes.run_download", _noop_run_download):
+                create_resp = client.post(
+                    "/api/jobs",
+                    json={
+                        "source_url": "https://example.com/video.mp4",
+                        "processing_mode": "visual_description",
+                    },
+                )
+            assert create_resp.status_code == 200
+            job_id = create_resp.json()["job_id"]
+
+            # Step 2: inject state directly into job
+            job = app.state.job_manager.get_job(job_id)
+            assert job is not None, "Job should exist in manager after creation"
+
+            video_path = tmp_path / "video.mp4"
+            video_path.write_bytes(b"fake video content")
+            audio_path = tmp_path / "audio.wav"
+            audio_path.write_bytes(b"fake audio content")
+
+            job.status = JobStatus.DOWNLOAD_COMPLETE
+            job.video_duration = 60.0
+            job.output_files[FileType.SOURCE_VIDEO] = video_path
+            job.output_files[FileType.AUDIO] = audio_path
+
+            # Step 3 + 4: mock pipeline functions, then trigger subtitle step
+            with (
+                patch("bilingualsub.api.pipeline.describe_video") as mock_describe,
+                patch("bilingualsub.api.pipeline.translate_subtitle") as mock_translate,
+            ):
+                mock_describe.return_value = _make_described_subtitle()
+                mock_translate.return_value = _make_translated_subtitle()
+
+                subtitle_resp = client.post(
+                    f"/api/jobs/{job_id}/subtitle",
+                    json={"processing_mode": "visual_description"},
+                )
+                assert subtitle_resp.status_code == 200
+
+                # Step 5: poll for completion
+                status_data = _poll_until_terminal(client, job_id)
+
+            # Step 6: assertions
+            assert status_data["status"] == "completed", (
+                f"Expected completed, got {status_data['status']!r}. "
+                f"Error: {status_data.get('error')}"
+            )
+
+            # Reload job from manager to inspect final state
+            job = app.state.job_manager.get_job(job_id)
+            assert job is not None
+
+            assert job.subtitle_source == "visual_description", (
+                f"Expected subtitle_source='visual_description', got {job.subtitle_source!r}"
+            )
+
+            assert FileType.SRT in job.output_files, (
+                "Expected SRT file in output_files after visual description"
+            )
+            srt_path = job.output_files[FileType.SRT]
+            assert srt_path.exists(), f"SRT file does not exist at {srt_path}"
+            assert srt_path.stat().st_size > 0, "SRT file should be non-empty"
+
+            assert FileType.ASS not in job.output_files, (
+                "Visual description mode should NOT produce an ASS file"
+            )
+
+    def test_visual_description_video_too_long_fails(
+        self,
+        tmp_path: Path,
+    ) -> None:
+        """Video duration > 5400 s causes job to fail with 'video_too_long' error.
+
+        Causal chain:
+        1. POST /api/jobs (visual_description) → 200
+        2. Inject DOWNLOAD_COMPLETE + duration 5401.0 s
+        3. POST /api/jobs/{job_id}/subtitle → 200
+        4. Poll until failed
+        5. Assert error.code == "video_too_long"
+        """
+        app = create_app()
+
+        with TestClient(app, raise_server_exceptions=False) as client:
+            # Step 1: create job — prevent download from racing with state injection
+            with patch("bilingualsub.api.routes.run_download", _noop_run_download):
+                create_resp = client.post(
+                    "/api/jobs",
+                    json={
+                        "source_url": "https://example.com/long-video.mp4",
+                        "processing_mode": "visual_description",
+                    },
+                )
+            assert create_resp.status_code == 200
+            job_id = create_resp.json()["job_id"]
+
+            # Step 2: inject state with a video that exceeds the limit
+            job = app.state.job_manager.get_job(job_id)
+            assert job is not None
+
+            video_path = tmp_path / "long_video.mp4"
+            video_path.write_bytes(b"fake long video")
+
+            job.status = JobStatus.DOWNLOAD_COMPLETE
+            job.video_duration = 5401.0
+            job.output_files[FileType.SOURCE_VIDEO] = video_path
+
+            # Step 3: trigger subtitle step
+            subtitle_resp = client.post(
+                f"/api/jobs/{job_id}/subtitle",
+                json={"processing_mode": "visual_description"},
+            )
+            assert subtitle_resp.status_code == 200
+
+            # Step 4: poll until failed
+            status_data = _poll_until_terminal(client, job_id)
+
+        # Step 5: verify error
+        assert status_data["status"] == "failed", (
+            f"Expected failed, got {status_data['status']!r}"
+        )
+        error = status_data.get("error")
+        assert error is not None, "Expected error detail in response"
+        assert error["code"] == "video_too_long", (
+            f"Expected error code 'video_too_long', got {error['code']!r}"
+        )
+
+    def test_visual_description_missing_api_key_fails(
+        self,
+        tmp_path: Path,
+        monkeypatch: pytest.MonkeyPatch,
+    ) -> None:
+        """Missing GEMINI_API_KEY causes job to fail with 'invalid_input' error.
+
+        The ValueError raised by get_gemini_api_key() maps to 'invalid_input'
+        via _ERROR_MAP in pipeline.py.
+
+        Causal chain:
+        1. Remove GEMINI_API_KEY from env + clear settings cache
+        2. POST /api/jobs (visual_description) → 200
+        3. Inject DOWNLOAD_COMPLETE state
+        4. POST /api/jobs/{job_id}/subtitle → 200
+        5. Poll until failed (describe_video raises ValueError for missing key)
+        6. Assert error.code in {"invalid_input", "visual_description_failed"}
+        """
+        # Step 1: remove the API key so describe_video raises ValueError
+        monkeypatch.delenv("GEMINI_API_KEY", raising=False)
+        get_settings.cache_clear()
+
+        app = create_app()
+
+        with TestClient(app, raise_server_exceptions=False) as client:
+            # Step 2: create job — prevent download from racing with state injection
+            with patch("bilingualsub.api.routes.run_download", _noop_run_download):
+                create_resp = client.post(
+                    "/api/jobs",
+                    json={
+                        "source_url": "https://example.com/video.mp4",
+                        "processing_mode": "visual_description",
+                    },
+                )
+            assert create_resp.status_code == 200
+            job_id = create_resp.json()["job_id"]
+
+            # Step 3: inject DOWNLOAD_COMPLETE state (no mocking of describe_video —
+            # let it attempt to fetch the real API key and raise ValueError)
+            job = app.state.job_manager.get_job(job_id)
+            assert job is not None
+
+            video_path = tmp_path / "video_no_key.mp4"
+            video_path.write_bytes(b"fake video")
+            audio_path = tmp_path / "audio_no_key.wav"
+            audio_path.write_bytes(b"fake audio")
+
+            job.status = JobStatus.DOWNLOAD_COMPLETE
+            job.video_duration = 60.0
+            job.output_files[FileType.SOURCE_VIDEO] = video_path
+            job.output_files[FileType.AUDIO] = audio_path
+
+            # Step 4: trigger subtitle step (no mock — real describe_video will
+            # raise ValueError because GEMINI_API_KEY is absent)
+            subtitle_resp = client.post(
+                f"/api/jobs/{job_id}/subtitle",
+                json={"processing_mode": "visual_description"},
+            )
+            assert subtitle_resp.status_code == 200
+
+            # Step 5: poll until failed
+            status_data = _poll_until_terminal(client, job_id)
+
+        # Step 6: verify error
+        assert status_data["status"] == "failed", (
+            f"Expected failed, got {status_data['status']!r}"
+        )
+        error = status_data.get("error")
+        assert error is not None, "Expected error detail in response"
+        # ValueError → "invalid_input"; VisualDescriptionError → "visual_description_failed"
+        assert error["code"] in {"invalid_input", "visual_description_failed"}, (
+            f"Expected error code 'invalid_input' or 'visual_description_failed', "
+            f"got {error['code']!r}"
+        )
diff --git a/tests/unit/core/test_visual_describer.py b/tests/unit/core/test_visual_describer.py
new file mode 100644
index 0000000..76c3032
--- /dev/null
+++ b/tests/unit/core/test_visual_describer.py
@@ -0,0 +1,186 @@
+"""Unit tests for video visual description using Gemini."""
+
+from datetime import timedelta
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from bilingualsub.core.subtitle import Subtitle
+from bilingualsub.core.visual_describer import VisualDescriptionError, describe_video
+
+
+@pytest.mark.unit
+class TestDescribeVideo:
+    """Test cases for describe_video function."""
+
+    @pytest.fixture
+    def mock_genai(self):
+        """Mock google.genai module-level reference used by visual_describer."""
+        with patch("bilingualsub.core.visual_describer._genai") as mock:
+            yield mock
+
+    @pytest.fixture
+    def mock_get_gemini_api_key(self):
+        """Mock get_gemini_api_key to return a fixed key."""
+        with patch(
+            "bilingualsub.core.visual_describer.get_gemini_api_key",
+            return_value="fake-gemini-key",
+        ) as mock:
+            yield mock
+
+    # ------------------------------------------------------------------
+    # Helpers
+    # ------------------------------------------------------------------
+
+    def _setup_client(self, mock_genai, response_text: str) -> MagicMock:
+        """Wire mock_genai: upload yields an ACTIVE file, generate_content the text."""
+        mock_client = MagicMock()
+        mock_genai.Client.return_value = mock_client
+        # A bare MagicMock state compares != "ACTIVE", tripping the state check.
+        mock_file = MagicMock(state="ACTIVE")
+        mock_client.files.upload.return_value = mock_file
+
+        mock_response = MagicMock()
+        mock_response.text = response_text
+        mock_client.models.generate_content.return_value = mock_response
+
+        return mock_client
+
+    # ------------------------------------------------------------------
+    # Test cases
+    # ------------------------------------------------------------------
+
+    def test_valid_response_parses_to_subtitle(
+        self, tmp_path, mock_genai, mock_get_gemini_api_key
+    ):
+        """Three well-formed lines produce a Subtitle with 3 entries."""
+        response_text = (
+            "00:00 - 00:05 | Opening title card\n"
+            "00:05 - 00:15 | Product logo appears on screen\n"
+            "00:15 - 00:30 | Presenter walks into frame\n"
+        )
+        self._setup_client(mock_genai, response_text)
+
+        video_path = tmp_path / "test.mp4"
+        video_path.write_bytes(b"fake video content")
+
+        result = describe_video(video_path, source_lang="en")
+
+        assert isinstance(result, Subtitle)
+        assert len(result.entries) == 3
+
+        # Indices must start at 1
+        assert result.entries[0].index == 1
+        assert result.entries[1].index == 2
+        assert result.entries[2].index == 3
+
+        # start < end for every entry
+        for entry in result.entries:
+            assert entry.start < entry.end
+
+        # Text is preserved verbatim (stripped)
+        assert result.entries[0].text == "Opening title card"
+        assert result.entries[1].text == "Product logo appears on screen"
+        assert result.entries[2].text == "Presenter walks into frame"
+
+        # Spot-check timestamps
+        assert result.entries[0].start == timedelta(seconds=0)
+        assert result.entries[0].end == timedelta(seconds=5)
+        assert result.entries[2].start == timedelta(seconds=15)
+        assert result.entries[2].end == timedelta(seconds=30)
+
+    def test_no_segments_raises_error(
+        self, tmp_path, mock_genai, mock_get_gemini_api_key
+    ):
+        """Empty response text must raise VisualDescriptionError."""
+        self._setup_client(mock_genai, "")
+
+        video_path = tmp_path / "test.mp4"
+        video_path.write_bytes(b"fake video content")
+
+        with pytest.raises(
+            VisualDescriptionError,
+            match="No visual description segments returned",
+        ):
+            describe_video(video_path, source_lang="en")
+
+    def test_no_segments_unparseable_content_raises_error(
+        self, tmp_path, mock_genai, mock_get_gemini_api_key
+    ):
+        """Response with only unparseable lines must raise VisualDescriptionError."""
+        self._setup_client(
+            mock_genai, "This video shows nothing interesting.\nNo timestamps here!"
+        )
+
+        video_path = tmp_path / "test.mp4"
+        video_path.write_bytes(b"fake video content")
+
+        with pytest.raises(
+            VisualDescriptionError,
+            match="No visual description segments returned",
+        ):
+            describe_video(video_path, source_lang="en")
+
+    def test_api_error_raises_visual_description_error(
+        self, tmp_path, mock_genai, mock_get_gemini_api_key
+    ):
+        """An exception from generate_content (after an ACTIVE upload) is wrapped."""
+        mock_client = MagicMock()
+        mock_genai.Client.return_value = mock_client
+        mock_client.files.upload.return_value = MagicMock(state="ACTIVE")
+        mock_client.models.generate_content.side_effect = Exception("quota exceeded")
+
+        video_path = tmp_path / "test.mp4"
+        video_path.write_bytes(b"fake video content")
+
+        with pytest.raises(
+            VisualDescriptionError,
+            match="Gemini API call failed",
+        ):
+            describe_video(video_path, source_lang="en")
+
+    def test_missing_api_key_raises_value_error(self, tmp_path, mock_genai):
+        """ValueError from get_gemini_api_key propagates unchanged."""
+        video_path = tmp_path / "test.mp4"
+        video_path.write_bytes(b"fake video content")
+
+        with (
+            patch(
+                "bilingualsub.core.visual_describer.get_gemini_api_key",
+                side_effect=ValueError(
+                    "GEMINI_API_KEY environment variable is not set"
+                ),
+            ),
+            pytest.raises(ValueError),
+        ):
+            describe_video(video_path, source_lang="en")
+
+    def test_file_not_exists_raises_value_error(self, tmp_path):
+        """Non-existent video path raises ValueError before any API call."""
+        video_path = tmp_path / "missing.mp4"
+
+        with pytest.raises(ValueError, match="Video file not found"):
+            describe_video(video_path, source_lang="en")
+
+    def test_malformed_lines_are_skipped(
+        self, tmp_path, mock_genai, mock_get_gemini_api_key
+    ):
+        """Lines that don't match the timestamp pattern are silently ignored."""
+        response_text = (
+            "00:00 - 00:10 | Valid first entry\n"
+            "This line has no timestamp at all\n"
+            "01:00 - 01:10 | Valid second entry\n"
+            "just some random text\n"
+            "NOT A TIMESTAMP | description without time\n"
+        )
+        self._setup_client(mock_genai, response_text)
+
+        video_path = tmp_path / "test.mp4"
+        video_path.write_bytes(b"fake video content")
+
+        result = describe_video(video_path, source_lang="en")
+
+        assert isinstance(result, Subtitle)
+        assert len(result.entries) == 2
+        assert result.entries[0].text == "Valid first entry"
+        assert result.entries[1].text == "Valid second entry"
diff --git a/uv.lock b/uv.lock
index 5eaa5fd..c9b6477 100644
--- a/uv.lock
+++ b/uv.lock
@@ -80,6 +80,7 @@ dependencies = [
     { name = "agno" },
     { name = "fastapi" },
     { name = "ffmpeg-python" },
+    { name = "google-genai" },
     { name = "groq" },
     { name = "httpx" },
     { name = "ollama" },
@@ -114,12 +115,21 @@ e2e = [
     { name = "pytest-playwright" },
 ]
 
+[package.dev-dependencies]
+dev = [
+    { name = "mypy" },
+    { name = "pytest-asyncio" },
+    { name = "pytest-cov" },
+    { name = "ruff" },
+]
+
 [package.metadata]
 requires-dist = [
     { name = "agno", specifier = ">=1.0.0" },
     { name = "bandit", marker = "extra == 'dev'", specifier = ">=1.7.0" },
     { name = "fastapi", specifier = ">=0.115.0" },
     { name = "ffmpeg-python", specifier = ">=0.2.0" },
+    { name = "google-genai", specifier = ">=1.0.0" },
     { name = "groq", specifier = ">=0.11.0" },
     { name = "httpx", specifier = ">=0.27.0" },
     { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.10.0" },
@@ -148,6 +158,14 @@ requires-dist = [
 ]
 provides-extras = ["dev", "e2e"]
 
+[package.metadata.requires-dev]
+dev = [
+    { name = "mypy", specifier = ">=1.19.1" },
+    { name = "pytest-asyncio", specifier = ">=1.3.0" },
+    { name = "pytest-cov", specifier = ">=7.0.0" },
+    { name = "ruff", specifier = ">=0.14.14" },
+]
+
 [[package]]
 name = "boolean-py"
 version = "5.0"
@@ -184,6 +202,76 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" },
 ]
 
+[[package]]
+name = "cffi"
+version = "2.0.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pycparser", marker = "implementation_name != 'PyPy'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/12/4a/3dfd5f7850cbf0d06dc84ba9aa00db766b52ca38d8b86e3a38314d52498c/cffi-2.0.0-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:b4c854ef3adc177950a8dfc81a86f5115d2abd545751a304c5bcf2c2c7283cfe", size = 184344, upload-time = "2025-09-08T23:22:26.456Z" },
+    { url = "https://files.pythonhosted.org/packages/4f/8b/f0e4c441227ba756aafbe78f117485b25bb26b1c059d01f137fa6d14896b/cffi-2.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2de9a304e27f7596cd03d16f1b7c72219bd944e99cc52b84d0145aefb07cbd3c", size = 180560, upload-time = "2025-09-08T23:22:28.197Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/b7/1200d354378ef52ec227395d95c2576330fd22a869f7a70e88e1447eb234/cffi-2.0.0-cp311-cp311-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:baf5215e0ab74c16e2dd324e8ec067ef59e41125d3eade2b863d294fd5035c92", size = 209613, upload-time = "2025-09-08T23:22:29.475Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/56/6033f5e86e8cc9bb629f0077ba71679508bdf54a9a5e112a3c0b91870332/cffi-2.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:730cacb21e1bdff3ce90babf007d0a0917cc3e6492f336c2f0134101e0944f93", size = 216476, upload-time = "2025-09-08T23:22:31.063Z" },
+    { url = "https://files.pythonhosted.org/packages/dc/7f/55fecd70f7ece178db2f26128ec41430d8720f2d12ca97bf8f0a628207d5/cffi-2.0.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:6824f87845e3396029f3820c206e459ccc91760e8fa24422f8b0c3d1731cbec5", size = 203374, upload-time = "2025-09-08T23:22:32.507Z" },
+    { url = "https://files.pythonhosted.org/packages/84/ef/a7b77c8bdc0f77adc3b46888f1ad54be8f3b7821697a7b89126e829e676a/cffi-2.0.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:9de40a7b0323d889cf8d23d1ef214f565ab154443c42737dfe52ff82cf857664", size = 202597, upload-time = "2025-09-08T23:22:34.132Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/91/500d892b2bf36529a75b77958edfcd5ad8e2ce4064ce2ecfeab2125d72d1/cffi-2.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8941aaadaf67246224cee8c3803777eed332a19d909b47e29c9842ef1e79ac26", size = 215574, upload-time = "2025-09-08T23:22:35.443Z" },
+    { url = "https://files.pythonhosted.org/packages/44/64/58f6255b62b101093d5df22dcb752596066c7e89dd725e0afaed242a61be/cffi-2.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a05d0c237b3349096d3981b727493e22147f934b20f6f125a3eba8f994bec4a9", size = 218971, upload-time = "2025-09-08T23:22:36.805Z" },
+    { url = "https://files.pythonhosted.org/packages/ab/49/fa72cebe2fd8a55fbe14956f9970fe8eb1ac59e5df042f603ef7c8ba0adc/cffi-2.0.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:94698a9c5f91f9d138526b48fe26a199609544591f859c870d477351dc7b2414", size = 211972, upload-time = "2025-09-08T23:22:38.436Z" },
+    { url = "https://files.pythonhosted.org/packages/0b/28/dd0967a76aab36731b6ebfe64dec4e981aff7e0608f60c2d46b46982607d/cffi-2.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:5fed36fccc0612a53f1d4d9a816b50a36702c28a2aa880cb8a122b3466638743", size = 217078, upload-time = "2025-09-08T23:22:39.776Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/c0/015b25184413d7ab0a410775fdb4a50fca20f5589b5dab1dbbfa3baad8ce/cffi-2.0.0-cp311-cp311-win32.whl", hash = "sha256:c649e3a33450ec82378822b3dad03cc228b8f5963c0c12fc3b1e0ab940f768a5", size = 172076, upload-time = "2025-09-08T23:22:40.95Z" },
+    { url = "https://files.pythonhosted.org/packages/ae/8f/dc5531155e7070361eb1b7e4c1a9d896d0cb21c49f807a6c03fd63fc877e/cffi-2.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:66f011380d0e49ed280c789fbd08ff0d40968ee7b665575489afa95c98196ab5", size = 182820, upload-time = "2025-09-08T23:22:42.463Z" },
+    { url = "https://files.pythonhosted.org/packages/95/5c/1b493356429f9aecfd56bc171285a4c4ac8697f76e9bbbbb105e537853a1/cffi-2.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:c6638687455baf640e37344fe26d37c404db8b80d037c3d29f58fe8d1c3b194d", size = 177635, upload-time = "2025-09-08T23:22:43.623Z" },
+    { url = "https://files.pythonhosted.org/packages/ea/47/4f61023ea636104d4f16ab488e268b93008c3d0bb76893b1b31db1f96802/cffi-2.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d", size = 185271, upload-time = "2025-09-08T23:22:44.795Z" },
+    { url = "https://files.pythonhosted.org/packages/df/a2/781b623f57358e360d62cdd7a8c681f074a71d445418a776eef0aadb4ab4/cffi-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c", size = 181048, upload-time = "2025-09-08T23:22:45.938Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/df/a4f0fbd47331ceeba3d37c2e51e9dfc9722498becbeec2bd8bc856c9538a/cffi-2.0.0-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe", size = 212529, upload-time = "2025-09-08T23:22:47.349Z" },
+    { url = "https://files.pythonhosted.org/packages/d5/72/12b5f8d3865bf0f87cf1404d8c374e7487dcf097a1c91c436e72e6badd83/cffi-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062", size = 220097, upload-time = "2025-09-08T23:22:48.677Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/95/7a135d52a50dfa7c882ab0ac17e8dc11cec9d55d2c18dda414c051c5e69e/cffi-2.0.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e", size = 207983, upload-time = "2025-09-08T23:22:50.06Z" },
+    { url = "https://files.pythonhosted.org/packages/3a/c8/15cb9ada8895957ea171c62dc78ff3e99159ee7adb13c0123c001a2546c1/cffi-2.0.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037", size = 206519, upload-time = "2025-09-08T23:22:51.364Z" },
+    { url = "https://files.pythonhosted.org/packages/78/2d/7fa73dfa841b5ac06c7b8855cfc18622132e365f5b81d02230333ff26e9e/cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba", size = 219572, upload-time = "2025-09-08T23:22:52.902Z" },
+    { url = "https://files.pythonhosted.org/packages/07/e0/267e57e387b4ca276b90f0434ff88b2c2241ad72b16d31836adddfd6031b/cffi-2.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94", size = 222963, upload-time = "2025-09-08T23:22:54.518Z" },
+    { url = "https://files.pythonhosted.org/packages/b6/75/1f2747525e06f53efbd878f4d03bac5b859cbc11c633d0fb81432d98a795/cffi-2.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187", size = 221361, upload-time = "2025-09-08T23:22:55.867Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/2b/2b6435f76bfeb6bbf055596976da087377ede68df465419d192acf00c437/cffi-2.0.0-cp312-cp312-win32.whl", hash = "sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18", size = 172932, upload-time = "2025-09-08T23:22:57.188Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/ed/13bd4418627013bec4ed6e54283b1959cf6db888048c7cf4b4c3b5b36002/cffi-2.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5", size = 183557, upload-time = "2025-09-08T23:22:58.351Z" },
+    { url = "https://files.pythonhosted.org/packages/95/31/9f7f93ad2f8eff1dbc1c3656d7ca5bfd8fb52c9d786b4dcf19b2d02217fa/cffi-2.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6", size = 177762, upload-time = "2025-09-08T23:22:59.668Z" },
+    { url = "https://files.pythonhosted.org/packages/4b/8d/a0a47a0c9e413a658623d014e91e74a50cdd2c423f7ccfd44086ef767f90/cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb", size = 185230, upload-time = "2025-09-08T23:23:00.879Z" },
+    { url = "https://files.pythonhosted.org/packages/4a/d2/a6c0296814556c68ee32009d9c2ad4f85f2707cdecfd7727951ec228005d/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca", size = 181043, upload-time = "2025-09-08T23:23:02.231Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/1e/d22cc63332bd59b06481ceaac49d6c507598642e2230f201649058a7e704/cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b", size = 212446, upload-time = "2025-09-08T23:23:03.472Z" },
+    { url = "https://files.pythonhosted.org/packages/a9/f5/a2c23eb03b61a0b8747f211eb716446c826ad66818ddc7810cc2cc19b3f2/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", size = 220101, upload-time = "2025-09-08T23:23:04.792Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/7f/e6647792fc5850d634695bc0e6ab4111ae88e89981d35ac269956605feba/cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2", size = 207948, upload-time = "2025-09-08T23:23:06.127Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/1e/a5a1bd6f1fb30f22573f76533de12a00bf274abcdc55c8edab639078abb6/cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3", size = 206422, upload-time = "2025-09-08T23:23:07.753Z" },
+    { url = "https://files.pythonhosted.org/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499, upload-time = "2025-09-08T23:23:09.648Z" },
+    { url = "https://files.pythonhosted.org/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928, upload-time = "2025-09-08T23:23:10.928Z" },
+    { url = "https://files.pythonhosted.org/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302, upload-time = "2025-09-08T23:23:12.42Z" },
+    { url = "https://files.pythonhosted.org/packages/eb/6d/bf9bda840d5f1dfdbf0feca87fbdb64a918a69bca42cfa0ba7b137c48cb8/cffi-2.0.0-cp313-cp313-win32.whl", hash = "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27", size = 172909, upload-time = "2025-09-08T23:23:14.32Z" },
+    { url = "https://files.pythonhosted.org/packages/37/18/6519e1ee6f5a1e579e04b9ddb6f1676c17368a7aba48299c3759bbc3c8b3/cffi-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75", size = 183402, upload-time = "2025-09-08T23:23:15.535Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/0e/02ceeec9a7d6ee63bb596121c2c8e9b3a9e150936f4fbef6ca1943e6137c/cffi-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91", size = 177780, upload-time = "2025-09-08T23:23:16.761Z" },
+    { url = "https://files.pythonhosted.org/packages/92/c4/3ce07396253a83250ee98564f8d7e9789fab8e58858f35d07a9a2c78de9f/cffi-2.0.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5", size = 185320, upload-time = "2025-09-08T23:23:18.087Z" },
+    { url = "https://files.pythonhosted.org/packages/59/dd/27e9fa567a23931c838c6b02d0764611c62290062a6d4e8ff7863daf9730/cffi-2.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13", size = 181487, upload-time = "2025-09-08T23:23:19.622Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/43/0e822876f87ea8a4ef95442c3d766a06a51fc5298823f884ef87aaad168c/cffi-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b", size = 220049, upload-time = "2025-09-08T23:23:20.853Z" },
+    { url = "https://files.pythonhosted.org/packages/b4/89/76799151d9c2d2d1ead63c2429da9ea9d7aac304603de0c6e8764e6e8e70/cffi-2.0.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c", size = 207793, upload-time = "2025-09-08T23:23:22.08Z" },
+    { url = "https://files.pythonhosted.org/packages/bb/dd/3465b14bb9e24ee24cb88c9e3730f6de63111fffe513492bf8c808a3547e/cffi-2.0.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef", size = 206300, upload-time = "2025-09-08T23:23:23.314Z" },
+    { url = "https://files.pythonhosted.org/packages/47/d9/d83e293854571c877a92da46fdec39158f8d7e68da75bf73581225d28e90/cffi-2.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775", size = 219244, upload-time = "2025-09-08T23:23:24.541Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/0f/1f177e3683aead2bb00f7679a16451d302c436b5cbf2505f0ea8146ef59e/cffi-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205", size = 222828, upload-time = "2025-09-08T23:23:26.143Z" },
+    { url = "https://files.pythonhosted.org/packages/c6/0f/cafacebd4b040e3119dcb32fed8bdef8dfe94da653155f9d0b9dc660166e/cffi-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1", size = 220926, upload-time = "2025-09-08T23:23:27.873Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/aa/df335faa45b395396fcbc03de2dfcab242cd61a9900e914fe682a59170b1/cffi-2.0.0-cp314-cp314-win32.whl", hash = "sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f", size = 175328, upload-time = "2025-09-08T23:23:44.61Z" },
+    { url = "https://files.pythonhosted.org/packages/bb/92/882c2d30831744296ce713f0feb4c1cd30f346ef747b530b5318715cc367/cffi-2.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25", size = 185650, upload-time = "2025-09-08T23:23:45.848Z" },
+    { url = "https://files.pythonhosted.org/packages/9f/2c/98ece204b9d35a7366b5b2c6539c350313ca13932143e79dc133ba757104/cffi-2.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad", size = 180687, upload-time = "2025-09-08T23:23:47.105Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/61/c768e4d548bfa607abcda77423448df8c471f25dbe64fb2ef6d555eae006/cffi-2.0.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9", size = 188773, upload-time = "2025-09-08T23:23:29.347Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/ea/5f76bce7cf6fcd0ab1a1058b5af899bfbef198bea4d5686da88471ea0336/cffi-2.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d", size = 185013, upload-time = "2025-09-08T23:23:30.63Z" },
+    { url = "https://files.pythonhosted.org/packages/be/b4/c56878d0d1755cf9caa54ba71e5d049479c52f9e4afc230f06822162ab2f/cffi-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c", size = 221593, upload-time = "2025-09-08T23:23:31.91Z" },
+    { url = "https://files.pythonhosted.org/packages/e0/0d/eb704606dfe8033e7128df5e90fee946bbcb64a04fcdaa97321309004000/cffi-2.0.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8", size = 209354, upload-time = "2025-09-08T23:23:33.214Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/19/3c435d727b368ca475fb8742ab97c9cb13a0de600ce86f62eab7fa3eea60/cffi-2.0.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc", size = 208480, upload-time = "2025-09-08T23:23:34.495Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/44/681604464ed9541673e486521497406fadcc15b5217c3e326b061696899a/cffi-2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592", size = 221584, upload-time = "2025-09-08T23:23:36.096Z" },
+    { url = "https://files.pythonhosted.org/packages/25/8e/342a504ff018a2825d395d44d63a767dd8ebc927ebda557fecdaca3ac33a/cffi-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512", size = 224443, upload-time = "2025-09-08T23:23:37.328Z" },
+    { url = "https://files.pythonhosted.org/packages/e1/5e/b666bacbbc60fbf415ba9988324a132c9a7a0448a9a8f125074671c0f2c3/cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4", size = 223437, upload-time = "2025-09-08T23:23:38.945Z" },
+    { url = "https://files.pythonhosted.org/packages/a0/1d/ec1a60bd1a10daa292d3cd6bb0b359a81607154fb8165f3ec95fe003b85c/cffi-2.0.0-cp314-cp314t-win32.whl", hash = "sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e", size = 180487, upload-time = "2025-09-08T23:23:40.423Z" },
+    { url = "https://files.pythonhosted.org/packages/bf/41/4c1168c74fac325c0c8156f04b6749c8b6a8f405bbf91413ba088359f60d/cffi-2.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6", size = 191726, upload-time = "2025-09-08T23:23:41.742Z" },
+    { url = "https://files.pythonhosted.org/packages/ae/3a/dbeec9d1ee0844c679f6bb5d6ad4e9f198b1224f4e7a32825f47f6192b0c/cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9", size = 184195, upload-time = "2025-09-08T23:23:43.004Z" },
+]
+
 [[package]]
 name = "cfgv"
 version = "3.5.0"
@@ -379,6 +467,65 @@ toml = [
     { name = "tomli", marker = "python_full_version <= '3.11'" },
 ]
 
+[[package]]
+name = "cryptography"
+version = "46.0.7"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "cffi", marker = "platform_python_implementation != 'PyPy'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/47/93/ac8f3d5ff04d54bc814e961a43ae5b0b146154c89c61b47bb07557679b18/cryptography-46.0.7.tar.gz", hash = "sha256:e4cfd68c5f3e0bfdad0d38e023239b96a2fe84146481852dffbcca442c245aa5", size = 750652, upload-time = "2026-04-08T01:57:54.692Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/0b/5d/4a8f770695d73be252331e60e526291e3df0c9b27556a90a6b47bccca4c2/cryptography-46.0.7-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:ea42cbe97209df307fdc3b155f1b6fa2577c0defa8f1f7d3be7d31d189108ad4", size = 7179869, upload-time = "2026-04-08T01:56:17.157Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/45/6d80dc379b0bbc1f9d1e429f42e4cb9e1d319c7a8201beffd967c516ea01/cryptography-46.0.7-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b36a4695e29fe69215d75960b22577197aca3f7a25b9cf9d165dcfe9d80bc325", size = 4275492, upload-time = "2026-04-08T01:56:19.36Z" },
+    { url = "https://files.pythonhosted.org/packages/4a/9a/1765afe9f572e239c3469f2cb429f3ba7b31878c893b246b4b2994ffe2fe/cryptography-46.0.7-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5ad9ef796328c5e3c4ceed237a183f5d41d21150f972455a9d926593a1dcb308", size = 4426670, upload-time = "2026-04-08T01:56:21.415Z" },
+    { url = "https://files.pythonhosted.org/packages/8f/3e/af9246aaf23cd4ee060699adab1e47ced3f5f7e7a8ffdd339f817b446462/cryptography-46.0.7-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:73510b83623e080a2c35c62c15298096e2a5dc8d51c3b4e1740211839d0dea77", size = 4280275, upload-time = "2026-04-08T01:56:23.539Z" },
+    { url = "https://files.pythonhosted.org/packages/0f/54/6bbbfc5efe86f9d71041827b793c24811a017c6ac0fd12883e4caa86b8ed/cryptography-46.0.7-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:cbd5fb06b62bd0721e1170273d3f4d5a277044c47ca27ee257025146c34cbdd1", size = 4928402, upload-time = "2026-04-08T01:56:25.624Z" },
+    { url = "https://files.pythonhosted.org/packages/2d/cf/054b9d8220f81509939599c8bdbc0c408dbd2bdd41688616a20731371fe0/cryptography-46.0.7-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:420b1e4109cc95f0e5700eed79908cef9268265c773d3a66f7af1eef53d409ef", size = 4459985, upload-time = "2026-04-08T01:56:27.309Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/46/4e4e9c6040fb01c7467d47217d2f882daddeb8828f7df800cb806d8a2288/cryptography-46.0.7-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:24402210aa54baae71d99441d15bb5a1919c195398a87b563df84468160a65de", size = 3990652, upload-time = "2026-04-08T01:56:29.095Z" },
+    { url = "https://files.pythonhosted.org/packages/36/5f/313586c3be5a2fbe87e4c9a254207b860155a8e1f3cca99f9910008e7d08/cryptography-46.0.7-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:8a469028a86f12eb7d2fe97162d0634026d92a21f3ae0ac87ed1c4a447886c83", size = 4279805, upload-time = "2026-04-08T01:56:30.928Z" },
+    { url = "https://files.pythonhosted.org/packages/69/33/60dfc4595f334a2082749673386a4d05e4f0cf4df8248e63b2c3437585f2/cryptography-46.0.7-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:9694078c5d44c157ef3162e3bf3946510b857df5a3955458381d1c7cfc143ddb", size = 4892883, upload-time = "2026-04-08T01:56:32.614Z" },
+    { url = "https://files.pythonhosted.org/packages/c7/0b/333ddab4270c4f5b972f980adef4faa66951a4aaf646ca067af597f15563/cryptography-46.0.7-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:42a1e5f98abb6391717978baf9f90dc28a743b7d9be7f0751a6f56a75d14065b", size = 4459756, upload-time = "2026-04-08T01:56:34.306Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/14/633913398b43b75f1234834170947957c6b623d1701ffc7a9600da907e89/cryptography-46.0.7-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:91bbcb08347344f810cbe49065914fe048949648f6bd5c2519f34619142bbe85", size = 4410244, upload-time = "2026-04-08T01:56:35.977Z" },
+    { url = "https://files.pythonhosted.org/packages/10/f2/19ceb3b3dc14009373432af0c13f46aa08e3ce334ec6eff13492e1812ccd/cryptography-46.0.7-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5d1c02a14ceb9148cc7816249f64f623fbfee39e8c03b3650d842ad3f34d637e", size = 4674868, upload-time = "2026-04-08T01:56:38.034Z" },
+    { url = "https://files.pythonhosted.org/packages/1a/bb/a5c213c19ee94b15dfccc48f363738633a493812687f5567addbcbba9f6f/cryptography-46.0.7-cp311-abi3-win32.whl", hash = "sha256:d23c8ca48e44ee015cd0a54aeccdf9f09004eba9fc96f38c911011d9ff1bd457", size = 3026504, upload-time = "2026-04-08T01:56:39.666Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/02/7788f9fefa1d060ca68717c3901ae7fffa21ee087a90b7f23c7a603c32ae/cryptography-46.0.7-cp311-abi3-win_amd64.whl", hash = "sha256:397655da831414d165029da9bc483bed2fe0e75dde6a1523ec2fe63f3c46046b", size = 3488363, upload-time = "2026-04-08T01:56:41.893Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/56/15619b210e689c5403bb0540e4cb7dbf11a6bf42e483b7644e471a2812b3/cryptography-46.0.7-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:d151173275e1728cf7839aaa80c34fe550c04ddb27b34f48c232193df8db5842", size = 7119671, upload-time = "2026-04-08T01:56:44Z" },
+    { url = "https://files.pythonhosted.org/packages/74/66/e3ce040721b0b5599e175ba91ab08884c75928fbeb74597dd10ef13505d2/cryptography-46.0.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:db0f493b9181c7820c8134437eb8b0b4792085d37dbb24da050476ccb664e59c", size = 4268551, upload-time = "2026-04-08T01:56:46.071Z" },
+    { url = "https://files.pythonhosted.org/packages/03/11/5e395f961d6868269835dee1bafec6a1ac176505a167f68b7d8818431068/cryptography-46.0.7-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ebd6daf519b9f189f85c479427bbd6e9c9037862cf8fe89ee35503bd209ed902", size = 4408887, upload-time = "2026-04-08T01:56:47.718Z" },
+    { url = "https://files.pythonhosted.org/packages/40/53/8ed1cf4c3b9c8e611e7122fb56f1c32d09e1fff0f1d77e78d9ff7c82653e/cryptography-46.0.7-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:b7b412817be92117ec5ed95f880defe9cf18a832e8cafacf0a22337dc1981b4d", size = 4271354, upload-time = "2026-04-08T01:56:49.312Z" },
+    { url = "https://files.pythonhosted.org/packages/50/46/cf71e26025c2e767c5609162c866a78e8a2915bbcfa408b7ca495c6140c4/cryptography-46.0.7-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:fbfd0e5f273877695cb93baf14b185f4878128b250cc9f8e617ea0c025dfb022", size = 4905845, upload-time = "2026-04-08T01:56:50.916Z" },
+    { url = "https://files.pythonhosted.org/packages/c0/ea/01276740375bac6249d0a971ebdf6b4dc9ead0ee0a34ef3b5a88c1a9b0d4/cryptography-46.0.7-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:ffca7aa1d00cf7d6469b988c581598f2259e46215e0140af408966a24cf086ce", size = 4444641, upload-time = "2026-04-08T01:56:52.882Z" },
+    { url = "https://files.pythonhosted.org/packages/3d/4c/7d258f169ae71230f25d9f3d06caabcff8c3baf0978e2b7d65e0acac3827/cryptography-46.0.7-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:60627cf07e0d9274338521205899337c5d18249db56865f943cbe753aa96f40f", size = 3967749, upload-time = "2026-04-08T01:56:54.597Z" },
+    { url = "https://files.pythonhosted.org/packages/b5/2a/2ea0767cad19e71b3530e4cad9605d0b5e338b6a1e72c37c9c1ceb86c333/cryptography-46.0.7-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:80406c3065e2c55d7f49a9550fe0c49b3f12e5bfff5dedb727e319e1afb9bf99", size = 4270942, upload-time = "2026-04-08T01:56:56.416Z" },
+    { url = "https://files.pythonhosted.org/packages/41/3d/fe14df95a83319af25717677e956567a105bb6ab25641acaa093db79975d/cryptography-46.0.7-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:c5b1ccd1239f48b7151a65bc6dd54bcfcc15e028c8ac126d3fada09db0e07ef1", size = 4871079, upload-time = "2026-04-08T01:56:58.31Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/59/4a479e0f36f8f378d397f4eab4c850b4ffb79a2f0d58704b8fa0703ddc11/cryptography-46.0.7-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:d5f7520159cd9c2154eb61eb67548ca05c5774d39e9c2c4339fd793fe7d097b2", size = 4443999, upload-time = "2026-04-08T01:57:00.508Z" },
+    { url = "https://files.pythonhosted.org/packages/28/17/b59a741645822ec6d04732b43c5d35e4ef58be7bfa84a81e5ae6f05a1d33/cryptography-46.0.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:fcd8eac50d9138c1d7fc53a653ba60a2bee81a505f9f8850b6b2888555a45d0e", size = 4399191, upload-time = "2026-04-08T01:57:02.654Z" },
+    { url = "https://files.pythonhosted.org/packages/59/6a/bb2e166d6d0e0955f1e9ff70f10ec4b2824c9cfcdb4da772c7dd69cc7d80/cryptography-46.0.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:65814c60f8cc400c63131584e3e1fad01235edba2614b61fbfbfa954082db0ee", size = 4655782, upload-time = "2026-04-08T01:57:04.592Z" },
+    { url = "https://files.pythonhosted.org/packages/95/b6/3da51d48415bcb63b00dc17c2eff3a651b7c4fed484308d0f19b30e8cb2c/cryptography-46.0.7-cp314-cp314t-win32.whl", hash = "sha256:fdd1736fed309b4300346f88f74cd120c27c56852c3838cab416e7a166f67298", size = 3002227, upload-time = "2026-04-08T01:57:06.91Z" },
+    { url = "https://files.pythonhosted.org/packages/32/a8/9f0e4ed57ec9cebe506e58db11ae472972ecb0c659e4d52bbaee80ca340a/cryptography-46.0.7-cp314-cp314t-win_amd64.whl", hash = "sha256:e06acf3c99be55aa3b516397fe42f5855597f430add9c17fa46bf2e0fb34c9bb", size = 3475332, upload-time = "2026-04-08T01:57:08.807Z" },
+    { url = "https://files.pythonhosted.org/packages/a7/7f/cd42fc3614386bc0c12f0cb3c4ae1fc2bbca5c9662dfed031514911d513d/cryptography-46.0.7-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:462ad5cb1c148a22b2e3bcc5ad52504dff325d17daf5df8d88c17dda1f75f2a4", size = 7165618, upload-time = "2026-04-08T01:57:10.645Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/d0/36a49f0262d2319139d2829f773f1b97ef8aef7f97e6e5bd21455e5a8fb5/cryptography-46.0.7-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:84d4cced91f0f159a7ddacad249cc077e63195c36aac40b4150e7a57e84fffe7", size = 4270628, upload-time = "2026-04-08T01:57:12.885Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/6c/1a42450f464dda6ffbe578a911f773e54dd48c10f9895a23a7e88b3e7db5/cryptography-46.0.7-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:128c5edfe5e5938b86b03941e94fac9ee793a94452ad1365c9fc3f4f62216832", size = 4415405, upload-time = "2026-04-08T01:57:14.923Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/92/4ed714dbe93a066dc1f4b4581a464d2d7dbec9046f7c8b7016f5286329e2/cryptography-46.0.7-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:5e51be372b26ef4ba3de3c167cd3d1022934bc838ae9eaad7e644986d2a3d163", size = 4272715, upload-time = "2026-04-08T01:57:16.638Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/e6/a26b84096eddd51494bba19111f8fffe976f6a09f132706f8f1bf03f51f7/cryptography-46.0.7-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:cdf1a610ef82abb396451862739e3fc93b071c844399e15b90726ef7470eeaf2", size = 4918400, upload-time = "2026-04-08T01:57:19.021Z" },
+    { url = "https://files.pythonhosted.org/packages/c7/08/ffd537b605568a148543ac3c2b239708ae0bd635064bab41359252ef88ed/cryptography-46.0.7-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:1d25aee46d0c6f1a501adcddb2d2fee4b979381346a78558ed13e50aa8a59067", size = 4450634, upload-time = "2026-04-08T01:57:21.185Z" },
+    { url = "https://files.pythonhosted.org/packages/16/01/0cd51dd86ab5b9befe0d031e276510491976c3a80e9f6e31810cce46c4ad/cryptography-46.0.7-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:cdfbe22376065ffcf8be74dc9a909f032df19bc58a699456a21712d6e5eabfd0", size = 3985233, upload-time = "2026-04-08T01:57:22.862Z" },
+    { url = "https://files.pythonhosted.org/packages/92/49/819d6ed3a7d9349c2939f81b500a738cb733ab62fbecdbc1e38e83d45e12/cryptography-46.0.7-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:abad9dac36cbf55de6eb49badd4016806b3165d396f64925bf2999bcb67837ba", size = 4271955, upload-time = "2026-04-08T01:57:24.814Z" },
+    { url = "https://files.pythonhosted.org/packages/80/07/ad9b3c56ebb95ed2473d46df0847357e01583f4c52a85754d1a55e29e4d0/cryptography-46.0.7-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:935ce7e3cfdb53e3536119a542b839bb94ec1ad081013e9ab9b7cfd478b05006", size = 4879888, upload-time = "2026-04-08T01:57:26.88Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/c7/201d3d58f30c4c2bdbe9b03844c291feb77c20511cc3586daf7edc12a47b/cryptography-46.0.7-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:35719dc79d4730d30f1c2b6474bd6acda36ae2dfae1e3c16f2051f215df33ce0", size = 4449961, upload-time = "2026-04-08T01:57:29.068Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/ef/649750cbf96f3033c3c976e112265c33906f8e462291a33d77f90356548c/cryptography-46.0.7-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:7bbc6ccf49d05ac8f7d7b5e2e2c33830d4fe2061def88210a126d130d7f71a85", size = 4401696, upload-time = "2026-04-08T01:57:31.029Z" },
+    { url = "https://files.pythonhosted.org/packages/41/52/a8908dcb1a389a459a29008c29966c1d552588d4ae6d43f3a1a4512e0ebe/cryptography-46.0.7-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a1529d614f44b863a7b480c6d000fe93b59acee9c82ffa027cfadc77521a9f5e", size = 4664256, upload-time = "2026-04-08T01:57:33.144Z" },
+    { url = "https://files.pythonhosted.org/packages/4b/fa/f0ab06238e899cc3fb332623f337a7364f36f4bb3f2534c2bb95a35b132c/cryptography-46.0.7-cp38-abi3-win32.whl", hash = "sha256:f247c8c1a1fb45e12586afbb436ef21ff1e80670b2861a90353d9b025583d246", size = 3013001, upload-time = "2026-04-08T01:57:34.933Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/f1/00ce3bde3ca542d1acd8f8cfa38e446840945aa6363f9b74746394b14127/cryptography-46.0.7-cp38-abi3-win_amd64.whl", hash = "sha256:506c4ff91eff4f82bdac7633318a526b1d1309fc07ca76a3ad182cb5b686d6d3", size = 3472985, upload-time = "2026-04-08T01:57:36.714Z" },
+    { url = "https://files.pythonhosted.org/packages/63/0c/dca8abb64e7ca4f6b2978769f6fea5ad06686a190cec381f0a796fdcaaba/cryptography-46.0.7-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:fc9ab8856ae6cf7c9358430e49b368f3108f050031442eaeb6b9d87e4dcf4e4f", size = 3476879, upload-time = "2026-04-08T01:57:38.664Z" },
+    { url = "https://files.pythonhosted.org/packages/3a/ea/075aac6a84b7c271578d81a2f9968acb6e273002408729f2ddff517fed4a/cryptography-46.0.7-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d3b99c535a9de0adced13d159c5a9cf65c325601aa30f4be08afd680643e9c15", size = 4219700, upload-time = "2026-04-08T01:57:40.625Z" },
+    { url = "https://files.pythonhosted.org/packages/6c/7b/1c55db7242b5e5612b29fc7a630e91ee7a6e3c8e7bf5406d22e206875fbd/cryptography-46.0.7-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d02c738dacda7dc2a74d1b2b3177042009d5cab7c7079db74afc19e56ca1b455", size = 4385982, upload-time = "2026-04-08T01:57:42.725Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/da/9870eec4b69c63ef5925bf7d8342b7e13bc2ee3d47791461c4e49ca212f4/cryptography-46.0.7-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:04959522f938493042d595a736e7dbdff6eb6cc2339c11465b3ff89343b65f65", size = 4219115, upload-time = "2026-04-08T01:57:44.939Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/72/05aa5832b82dd341969e9a734d1812a6aadb088d9eb6f0430fc337cc5a8f/cryptography-46.0.7-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:3986ac1dee6def53797289999eabe84798ad7817f3e97779b5061a95b0ee4968", size = 4385479, upload-time = "2026-04-08T01:57:46.86Z" },
+    { url = "https://files.pythonhosted.org/packages/20/2a/1b016902351a523aa2bd446b50a5bc1175d7a7d1cf90fe2ef904f9b84ebc/cryptography-46.0.7-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:258514877e15963bd43b558917bc9f54cf7cf866c38aa576ebf47a77ddbc43a4", size = 3412829, upload-time = "2026-04-08T01:57:48.874Z" },
+]
+
 [[package]]
 name = "cyclonedx-python-lib"
 version = "11.6.0"
@@ -501,6 +648,45 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/6a/09/e21df6aef1e1ffc0c816f0522ddc3f6dcded766c3261813131c78a704470/gitpython-3.1.46-py3-none-any.whl", hash = "sha256:79812ed143d9d25b6d176a10bb511de0f9c67b1fa641d82097b0ab90398a2058", size = 208620, upload-time = "2026-01-01T15:37:30.574Z" },
 ]
 
+[[package]]
+name = "google-auth"
+version = "2.49.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "cryptography" },
+    { name = "pyasn1-modules" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c6/fc/e925290a1ad95c975c459e2df070fac2b90954e13a0370ac505dff78cb99/google_auth-2.49.2.tar.gz", hash = "sha256:c1ae38500e73065dcae57355adb6278cf8b5c8e391994ae9cbadbcb9631ab409", size = 333958, upload-time = "2026-04-10T00:41:21.888Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/73/76/d241a5c927433420507215df6cac1b1fa4ac0ba7a794df42a84326c68da8/google_auth-2.49.2-py3-none-any.whl", hash = "sha256:c2720924dfc82dedb962c9f52cabb2ab16714fd0a6a707e40561d217574ed6d5", size = 240638, upload-time = "2026-04-10T00:41:14.501Z" },
+]
+
+[package.optional-dependencies]
+requests = [
+    { name = "requests" },
+]
+
+[[package]]
+name = "google-genai"
+version = "1.73.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "anyio" },
+    { name = "distro" },
+    { name = "google-auth", extra = ["requests"] },
+    { name = "httpx" },
+    { name = "pydantic" },
+    { name = "requests" },
+    { name = "sniffio" },
+    { name = "tenacity" },
+    { name = "typing-extensions" },
+    { name = "websockets" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/3d/d8/40f5f107e5a2976bbac52d421f04d14fc221b55a8f05e66be44b2f739fe6/google_genai-1.73.1.tar.gz", hash = "sha256:b637e3a3b9e2eccc46f27136d470165803de84eca52abfed2e7352081a4d5a15", size = 530998, upload-time = "2026-04-14T21:06:19.153Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/65/af/508e0528015240d710c6763f7c89ff44fab9a94a80b4377e265d692cbfd6/google_genai-1.73.1-py3-none-any.whl", hash = "sha256:af2d2287d25e42a187de19811ef33beb2e347c7e2bdb4dc8c467d78254e43a2c", size = 783595, upload-time = "2026-04-14T21:06:17.464Z" },
+]
+
 [[package]]
 name = "greenlet"
 version = "3.3.1"
@@ -1176,6 +1362,36 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/9b/bf/7595e817906a29453ba4d99394e781b6fabe55d21f3c15d240f85dd06bb1/py_serializable-2.1.0-py3-none-any.whl", hash = "sha256:b56d5d686b5a03ba4f4db5e769dc32336e142fc3bd4d68a8c25579ebb0a67304", size = 23045, upload-time = "2025-07-21T09:56:46.848Z" },
 ]
 
+[[package]]
+name = "pyasn1"
+version = "0.6.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/5c/5f/6583902b6f79b399c9c40674ac384fd9cd77805f9e6205075f828ef11fb2/pyasn1-0.6.3.tar.gz", hash = "sha256:697a8ecd6d98891189184ca1fa05d1bb00e2f84b5977c481452050549c8a72cf", size = 148685, upload-time = "2026-03-17T01:06:53.382Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5d/a0/7d793dce3fa811fe047d6ae2431c672364b462850c6235ae306c0efd025f/pyasn1-0.6.3-py3-none-any.whl", hash = "sha256:a80184d120f0864a52a073acc6fc642847d0be408e7c7252f31390c0f4eadcde", size = 83997, upload-time = "2026-03-17T01:06:52.036Z" },
+]
+
+[[package]]
+name = "pyasn1-modules"
+version = "0.4.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pyasn1" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/e9/e6/78ebbb10a8c8e4b61a59249394a4a594c1a7af95593dc933a349c8d00964/pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6", size = 307892, upload-time = "2025-03-28T02:41:22.17Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259, upload-time = "2025-03-28T02:41:19.028Z" },
+]
+
+[[package]]
+name = "pycparser"
+version = "3.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/1b/7d/92392ff7815c21062bea51aa7b87d45576f649f16458d78b7cf94b9ab2e6/pycparser-3.0.tar.gz", hash = "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29", size = 103492, upload-time = "2026-01-21T14:26:51.89Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172, upload-time = "2026-01-21T14:26:50.693Z" },
+]
+
 [[package]]
 name = "pydantic"
 version = "2.12.5"
@@ -1646,6 +1862,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/a8/45/a132b9074aa18e799b891b91ad72133c98d8042c70f6240e4c5f9dabee2f/structlog-25.5.0-py3-none-any.whl", hash = "sha256:a8453e9b9e636ec59bd9e79bbd4a72f025981b3ba0f5837aebf48f02f37a7f9f", size = 72510, upload-time = "2025-10-27T08:28:21.535Z" },
 ]
 
+[[package]]
+name = "tenacity"
+version = "9.1.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/47/c6/ee486fd809e357697ee8a44d3d69222b344920433d3b6666ccd9b374630c/tenacity-9.1.4.tar.gz", hash = "sha256:adb31d4c263f2bd041081ab33b498309a57c77f9acf2db65aadf0898179cf93a", size = 49413, upload-time = "2026-02-07T10:45:33.841Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d7/c1/eb8f9debc45d3b7918a32ab756658a0904732f75e555402972246b0b8e71/tenacity-9.1.4-py3-none-any.whl", hash = "sha256:6095a360c919085f28c6527de529e76a06ad89b23659fa881ae0649b867a9d55", size = 28926, upload-time = "2026-02-07T10:45:32.24Z" },
+]
+
 [[package]]
 name = "text-unidecode"
 version = "1.3"

From 99c44fade6cccc7cd3a261335b883374ee7b8857 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maple=EF=BC=81?= <mapleee723@gmail.com>
Date: Mon, 27 Apr 2026 12:42:02 +0800
Subject: [PATCH 2/5] refine: improve visual description prompt with Anthropic
 writing principles

Restructured DESCRIBE_PROMPT into XML-tagged sections with a why-what-how
flow: pacing (3-8s segments), on_screen_text (quote the actual text so it
can be translated), ui_actions (narrate the purpose, not the labels), and
skip (omit logo cards).
Also fixed the mypy type issue in the file-processing wait loop by falling
back to an empty string when the uploaded file has no name.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/bilingualsub/core/visual_describer.py | 34 ++++++++++++++++++-----
 1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/src/bilingualsub/core/visual_describer.py b/src/bilingualsub/core/visual_describer.py
index 80d4e55..7e384ee 100644
--- a/src/bilingualsub/core/visual_describer.py
+++ b/src/bilingualsub/core/visual_describer.py
@@ -19,12 +19,31 @@
     _genai = None
 
 DESCRIBE_PROMPT = (
-    "Analyze this video and provide a timestamped visual description. "
-    "Format each segment as: MM:SS - MM:SS | Description\n"
-    "Focus on: on-screen text, titles, product names, UI elements, "
-    "and key visual scenes.\n"
-    "Provide descriptions in the video's original language. "
-    "Be concise (one sentence per segment)."
+    "You are a narrator producing subtitles for a silent video.\n\n"
+    "Format each subtitle as: MM:SS - MM:SS | Text\n\n"
+    "<pacing>\n"
+    "Viewers need time to read each subtitle while watching the video.\n"
+    "Keep each segment 3-8 seconds. Combine related actions into one line "
+    "(e.g. a user typing a prompt and pressing Send is one segment, "
+    "not three separate ones).\n"
+    "</pacing>\n\n"
+    "<on_screen_text>\n"
+    "When someone types or a message appears on screen, quote the actual "
+    "text so it can be translated later. A viewer watching a foreign-language "
+    "product demo needs to read what was typed, not be told 'the user typed "
+    "a prompt about a globe.'\n"
+    "</on_screen_text>\n\n"
+    "<ui_actions>\n"
+    "For sequences of clicks, menus, and transitions, summarize the goal "
+    "of the sequence in one line (e.g. 'The user customizes the globe's "
+    "appearance using a settings panel'). Individual button labels like "
+    "'Send' or 'Edit' are not useful as standalone subtitles.\n"
+    "</ui_actions>\n\n"
+    "<skip>\n"
+    "Omit static logo cards and branding screens — they carry no information "
+    "a subtitle can add.\n"
+    "</skip>\n\n"
+    "Output in the video's original language."
 )
 
 _TIMESTAMP_PATTERN = re.compile(
@@ -71,7 +90,8 @@ def describe_video(
         # Wait for file processing to complete
         while uploaded_file.state == "PROCESSING":
             time.sleep(2)
-            uploaded_file = client.files.get(name=uploaded_file.name)
+            file_name = uploaded_file.name or ""
+            uploaded_file = client.files.get(name=file_name)
         if uploaded_file.state != "ACTIVE":
             raise VisualDescriptionError(
                 f"File processing failed with state: {uploaded_file.state}"

From e88bce8304508e070c7304ecb4de88311b8f9783 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maple=EF=BC=81?= <mapleee723@gmail.com>
Date: Mon, 27 Apr 2026 21:37:51 +0800
Subject: [PATCH 3/5] fix: address code review and test review findings

- Add ProcessingMode StrEnum to eliminate magic strings
- Add polling timeout (600s) and FAILED state handling for Gemini file upload
- Clean up uploaded files from Gemini after use (try/finally)
- Fix error handling in _run_visual_description_subtitle (no re-raise)
- Fix check order: _genai import before API key validation
- Add processing_mode to upload route
- Extract _require_api_key helper (Rule of Three)
- Use source_lang in prompt instead of ignoring it
- Pass work_dir as parameter to _serialize_translated_only
- Tighten test assertions (remove assertion roulette)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/bilingualsub/api/constants.py             |   7 ++
 src/bilingualsub/api/jobs.py                  |   5 +-
 src/bilingualsub/api/pipeline.py              |  24 ++--
 src/bilingualsub/api/routes.py                |   7 +-
 src/bilingualsub/core/visual_describer.py     | 109 +++++++++++-------
 src/bilingualsub/utils/config.py              |  54 ++-------
 .../test_visual_description_pipeline.py       |  11 +-
 tests/unit/core/test_visual_describer.py      |   9 +-
 8 files changed, 118 insertions(+), 108 deletions(-)

diff --git a/src/bilingualsub/api/constants.py b/src/bilingualsub/api/constants.py
index f26fb74..f85ec2b 100644
--- a/src/bilingualsub/api/constants.py
+++ b/src/bilingualsub/api/constants.py
@@ -45,6 +45,13 @@ class SubtitleSource(StrEnum):
     VISUAL_DESCRIPTION = "visual_description"
 
 
+class ProcessingMode(StrEnum):
+    """Processing mode for a subtitle generation job."""
+
+    SUBTITLE = "subtitle"
+    VISUAL_DESCRIPTION = "visual_description"
+
+
 JOB_TTL_SECONDS = 1800
 CLEANUP_INTERVAL_SECONDS = 300
 SSE_KEEPALIVE_SECONDS = 30
diff --git a/src/bilingualsub/api/jobs.py b/src/bilingualsub/api/jobs.py
index 162f7c0..37ad00f 100644
--- a/src/bilingualsub/api/jobs.py
+++ b/src/bilingualsub/api/jobs.py
@@ -19,6 +19,7 @@
     JOB_TTL_SECONDS,
     FileType,
     JobStatus,
+    ProcessingMode,
 )
 
 logger = structlog.get_logger()
@@ -47,7 +48,7 @@ class Job:
     video_description: str = ""
     glossary_text: str = ""
     subtitle_source: str = ""
-    processing_mode: str = "subtitle"
+    processing_mode: ProcessingMode = ProcessingMode.SUBTITLE
     video_duration: float = 0.0
     output_files: dict[FileType, Path] = field(default_factory=dict)
     event_queue: asyncio.Queue[dict[str, object]] = field(default_factory=asyncio.Queue)
@@ -69,7 +70,7 @@ def create_job(
         start_time: float | None = None,
         end_time: float | None = None,
         local_video_path: Path | None = None,
-        processing_mode: str = "subtitle",
+        processing_mode: ProcessingMode = ProcessingMode.SUBTITLE,
     ) -> Job:
         """Create a new job and store it."""
         job_id = uuid.uuid4().hex[:12]
diff --git a/src/bilingualsub/api/pipeline.py b/src/bilingualsub/api/pipeline.py
index 37f9537..a656a97 100644
--- a/src/bilingualsub/api/pipeline.py
+++ b/src/bilingualsub/api/pipeline.py
@@ -10,7 +10,13 @@
 
 import structlog
 
-from bilingualsub.api.constants import FileType, JobStatus, SSEEvent, SubtitleSource
+from bilingualsub.api.constants import (
+    FileType,
+    JobStatus,
+    ProcessingMode,
+    SSEEvent,
+    SubtitleSource,
+)
 from bilingualsub.api.errors import PipelineError
 
 if TYPE_CHECKING:
@@ -339,21 +345,23 @@ async def _merge_and_serialize(
     log.info("step_done", step="merge", duration_ms=int((time.monotonic() - t0) * 1000))
 
 
-async def _serialize_translated_only(job: Job, translated_sub: Subtitle) -> None:
+def _serialize_translated_only(
+    job: Job, translated_sub: Subtitle, work_dir: Path
+) -> None:
     """Serialize only the translated subtitle to SRT (no bilingual merge)."""
     _send_progress(
         job, JobStatus.MERGING, 70.0, "serialize", "Generating subtitle file..."
     )
-    work_dir = job.output_files[FileType.SOURCE_VIDEO].parent
 
     srt_content = serialize_srt(translated_sub)
     srt_path = work_dir / "subtitle.srt"
-    await asyncio.to_thread(srt_path.write_text, srt_content, "utf-8")
+    srt_path.write_text(srt_content, encoding="utf-8")
     job.output_files[FileType.SRT] = srt_path
 
 
 async def _run_visual_description_subtitle(job: Job) -> None:
     """Run visual description subtitle pipeline."""
+    log = logger.bind(job_id=job.id)
     try:
         video_path = job.output_files.get(FileType.SOURCE_VIDEO)
         if not video_path:
@@ -399,18 +407,15 @@ async def _run_visual_description_subtitle(job: Job) -> None:
         )
 
         # Serialize translated-only SRT (70-80%)
-        await _serialize_translated_only(job, translated_sub)
+        _serialize_translated_only(job, translated_sub, work_dir=video_path.parent)
 
         _send_complete(job)
 
     except PipelineError as exc:
         _send_error(job, exc.code, exc.message, exc.detail or "")
-        log = logger.bind(job_id=job.id)
         log.error("visual_description_failed", error_code=exc.code, error=str(exc))
-        raise
     except Exception as exc:
         pipeline_err = _to_pipeline_error(exc)
-        log = logger.bind(job_id=job.id)
         log.error(
             "visual_description_failed",
             error_code=pipeline_err.code,
@@ -422,14 +427,13 @@ async def _run_visual_description_subtitle(job: Job) -> None:
             pipeline_err.message,
             detail=str(exc),
         )
-        raise pipeline_err from exc
 
 
 async def run_subtitle(job: Job) -> None:
     """Phase 2: Transcribe -> Translate -> Merge -> Serialize."""
     log = logger.bind(job_id=job.id)
 
-    if job.processing_mode == "visual_description":
+    if job.processing_mode == ProcessingMode.VISUAL_DESCRIPTION:
         await _run_visual_description_subtitle(job)
         return
 
diff --git a/src/bilingualsub/api/routes.py b/src/bilingualsub/api/routes.py
index 8382754..fa931b1 100644
--- a/src/bilingualsub/api/routes.py
+++ b/src/bilingualsub/api/routes.py
@@ -18,6 +18,7 @@
     SSE_KEEPALIVE_SECONDS,
     FileType,
     JobStatus,
+    ProcessingMode,
     SSEEvent,
 )
 from bilingualsub.api.errors import (
@@ -150,7 +151,7 @@ async def create_job(body: JobCreateRequest, request: Request) -> JobCreateRespo
         target_lang=body.target_lang,
         start_time=body.start_time,
         end_time=body.end_time,
-        processing_mode=body.processing_mode,
+        processing_mode=ProcessingMode(body.processing_mode),
     )
     _start_background_task(request, run_download(job))
     return JobCreateResponse(job_id=job.id)
@@ -163,6 +164,7 @@ async def create_job_from_upload(
     target_lang: str = Form("zh-TW"),
     start_time: float | None = Form(None),
     end_time: float | None = Form(None),
+    processing_mode: str = Form("subtitle"),
     *,
     request: Request,
 ) -> JobCreateResponse:
@@ -200,6 +202,7 @@ async def create_job_from_upload(
         start_time=start_time,
         end_time=end_time,
         local_video_path=saved_path,
+        processing_mode=ProcessingMode(processing_mode),
     )
     _start_background_task(request, run_download(job))
     return JobCreateResponse(job_id=job.id)
@@ -298,7 +301,7 @@ async def start_subtitle(
         if body.target_lang:
             job.target_lang = body.target_lang
         if body.processing_mode is not None:
-            job.processing_mode = body.processing_mode
+            job.processing_mode = ProcessingMode(body.processing_mode)
     glossary_manager = _get_glossary_manager(request)
     job.glossary_text = glossary_manager.format_for_prompt()
     _start_background_task(request, run_subtitle(job))
diff --git a/src/bilingualsub/core/visual_describer.py b/src/bilingualsub/core/visual_describer.py
index 7e384ee..77c1a22 100644
--- a/src/bilingualsub/core/visual_describer.py
+++ b/src/bilingualsub/core/visual_describer.py
@@ -2,10 +2,11 @@
 
 from __future__ import annotations
 
+import contextlib
 import re
 import time
 from datetime import timedelta
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 
 from bilingualsub.core.subtitle import Subtitle, SubtitleEntry
 from bilingualsub.utils.config import get_gemini_api_key, get_settings
@@ -18,6 +19,8 @@
 except ImportError:
     _genai = None
 
+_FILE_PROCESSING_TIMEOUT = 600
+
 DESCRIBE_PROMPT = (
     "You are a narrator producing subtitles for a silent video.\n\n"
     "Format each subtitle as: MM:SS - MM:SS | Text\n\n"
@@ -42,8 +45,7 @@
     "<skip>\n"
     "Omit static logo cards and branding screens — they carry no information "
     "a subtitle can add.\n"
-    "</skip>\n\n"
-    "Output in the video's original language."
+    "</skip>"
 )
 
 _TIMESTAMP_PATTERN = re.compile(
@@ -65,50 +67,26 @@ def _parse_timestamp(timestamp: str) -> timedelta:
     return timedelta(hours=hours, minutes=minutes, seconds=seconds)
 
 
-def describe_video(
-    video_path: Path,
-    *,
-    source_lang: str = "en",  # noqa: ARG001
-) -> Subtitle:
-    """Analyze video frames with Gemini and return timestamped descriptions."""
-    if not video_path.exists():
-        raise ValueError(f"Video file not found: {video_path}")
-
-    api_key = get_gemini_api_key()
-
-    if _genai is None:
-        raise VisualDescriptionError(
-            "google-genai package is not installed. Run: uv add google-genai"
-        )
-
-    settings = get_settings()
-
-    try:
-        client = _genai.Client(api_key=api_key)
-        uploaded_file = client.files.upload(file=video_path)
+def _wait_for_active(client: Any, uploaded_file: Any) -> Any:
+    """Poll until the uploaded file reaches ACTIVE state or raise on failure."""
+    deadline = time.monotonic() + _FILE_PROCESSING_TIMEOUT
+    while uploaded_file.state == "PROCESSING":
+        if time.monotonic() >= deadline:
+            raise VisualDescriptionError("File processing timed out after 600 seconds")
+        time.sleep(2)
+        file_name = uploaded_file.name or ""
+        uploaded_file = client.files.get(name=file_name)
 
-        # Wait for file processing to complete
-        while uploaded_file.state == "PROCESSING":
-            time.sleep(2)
-            file_name = uploaded_file.name or ""
-            uploaded_file = client.files.get(name=file_name)
-        if uploaded_file.state != "ACTIVE":
-            raise VisualDescriptionError(
-                f"File processing failed with state: {uploaded_file.state}"
-            )
+    if uploaded_file.state == "FAILED":
+        raise VisualDescriptionError("File processing failed on Gemini servers")
+    if uploaded_file.state != "ACTIVE":
+        raise VisualDescriptionError(f"Unexpected file state: {uploaded_file.state}")
+    return uploaded_file
 
-        response = client.models.generate_content(
-            model=settings.visual_description_model,
-            contents=[uploaded_file, DESCRIBE_PROMPT],
-        )
-    except VisualDescriptionError:
-        raise
-    except Exception as exc:
-        raise VisualDescriptionError(f"Gemini API call failed: {exc}") from exc
 
-    response_text = response.text or ""
+def _parse_response(response_text: str) -> list[SubtitleEntry]:
+    """Parse Gemini response text into subtitle entries."""
     entries: list[SubtitleEntry] = []
-
     for line in response_text.splitlines():
         match = _TIMESTAMP_PATTERN.search(line)
         if not match:
@@ -133,7 +111,52 @@ def describe_video(
             )
         except (ValueError, IndexError):
             continue
+    return entries
+
+
+def describe_video(
+    video_path: Path,
+    *,
+    source_lang: str = "en",
+) -> Subtitle:
+    """Analyze video frames with Gemini and return timestamped descriptions."""
+    if not video_path.exists():
+        raise ValueError(f"Video file not found: {video_path}")
+
+    if _genai is None:
+        raise VisualDescriptionError(
+            "google-genai package is not installed. Run: uv add google-genai"
+        )
+
+    api_key = get_gemini_api_key()
+    settings = get_settings()
+
+    prompt = DESCRIBE_PROMPT
+    if source_lang and source_lang != "auto":
+        prompt += f"\n\nOutput in {source_lang}."
+    else:
+        prompt += "\n\nOutput in the video's original language."
+
+    uploaded_file = None
+    try:
+        client = _genai.Client(api_key=api_key)
+        uploaded_file = client.files.upload(file=video_path)
+        uploaded_file = _wait_for_active(client, uploaded_file)
+
+        response = client.models.generate_content(
+            model=settings.visual_description_model,
+            contents=[uploaded_file, prompt],
+        )
+    except VisualDescriptionError:
+        raise
+    except Exception as exc:
+        raise VisualDescriptionError(f"Gemini API call failed: {exc}") from exc
+    finally:
+        if uploaded_file and uploaded_file.name:
+            with contextlib.suppress(Exception):
+                client.files.delete(name=uploaded_file.name)
 
+    entries = _parse_response(response.text or "")
     if not entries:
         raise VisualDescriptionError("No visual description segments returned")
 
diff --git a/src/bilingualsub/utils/config.py b/src/bilingualsub/utils/config.py
index 88977fb..ca0b3f0 100644
--- a/src/bilingualsub/utils/config.py
+++ b/src/bilingualsub/utils/config.py
@@ -48,55 +48,23 @@ def get_settings() -> Settings:
     return Settings()
 
 
-def get_groq_api_key() -> str:
-    """Get Groq API key from environment.
-
-    Returns:
-        Groq API key string
-
-    Raises:
-        ValueError: If GROQ_API_KEY is not set or empty
-    """
-    settings = get_settings()
-    if not settings.groq_api_key:
+def _require_api_key(attr: str, env_var: str) -> str:
+    value = getattr(get_settings(), attr)
+    if not value:
         raise ValueError(
-            "GROQ_API_KEY environment variable is not set. "
-            "Please set it with your Groq API key."
+            f"{env_var} environment variable is not set. "
+            f"Please set it with your {env_var} key."
         )
-    return settings.groq_api_key
+    return str(value)
 
 
-def get_openai_api_key() -> str:
-    """Get OpenAI API key from environment.
+def get_groq_api_key() -> str:
+    return _require_api_key("groq_api_key", "GROQ_API_KEY")
 
-    Returns:
-        OpenAI API key string
 
-    Raises:
-        ValueError: If OPENAI_API_KEY is not set or empty
-    """
-    settings = get_settings()
-    if not settings.openai_api_key:
-        raise ValueError(
-            "OPENAI_API_KEY environment variable is not set. "
-            "Please set it with your OpenAI API key."
-        )
-    return settings.openai_api_key
+def get_openai_api_key() -> str:
+    return _require_api_key("openai_api_key", "OPENAI_API_KEY")
 
 
 def get_gemini_api_key() -> str:
-    """Get Gemini API key from environment.
-
-    Returns:
-        Gemini API key string
-
-    Raises:
-        ValueError: If GEMINI_API_KEY is not set or empty
-    """
-    settings = get_settings()
-    if not settings.gemini_api_key:
-        raise ValueError(
-            "GEMINI_API_KEY environment variable is not set. "
-            "Please set it with your Gemini API key."
-        )
-    return settings.gemini_api_key
+    return _require_api_key("gemini_api_key", "GEMINI_API_KEY")
diff --git a/tests/integration/test_visual_description_pipeline.py b/tests/integration/test_visual_description_pipeline.py
index 37bb975..b75b54b 100644
--- a/tests/integration/test_visual_description_pipeline.py
+++ b/tests/integration/test_visual_description_pipeline.py
@@ -279,10 +279,10 @@ def test_visual_description_missing_api_key_fails(
         3. Inject DOWNLOAD_COMPLETE state
         4. POST /api/jobs/{job_id}/subtitle → 200
         5. Poll until failed (describe_video raises ValueError for missing key)
-        6. Assert error.code in {"invalid_input", "visual_description_failed"}
+        6. Assert error.code == "invalid_input"
         """
         # Step 1: remove the API key so describe_video raises ValueError
-        monkeypatch.delenv("GEMINI_API_KEY", raising=False)
+        monkeypatch.setenv("GEMINI_API_KEY", "")
         get_settings.cache_clear()
 
         app = create_app()
@@ -332,8 +332,7 @@ def test_visual_description_missing_api_key_fails(
         )
         error = status_data.get("error")
         assert error is not None, "Expected error detail in response"
-        # ValueError → "invalid_input"; VisualDescriptionError → "visual_description_failed"
-        assert error["code"] in {"invalid_input", "visual_description_failed"}, (
-            f"Expected error code 'invalid_input' or 'visual_description_failed', "
-            f"got {error['code']!r}"
+        # ValueError from missing API key → "invalid_input" via _ERROR_MAP
+        assert error["code"] == "invalid_input", (
+            f"Expected error code 'invalid_input', got {error['code']!r}"
         )
diff --git a/tests/unit/core/test_visual_describer.py b/tests/unit/core/test_visual_describer.py
index 76c3032..dac2520 100644
--- a/tests/unit/core/test_visual_describer.py
+++ b/tests/unit/core/test_visual_describer.py
@@ -38,6 +38,8 @@ def _setup_client(self, mock_genai, response_text: str) -> MagicMock:
         mock_genai.Client.return_value = mock_client
 
         mock_file = MagicMock()
+        mock_file.state = "ACTIVE"
+        mock_file.name = "files/test-file"
         mock_client.files.upload.return_value = mock_file
 
         mock_response = MagicMock()
@@ -127,7 +129,10 @@ def test_api_error_raises_visual_description_error(
         """Exception from generate_content is wrapped into VisualDescriptionError."""
         mock_client = MagicMock()
         mock_genai.Client.return_value = mock_client
-        mock_client.files.upload.return_value = MagicMock()
+        mock_file = MagicMock()
+        mock_file.state = "ACTIVE"
+        mock_file.name = "files/test-file"
+        mock_client.files.upload.return_value = mock_file
         mock_client.models.generate_content.side_effect = Exception("quota exceeded")
 
         video_path = tmp_path / "test.mp4"
@@ -151,7 +156,7 @@ def test_missing_api_key_raises_value_error(self, tmp_path, mock_genai):
                     "GEMINI_API_KEY environment variable is not set"
                 ),
             ),
-            pytest.raises(ValueError),
+            pytest.raises(ValueError, match="GEMINI_API_KEY"),
         ):
             describe_video(video_path, source_lang="en")
 

From 8c6e1d650d120c71c88f039d7c8f5d1a8fb9d2cc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maple=EF=BC=81?= <mapleee723@gmail.com>
Date: Mon, 27 Apr 2026 22:32:09 +0800
Subject: [PATCH 4/5] fix: address round-2 review findings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Code review fixes:
- Remove duplicate [dependency-groups] section from pyproject.toml
- Validate processing_mode in upload route (prevent 500 on invalid input)
- Use ProcessingMode.SUBTITLE as Form default instead of magic string
- Change _genai-missing check to raise ValueError for correct error routing
- Guard client in finally block to prevent potential UnboundLocalError
- Refactor _require_api_key to accept value directly (type-safe, no getattr)
- Skip audio extraction for visual description mode (saves 30-60s)
- Align pre-commit mypy (v1.10→v1.19.1) and add google-genai to its
  additional_dependencies so both environments resolve the same types

Test review fixes:
- Add MM:SS and HH:MM:SS timestamp parsing tests
- Add start>=end boundary value test
- Add FAILED state and timeout path tests for _wait_for_active
- Verify describe→translate causal chain in IT

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .pre-commit-config.yaml                       |  3 +-
 pyproject.toml                                |  8 --
 src/bilingualsub/api/pipeline.py              |  3 +-
 src/bilingualsub/api/routes.py                | 12 ++-
 src/bilingualsub/core/visual_describer.py     | 13 +--
 src/bilingualsub/utils/config.py              | 11 +--
 .../test_visual_description_pipeline.py       |  6 ++
 tests/unit/core/test_visual_describer.py      | 92 +++++++++++++++++++
 uv.lock                                       | 16 ----
 9 files changed, 124 insertions(+), 40 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 2feff72..a7affc2 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -42,7 +42,7 @@ repos:
   # Mypy - Static type checking
   # ===========================================
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.10.0
+    rev: v1.19.1
     hooks:
       - id: mypy
         additional_dependencies:
@@ -53,6 +53,7 @@ repos:
           - fastapi>=0.115.0
           - structlog>=24.0.0
           - sse-starlette>=2.0.0
+          - google-genai>=1.0.0
         args: [--config-file=pyproject.toml]
         pass_filenames: false
         entry: bash -c 'mypy src/'
diff --git a/pyproject.toml b/pyproject.toml
index 3db8bf8..1f558cf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -200,11 +200,3 @@ sort_by_size = true
 targets = ["src/bilingualsub"]
 exclude_dirs = ["tests", ".venv"]
 skips = ["B101"]  # Skip assert warnings (used in tests)
-
-[dependency-groups]
-dev = [
-    "mypy>=1.19.1",
-    "pytest-asyncio>=1.3.0",
-    "pytest-cov>=7.0.0",
-    "ruff>=0.14.14",
-]
diff --git a/src/bilingualsub/api/pipeline.py b/src/bilingualsub/api/pipeline.py
index a656a97..55785a3 100644
--- a/src/bilingualsub/api/pipeline.py
+++ b/src/bilingualsub/api/pipeline.py
@@ -287,7 +287,8 @@ async def run_download(job: Job) -> None:
 
     try:
         video_path, metadata = await _acquire_video(job, work_dir, log)
-        await _extract_audio_step(job, video_path, work_dir, log)
+        if job.processing_mode != ProcessingMode.VISUAL_DESCRIPTION:
+            await _extract_audio_step(job, video_path, work_dir, log)
 
         # Save metadata for subtitle phase
         job.video_width = metadata.width
diff --git a/src/bilingualsub/api/routes.py b/src/bilingualsub/api/routes.py
index fa931b1..97fbf06 100644
--- a/src/bilingualsub/api/routes.py
+++ b/src/bilingualsub/api/routes.py
@@ -164,7 +164,7 @@ async def create_job_from_upload(
     target_lang: str = Form("zh-TW"),
     start_time: float | None = Form(None),
     end_time: float | None = Form(None),
-    processing_mode: str = Form("subtitle"),
+    processing_mode: str = Form(ProcessingMode.SUBTITLE),
     *,
     request: Request,
 ) -> JobCreateResponse:
@@ -195,6 +195,14 @@ async def create_job_from_upload(
                 )
             buf.write(chunk)
 
+    try:
+        mode = ProcessingMode(processing_mode)
+    except ValueError as err:
+        raise InvalidRequestError(
+            "Invalid processing_mode",
+            detail=f"Must be one of: {', '.join(ProcessingMode)}",
+        ) from err
+
     manager = _get_job_manager(request)
     job = manager.create_job(
         source_lang=source_lang,
@@ -202,7 +210,7 @@ async def create_job_from_upload(
         start_time=start_time,
         end_time=end_time,
         local_video_path=saved_path,
-        processing_mode=ProcessingMode(processing_mode),
+        processing_mode=mode,
     )
     _start_background_task(request, run_download(job))
     return JobCreateResponse(job_id=job.id)
diff --git a/src/bilingualsub/core/visual_describer.py b/src/bilingualsub/core/visual_describer.py
index 77c1a22..dde2901 100644
--- a/src/bilingualsub/core/visual_describer.py
+++ b/src/bilingualsub/core/visual_describer.py
@@ -8,16 +8,16 @@
 from datetime import timedelta
 from typing import TYPE_CHECKING, Any
 
-from bilingualsub.core.subtitle import Subtitle, SubtitleEntry
-from bilingualsub.utils.config import get_gemini_api_key, get_settings
-
 if TYPE_CHECKING:
     from pathlib import Path
 
+from bilingualsub.core.subtitle import Subtitle, SubtitleEntry
+from bilingualsub.utils.config import get_gemini_api_key, get_settings
+
 try:
     from google import genai as _genai
 except ImportError:
-    _genai = None
+    _genai = None  # type: ignore[assignment]
 
 _FILE_PROCESSING_TIMEOUT = 600
 
@@ -124,7 +124,7 @@ def describe_video(
         raise ValueError(f"Video file not found: {video_path}")
 
     if _genai is None:
-        raise VisualDescriptionError(
+        raise ValueError(
             "google-genai package is not installed. Run: uv add google-genai"
         )
 
@@ -137,6 +137,7 @@ def describe_video(
     else:
         prompt += "\n\nOutput in the video's original language."
 
+    client = None
     uploaded_file = None
     try:
         client = _genai.Client(api_key=api_key)
@@ -152,7 +153,7 @@ def describe_video(
     except Exception as exc:
         raise VisualDescriptionError(f"Gemini API call failed: {exc}") from exc
     finally:
-        if uploaded_file and uploaded_file.name:
+        if client and uploaded_file and uploaded_file.name:
             with contextlib.suppress(Exception):
                 client.files.delete(name=uploaded_file.name)
 
diff --git a/src/bilingualsub/utils/config.py b/src/bilingualsub/utils/config.py
index ca0b3f0..5911b39 100644
--- a/src/bilingualsub/utils/config.py
+++ b/src/bilingualsub/utils/config.py
@@ -48,23 +48,22 @@ def get_settings() -> Settings:
     return Settings()
 
 
-def _require_api_key(attr: str, env_var: str) -> str:
-    value = getattr(get_settings(), attr)
+def _require_api_key(value: str, env_var: str) -> str:
     if not value:
         raise ValueError(
             f"{env_var} environment variable is not set. "
             f"Please set it with your {env_var} key."
         )
-    return str(value)
+    return value
 
 
 def get_groq_api_key() -> str:
-    return _require_api_key("groq_api_key", "GROQ_API_KEY")
+    return _require_api_key(get_settings().groq_api_key, "GROQ_API_KEY")
 
 
 def get_openai_api_key() -> str:
-    return _require_api_key("openai_api_key", "OPENAI_API_KEY")
+    return _require_api_key(get_settings().openai_api_key, "OPENAI_API_KEY")
 
 
 def get_gemini_api_key() -> str:
-    return _require_api_key("gemini_api_key", "GEMINI_API_KEY")
+    return _require_api_key(get_settings().gemini_api_key, "GEMINI_API_KEY")
diff --git a/tests/integration/test_visual_description_pipeline.py b/tests/integration/test_visual_description_pipeline.py
index b75b54b..adc1ecc 100644
--- a/tests/integration/test_visual_description_pipeline.py
+++ b/tests/integration/test_visual_description_pipeline.py
@@ -179,6 +179,12 @@ def test_visual_description_journey_produces_srt(
                 # Step 5: poll for completion
                 status_data = _poll_until_terminal(client, job_id)
 
+                # Verify describe→translate causal chain
+                mock_describe.assert_called_once()
+                mock_translate.assert_called_once()
+                translate_first_arg = mock_translate.call_args.args[0]
+                assert translate_first_arg.entries[0].text == "Product showcase"
+
             # Step 6: assertions
             assert status_data["status"] == "completed", (
                 f"Expected completed, got {status_data['status']!r}. "
diff --git a/tests/unit/core/test_visual_describer.py b/tests/unit/core/test_visual_describer.py
index dac2520..43c0b55 100644
--- a/tests/unit/core/test_visual_describer.py
+++ b/tests/unit/core/test_visual_describer.py
@@ -189,3 +189,95 @@ def test_malformed_lines_are_skipped(
         assert len(result.entries) == 2
         assert result.entries[0].text == "Valid first entry"
         assert result.entries[1].text == "Valid second entry"
+
+    def test_mixed_timestamp_formats_parsed_correctly(
+        self, tmp_path, mock_genai, mock_get_gemini_api_key
+    ):
+        """MM:SS and HH:MM:SS formats are both parsed correctly."""
+        response_text = (
+            "01:00 - 01:30 | Scene with minutes\n"
+            "01:00:00 - 01:00:10 | Scene with hours\n"
+        )
+        self._setup_client(mock_genai, response_text)
+
+        video_path = tmp_path / "test.mp4"
+        video_path.write_bytes(b"fake video content")
+
+        result = describe_video(video_path, source_lang="en")
+
+        assert len(result.entries) == 2
+        # MM:SS: 01:00 = 60 seconds, 01:30 = 90 seconds
+        assert result.entries[0].start == timedelta(minutes=1, seconds=0)
+        assert result.entries[0].end == timedelta(minutes=1, seconds=30)
+        # HH:MM:SS: 01:00:00 = 1 hour, 01:00:10 = 1 hour 10 seconds
+        assert result.entries[1].start == timedelta(hours=1)
+        assert result.entries[1].end == timedelta(hours=1, seconds=10)
+
+    def test_reversed_and_equal_timestamps_are_skipped(
+        self, tmp_path, mock_genai, mock_get_gemini_api_key
+    ):
+        """Entries where start >= end are silently skipped."""
+        response_text = (
+            "00:10 - 00:05 | Reversed timestamps\n"
+            "00:05 - 00:05 | Equal timestamps\n"
+            "00:00 - 00:10 | Valid entry\n"
+        )
+        self._setup_client(mock_genai, response_text)
+
+        video_path = tmp_path / "test.mp4"
+        video_path.write_bytes(b"fake video content")
+
+        result = describe_video(video_path, source_lang="en")
+
+        assert len(result.entries) == 1
+        assert result.entries[0].text == "Valid entry"
+
+    def test_file_state_failed_raises_error(
+        self, tmp_path, mock_genai, mock_get_gemini_api_key
+    ):
+        """Gemini file in FAILED state raises VisualDescriptionError."""
+        mock_client = MagicMock()
+        mock_genai.Client.return_value = mock_client
+
+        mock_file = MagicMock()
+        mock_file.state = "FAILED"
+        mock_file.name = "files/test-file"
+        mock_client.files.upload.return_value = mock_file
+
+        video_path = tmp_path / "test.mp4"
+        video_path.write_bytes(b"fake video content")
+
+        with pytest.raises(
+            VisualDescriptionError,
+            match="File processing failed on Gemini servers",
+        ):
+            describe_video(video_path, source_lang="en")
+
+    def test_file_processing_timeout_raises_error(
+        self, tmp_path, mock_genai, mock_get_gemini_api_key
+    ):
+        """File stuck in PROCESSING state past timeout raises error."""
+        mock_client = MagicMock()
+        mock_genai.Client.return_value = mock_client
+
+        mock_file = MagicMock()
+        mock_file.state = "PROCESSING"
+        mock_file.name = "files/test-file"
+        mock_client.files.upload.return_value = mock_file
+        # files.get always returns PROCESSING
+        mock_client.files.get.return_value = mock_file
+
+        video_path = tmp_path / "test.mp4"
+        video_path.write_bytes(b"fake video content")
+
+        with (
+            patch("bilingualsub.core.visual_describer.time") as mock_time,
+            pytest.raises(
+                VisualDescriptionError,
+                match="File processing timed out",
+            ),
+        ):
+            # First call to monotonic() sets deadline, second exceeds it
+            mock_time.monotonic.side_effect = [0.0, 601.0]
+            mock_time.sleep = MagicMock()
+            describe_video(video_path, source_lang="en")
diff --git a/uv.lock b/uv.lock
index c9b6477..c39dbe2 100644
--- a/uv.lock
+++ b/uv.lock
@@ -115,14 +115,6 @@ e2e = [
     { name = "pytest-playwright" },
 ]
 
-[package.dev-dependencies]
-dev = [
-    { name = "mypy" },
-    { name = "pytest-asyncio" },
-    { name = "pytest-cov" },
-    { name = "ruff" },
-]
-
 [package.metadata]
 requires-dist = [
     { name = "agno", specifier = ">=1.0.0" },
@@ -158,14 +150,6 @@ requires-dist = [
 ]
 provides-extras = ["dev", "e2e"]
 
-[package.metadata.requires-dev]
-dev = [
-    { name = "mypy", specifier = ">=1.19.1" },
-    { name = "pytest-asyncio", specifier = ">=1.3.0" },
-    { name = "pytest-cov", specifier = ">=7.0.0" },
-    { name = "ruff", specifier = ">=0.14.14" },
-]
-
 [[package]]
 name = "boolean-py"
 version = "5.0"

From 8b52c37c0ae231926c1d0d9e766f2b37b6dc659d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maple=EF=BC=81?= <mapleee723@gmail.com>
Date: Tue, 28 Apr 2026 11:37:31 +0800
Subject: [PATCH 5/5] fix: address round-3 and round-4 review findings

Round-3: early-fail on missing file name, audio extraction guard for
mode switch, validation before file upload, error message fixes,
frontend ProcessingMode type alias, upload FormData forwards mode.

Round-4: ProgressTracker visual description label, DownloadLinks hides
ASS/Audio in visual mode, startSubtitle forwards processingMode,
consolidate get_settings calls, update docs to Gemini 3.1 Flash Lite
Preview.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 docs/arch/visual-description-mode.md        |  4 +-
 docs/design/visual-description-mode.md      |  6 +--
 frontend/src/App.tsx                        | 26 +++++++----
 frontend/src/api/client.ts                  |  7 ++-
 frontend/src/components/DownloadLinks.tsx   | 16 +++++--
 frontend/src/components/ProgressTracker.tsx |  4 +-
 frontend/src/constants.ts                   |  1 +
 frontend/src/hooks/useJob.ts                | 21 ++++++---
 frontend/src/i18n/en.json                   |  4 +-
 frontend/src/i18n/zh-TW.json                |  4 +-
 frontend/src/types.ts                       |  6 ++-
 src/bilingualsub/api/pipeline.py            |  6 +++
 src/bilingualsub/api/routes.py              | 16 +++----
 src/bilingualsub/core/visual_describer.py   | 13 +++---
 src/bilingualsub/utils/config.py            |  7 ++-
 tests/unit/core/test_visual_describer.py    | 48 ++++++++++-----------
 16 files changed, 115 insertions(+), 74 deletions(-)

diff --git a/docs/arch/visual-description-mode.md b/docs/arch/visual-description-mode.md
index 8630446..4313c4a 100644
--- a/docs/arch/visual-description-mode.md
+++ b/docs/arch/visual-description-mode.md
@@ -2,7 +2,7 @@
 
 ## 概述
 
-在現有 download → subtitle → burn 三階段管線上，新增一條平行的字幕生成路徑：當使用者選擇「視覺描述」模式時，subtitle phase 以 `describe_video()` 取代 `transcribe_audio()`。Gemini 2.5 Flash 直接讀取影片檔（`FileType.SOURCE_VIDEO`）並回傳帶時間戳的畫面描述，再由現有 `translate_subtitle()` 翻譯成目標語言。因為視覺描述不存在「原文字幕」概念，merge 步驟跳過，只序列化目標語言 SRT。
+在現有 download → subtitle → burn 三階段管線上，新增一條平行的字幕生成路徑：當使用者選擇「視覺描述」模式時，subtitle phase 以 `describe_video()` 取代 `transcribe_audio()`。Gemini 3.1 Flash Lite Preview 直接讀取影片檔（`FileType.SOURCE_VIDEO`）並回傳帶時間戳的畫面描述，再由現有 `translate_subtitle()` 翻譯成目標語言。因為視覺描述不存在「原文字幕」概念，merge 步驟跳過，只序列化目標語言 SRT。
 
 ## Files to Create / Modify
 
@@ -52,7 +52,7 @@ def describe_video(
     *,
     source_lang: str = "en",
 ) -> Subtitle:
-    """Analyze video frames with Gemini 2.5 Flash and return timestamped descriptions.
+    """Analyze video frames with Gemini 3.1 Flash Lite Preview and return timestamped descriptions.
 
     Raises:
         VisualDescriptionError: If Gemini API fails or no segments can be parsed.
diff --git a/docs/design/visual-description-mode.md b/docs/design/visual-description-mode.md
index 8b677b7..0657d5a 100644
--- a/docs/design/visual-description-mode.md
+++ b/docs/design/visual-description-mode.md
@@ -21,7 +21,7 @@
 - **手動切換而非自動偵測** → 自動偵測需要先跑 Whisper 才能判斷有無語音，浪費時間且判斷邊界模糊（幾句話算「有語音」？）。手動切換讓使用者掌控意圖，流程更直覺。
 - **只產出翻譯後的單語字幕** → 視覺描述的「原文」是畫面內容而非語言文字，雙語對照在此場景沒有意義。
 - **第一版不做混合模式** → 混合模式需要時間軸對齊和內容類型判斷，複雜度高。先做純模式，驗證價值後再擴展。
-- **使用 Gemini 2.5 Flash** → 目前唯一支援原生影片輸入的主流模型，可直接吃整段影片（最長 90 分鐘），同時處理視覺和音訊，不需自行抽 frame。成本低、速度快，適合生產環境。
+- **使用 Gemini 3.1 Flash Lite Preview** → 目前唯一支援原生影片輸入的主流模型，可直接吃整段影片（最長 90 分鐘），同時處理視覺和音訊，不需自行抽 frame。成本低、速度快，適合生產環境。
 
 ## User Journey
 
@@ -34,7 +34,7 @@
    → 頁面提示文字變更，說明此模式會分析畫面內容而非語音
 3. 使用者貼入影片 URL，選擇目標語言，點擊「開始處理」
    → 系統開始下載影片
-4. 下載完成後，系統將影片送入 Gemini 2.5 Flash 分析
+4. 下載完成後，系統將影片送入 Gemini 3.1 Flash Lite Preview 分析
    → 進度條顯示「分析畫面內容中...」
 5. Gemini 回傳帶時間戳的畫面描述（英文或原始語言）
    → 系統將描述翻譯成目標語言
@@ -81,7 +81,7 @@
 
 ## 整合點
 
-- **Gemini API**：新增 `GEMINI_API_KEY` 環境變數，透過 Google AI SDK 呼叫 Gemini 2.5 Flash
+- **Gemini API**：新增 `GEMINI_API_KEY` 環境變數，透過 Google AI SDK 呼叫 Gemini 3.1 Flash Lite Preview
 - **現有 Pipeline**：視覺描述模式複用現有的 download → translate → merge → burn 步驟，僅將 transcribe 步驟替換為 Gemini 視覺分析
 - **前端狀態**：`useJob` hook 需支援新的模式參數，Toggle 狀態影響 API 請求的 payload
 - **翻譯模組**：視覺描述的翻譯複用現有的 translator，輸入格式與語音轉錄的字幕條目相同
diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx
index 81cc405..6822d5d 100644
--- a/frontend/src/App.tsx
+++ b/frontend/src/App.tsx
@@ -120,7 +120,9 @@ function App() {
                 </div>
               </div>
               <button
-                onClick={() => subtitleJob(sourceLang, targetLang)}
+                onClick={() =>
+                  subtitleJob(sourceLang, targetLang, state.processingMode ?? undefined)
+                }
                 className="px-8 py-3 bg-black text-white rounded-full hover:scale-105 transition-transform"
               >
                 {t('app.generate_subtitles')}
@@ -133,13 +135,15 @@ function App() {
                 >
                   {t('app.download_original_video')}
                 </a>
-                <a
-                  href={apiClient.getDownloadUrl(state.jobId!, FileType.AUDIO)}
-                  download
-                  className="text-sm text-gray-400 hover:text-black transition-colors"
-                >
-                  {t('app.download_audio')}
-                </a>
+                {state.processingMode !== 'visual_description' && (
+                  <a
+                    href={apiClient.getDownloadUrl(state.jobId!, FileType.AUDIO)}
+                    download
+                    className="text-sm text-gray-400 hover:text-black transition-colors"
+                  >
+                    {t('app.download_audio')}
+                  </a>
+                )}
                 <button
                   onClick={reset}
                   className="text-sm text-gray-400 hover:text-black transition-colors"
@@ -272,7 +276,11 @@ function App() {
               <div className="lg:col-span-1 space-y-8">
                 <div>
                   <h3 className="text-3xl font-serif mb-6">{t('app.downloads_title')}</h3>
-                  <DownloadLinks jobId={state.jobId!} showVideo={true} />
+                  <DownloadLinks
+                    jobId={state.jobId!}
+                    showVideo={true}
+                    processingMode={state.processingMode}
+                  />
                 </div>
                 <button
                   onClick={backToEdit}
diff --git a/frontend/src/api/client.ts b/frontend/src/api/client.ts
index 1fac7d1..3b553c0 100644
--- a/frontend/src/api/client.ts
+++ b/frontend/src/api/client.ts
@@ -38,6 +38,7 @@ class ApiClient {
     if (request.target_lang) formData.append('target_lang', request.target_lang);
     if (request.start_time !== undefined) formData.append('start_time', String(request.start_time));
     if (request.end_time !== undefined) formData.append('end_time', String(request.end_time));
+    if (request.processing_mode) formData.append('processing_mode', request.processing_mode);
 
     const response = await fetch(`${this.baseUrl}/api/jobs/upload`, {
       method: 'POST',
@@ -105,11 +106,13 @@ class ApiClient {
   async startSubtitle(
     jobId: string,
     sourceLang?: string,
-    targetLang?: string
+    targetLang?: string,
+    processingMode?: string
   ): Promise<{ status: string }> {
-    const payload: { source_lang?: string; target_lang?: string } = {};
+    const payload: { source_lang?: string; target_lang?: string; processing_mode?: string } = {};
     if (sourceLang) payload.source_lang = sourceLang;
     if (targetLang) payload.target_lang = targetLang;
+    if (processingMode) payload.processing_mode = processingMode;
 
     const response = await fetch(`${this.baseUrl}/api/jobs/${jobId}/subtitle`, {
       method: 'POST',
diff --git a/frontend/src/components/DownloadLinks.tsx b/frontend/src/components/DownloadLinks.tsx
index 23db990..b4ea8f2 100644
--- a/frontend/src/components/DownloadLinks.tsx
+++ b/frontend/src/components/DownloadLinks.tsx
@@ -4,10 +4,12 @@ import { FileType } from '../constants';
 import { apiClient } from '../api/client';
 import { DisclaimerDialog } from './DisclaimerDialog';
 import { triggerDownload } from '../utils/download';
+import type { ProcessingMode } from '../types';
 
 interface DownloadLinksProps {
   jobId: string;
   showVideo?: boolean;
+  processingMode?: ProcessingMode | null;
 }
 
 const FILE_OPTIONS = [
@@ -17,12 +19,20 @@ const FILE_OPTIONS = [
   { type: FileType.AUDIO, labelKey: 'download.audio' },
 ] as const;
 
-export function DownloadLinks({ jobId, showVideo }: DownloadLinksProps) {
+export function DownloadLinks({ jobId, showVideo, processingMode }: DownloadLinksProps) {
   const { t } = useTranslation();
   const [pendingUrl, setPendingUrl] = useState<string | null>(null);
 
-  const visibleOptions =
-    showVideo === false ? FILE_OPTIONS.filter(opt => opt.type !== FileType.VIDEO) : FILE_OPTIONS;
+  let visibleOptions =
+    showVideo === false
+      ? FILE_OPTIONS.filter(opt => opt.type !== FileType.VIDEO)
+      : [...FILE_OPTIONS];
+
+  if (processingMode === 'visual_description') {
+    visibleOptions = visibleOptions.filter(
+      opt => opt.type !== FileType.ASS && opt.type !== FileType.AUDIO
+    );
+  }
 
   return (
     <>
diff --git a/frontend/src/components/ProgressTracker.tsx b/frontend/src/components/ProgressTracker.tsx
index 4a41fbe..c8f4e85 100644
--- a/frontend/src/components/ProgressTracker.tsx
+++ b/frontend/src/components/ProgressTracker.tsx
@@ -59,7 +59,9 @@ export function ProgressTracker({
           {': '}
           {subtitleSource === SubtitleSource.YOUTUBE_MANUAL
             ? t('progress.subtitleSourceYoutube')
-            : t('progress.subtitleSourceWhisper')}
+            : subtitleSource === SubtitleSource.VISUAL_DESCRIPTION
+              ? t('progress.subtitleSourceVisual')
+              : t('progress.subtitleSourceWhisper')}
         </p>
       )}
     </div>
diff --git a/frontend/src/constants.ts b/frontend/src/constants.ts
index f90ddfc..8c180fe 100644
--- a/frontend/src/constants.ts
+++ b/frontend/src/constants.ts
@@ -46,5 +46,6 @@ export const PIPELINE_STEPS = [
 export const SubtitleSource = {
   WHISPER: 'whisper',
   YOUTUBE_MANUAL: 'youtube_manual',
+  VISUAL_DESCRIPTION: 'visual_description',
 } as const;
 export type SubtitleSource = (typeof SubtitleSource)[keyof typeof SubtitleSource];
diff --git a/frontend/src/hooks/useJob.ts b/frontend/src/hooks/useJob.ts
index 3853d54..15c9e5b 100644
--- a/frontend/src/hooks/useJob.ts
+++ b/frontend/src/hooks/useJob.ts
@@ -1,6 +1,6 @@
 import { useCallback, useReducer, useRef } from 'react';
 import { JobStatus } from '../constants';
-import type { JobCreateRequest, JobUploadRequest, SSEProgressData } from '../types';
+import type { JobCreateRequest, JobUploadRequest, ProcessingMode, SSEProgressData } from '../types';
 import { apiClient } from '../api/client';
 
 // State type
@@ -21,12 +21,13 @@ interface JobState {
   progress: number;
   currentStep: string | null;
   subtitleSource: string | null;
+  processingMode: ProcessingMode | null;
   error: { code: string; message: string; detail?: string } | null;
 }
 
 // Action types
 type JobAction =
-  | { type: 'SUBMIT'; sourceUrl: string | null }
+  | { type: 'SUBMIT'; sourceUrl: string | null; processingMode: ProcessingMode | null }
   | { type: 'JOB_CREATED'; jobId: string }
   | { type: 'PROGRESS'; data: SSEProgressData }
   | { type: 'DOWNLOAD_COMPLETE' }
@@ -48,13 +49,19 @@ const initialState: JobState = {
   progress: 0,
   currentStep: null,
   subtitleSource: null,
+  processingMode: null,
   error: null,
 };
 
 function jobReducer(state: JobState, action: JobAction): JobState {
   switch (action.type) {
     case 'SUBMIT':
-      return { ...initialState, phase: 'submitting', sourceUrl: action.sourceUrl };
+      return {
+        ...initialState,
+        phase: 'submitting',
+        sourceUrl: action.sourceUrl,
+        processingMode: action.processingMode,
+      };
     case 'JOB_CREATED':
       return { ...state, phase: 'processing', jobId: action.jobId };
     case 'PROGRESS':
@@ -130,7 +137,9 @@ export function useJob() {
     async (request: JobCreateRequest | JobUploadRequest) => {
       cleanup();
       const sourceUrl = 'source_url' in request ? request.source_url : null;
-      dispatch({ type: 'SUBMIT', sourceUrl });
+      const processingMode =
+        'processing_mode' in request ? (request.processing_mode ?? null) : null;
+      dispatch({ type: 'SUBMIT', sourceUrl, processingMode });
 
       try {
         const response =
@@ -167,11 +176,11 @@ export function useJob() {
   );
 
   const subtitleJob = useCallback(
-    async (sourceLang?: string, targetLang?: string) => {
+    async (sourceLang?: string, targetLang?: string, processingMode?: string) => {
       if (!state.jobId) return;
       dispatch({ type: 'SUBTITLE_START' });
       try {
-        await apiClient.startSubtitle(state.jobId, sourceLang, targetLang);
+        await apiClient.startSubtitle(state.jobId, sourceLang, targetLang, processingMode);
         // Reconnect SSE if the previous connection was closed
         if (!eventSourceRef.current || eventSourceRef.current.readyState === EventSource.CLOSED) {
           eventSourceRef.current = apiClient.connectSSE(state.jobId, {
diff --git a/frontend/src/i18n/en.json b/frontend/src/i18n/en.json
index 89d20bd..22be9a9 100644
--- a/frontend/src/i18n/en.json
+++ b/frontend/src/i18n/en.json
@@ -59,8 +59,8 @@
     "subtitleSource": "Source",
     "subtitleSourceYoutube": "YouTube (manual)",
     "subtitleSourceWhisper": "Whisper",
-    "nonYoutubeHint": "Non-YouTube platforms may take longer to download",
-    "describing": "Analyzing visual content..."
+    "subtitleSourceVisual": "Visual Description",
+    "nonYoutubeHint": "Non-YouTube platforms may take longer to download"
   },
   "download": {
     "title": "Download Results",
diff --git a/frontend/src/i18n/zh-TW.json b/frontend/src/i18n/zh-TW.json
index 86a3e7d..97afeb7 100644
--- a/frontend/src/i18n/zh-TW.json
+++ b/frontend/src/i18n/zh-TW.json
@@ -59,8 +59,8 @@
     "subtitleSource": "字幕來源",
     "subtitleSourceYoutube": "YouTube（手動上傳）",
     "subtitleSourceWhisper": "Whisper 語音辨識",
-    "nonYoutubeHint": "非 YouTube 平台的影片下載可能較慢",
-    "describing": "分析畫面內容中..."
+    "subtitleSourceVisual": "視覺描述",
+    "nonYoutubeHint": "非 YouTube 平台的影片下載可能較慢"
   },
   "download": {
     "title": "下載結果",
diff --git a/frontend/src/types.ts b/frontend/src/types.ts
index 32d146d..ce6d2e8 100644
--- a/frontend/src/types.ts
+++ b/frontend/src/types.ts
@@ -1,12 +1,14 @@
 import type { FileType, JobStatus } from './constants';
 
+export type ProcessingMode = 'subtitle' | 'visual_description';
+
 export interface JobCreateRequest {
   source_url: string;
   source_lang?: string;
   target_lang?: string;
   start_time?: number; // seconds
   end_time?: number; // seconds
-  processing_mode?: 'subtitle' | 'visual_description';
+  processing_mode?: ProcessingMode;
 }
 
 export interface JobCreateResponse {
@@ -49,7 +51,7 @@ export interface JobUploadRequest {
   target_lang?: string;
   start_time?: number;
   end_time?: number;
-  processing_mode?: 'subtitle' | 'visual_description';
+  processing_mode?: ProcessingMode;
 }
 
 export interface RetranslateEntryPayload {
diff --git a/src/bilingualsub/api/pipeline.py b/src/bilingualsub/api/pipeline.py
index 55785a3..e833c37 100644
--- a/src/bilingualsub/api/pipeline.py
+++ b/src/bilingualsub/api/pipeline.py
@@ -439,6 +439,12 @@ async def run_subtitle(job: Job) -> None:
         return
 
     try:
+        if FileType.AUDIO not in job.output_files:
+            video_path = job.output_files.get(FileType.SOURCE_VIDEO)
+            if not video_path:
+                raise PipelineError("pipeline_failed", "Source video not found")
+            await _extract_audio_step(job, video_path, video_path.parent, log)
+
         audio_path = job.output_files[FileType.AUDIO]
         work_dir = audio_path.parent
 
diff --git a/src/bilingualsub/api/routes.py b/src/bilingualsub/api/routes.py
index 97fbf06..09c0306 100644
--- a/src/bilingualsub/api/routes.py
+++ b/src/bilingualsub/api/routes.py
@@ -179,6 +179,14 @@ async def create_job_from_upload(
 
     safe_name = Path(filename).name or f"upload{suffix}"
 
+    try:
+        mode = ProcessingMode(processing_mode)
+    except ValueError as err:
+        raise InvalidRequestError(
+            "Invalid processing_mode",
+            detail=f"Must be one of: {', '.join(ProcessingMode)}",
+        ) from err
+
     max_size = _MAX_UPLOAD_BYTES
     tmp_dir = Path(tempfile.mkdtemp(prefix="bilingualsub_upload_"))
     saved_path = tmp_dir / safe_name
@@ -195,14 +203,6 @@ async def create_job_from_upload(
                 )
             buf.write(chunk)
 
-    try:
-        mode = ProcessingMode(processing_mode)
-    except ValueError as err:
-        raise InvalidRequestError(
-            "Invalid processing_mode",
-            detail=f"Must be one of: {', '.join(ProcessingMode)}",
-        ) from err
-
     manager = _get_job_manager(request)
     job = manager.create_job(
         source_lang=source_lang,
diff --git a/src/bilingualsub/core/visual_describer.py b/src/bilingualsub/core/visual_describer.py
index dde2901..a5b1364 100644
--- a/src/bilingualsub/core/visual_describer.py
+++ b/src/bilingualsub/core/visual_describer.py
@@ -12,7 +12,7 @@
     from pathlib import Path
 
 from bilingualsub.core.subtitle import Subtitle, SubtitleEntry
-from bilingualsub.utils.config import get_gemini_api_key, get_settings
+from bilingualsub.utils.config import _require_api_key, get_settings
 
 try:
     from google import genai as _genai
@@ -72,10 +72,13 @@ def _wait_for_active(client: Any, uploaded_file: Any) -> Any:
     deadline = time.monotonic() + _FILE_PROCESSING_TIMEOUT
     while uploaded_file.state == "PROCESSING":
         if time.monotonic() >= deadline:
-            raise VisualDescriptionError("File processing timed out after 600 seconds")
+            raise VisualDescriptionError(
+                f"File processing timed out after {_FILE_PROCESSING_TIMEOUT} seconds"
+            )
         time.sleep(2)
-        file_name = uploaded_file.name or ""
-        uploaded_file = client.files.get(name=file_name)
+        if not uploaded_file.name:
+            raise VisualDescriptionError("Uploaded file has no name identifier")
+        uploaded_file = client.files.get(name=uploaded_file.name)
 
     if uploaded_file.state == "FAILED":
         raise VisualDescriptionError("File processing failed on Gemini servers")
@@ -128,8 +131,8 @@ def describe_video(
             "google-genai package is not installed. Run: uv add google-genai"
         )
 
-    api_key = get_gemini_api_key()
     settings = get_settings()
+    api_key = _require_api_key(settings.gemini_api_key, "GEMINI_API_KEY")
 
     prompt = DESCRIBE_PROMPT
     if source_lang and source_lang != "auto":
diff --git a/src/bilingualsub/utils/config.py b/src/bilingualsub/utils/config.py
index 5911b39..8fffec1 100644
--- a/src/bilingualsub/utils/config.py
+++ b/src/bilingualsub/utils/config.py
@@ -14,6 +14,8 @@ class Settings(BaseSettings):
         transcriber_provider: Whisper provider ("groq" or "openai")
         transcriber_model: Whisper model name
         translator_model: Agno model string (e.g. "ollama:model_id", "groq:model_id")
+        gemini_api_key: API key for Google Gemini visual description
+        visual_description_model: Gemini model name for visual description
     """
 
     groq_api_key: str = ""
@@ -50,10 +52,7 @@ def get_settings() -> Settings:
 
 def _require_api_key(value: str, env_var: str) -> str:
     if not value:
-        raise ValueError(
-            f"{env_var} environment variable is not set. "
-            f"Please set it with your {env_var} key."
-        )
+        raise ValueError(f"{env_var} environment variable is not set.")
     return value
 
 
diff --git a/tests/unit/core/test_visual_describer.py b/tests/unit/core/test_visual_describer.py
index 43c0b55..7be1616 100644
--- a/tests/unit/core/test_visual_describer.py
+++ b/tests/unit/core/test_visual_describer.py
@@ -20,12 +20,15 @@ def mock_genai(self):
             yield mock
 
     @pytest.fixture
-    def mock_get_gemini_api_key(self):
-        """Mock get_gemini_api_key to return a fixed key."""
+    def mock_settings(self):
+        """Mock get_settings to return test configuration."""
+        mock = MagicMock()
+        mock.gemini_api_key = "fake-gemini-key"  # pragma: allowlist secret
+        mock.visual_description_model = "test-model"
         with patch(
-            "bilingualsub.core.visual_describer.get_gemini_api_key",
-            return_value="fake-gemini-key",
-        ) as mock:
+            "bilingualsub.core.visual_describer.get_settings",
+            return_value=mock,
+        ):
             yield mock
 
     # ------------------------------------------------------------------
@@ -53,7 +56,7 @@ def _setup_client(self, mock_genai, response_text: str) -> MagicMock:
     # ------------------------------------------------------------------
 
     def test_valid_response_parses_to_subtitle(
-        self, tmp_path, mock_genai, mock_get_gemini_api_key
+        self, tmp_path, mock_genai, mock_settings
     ):
         """Three well-formed lines produce a Subtitle with 3 entries."""
         response_text = (
@@ -91,9 +94,7 @@ def test_valid_response_parses_to_subtitle(
         assert result.entries[2].start == timedelta(seconds=15)
         assert result.entries[2].end == timedelta(seconds=30)
 
-    def test_no_segments_raises_error(
-        self, tmp_path, mock_genai, mock_get_gemini_api_key
-    ):
+    def test_no_segments_raises_error(self, tmp_path, mock_genai, mock_settings):
         """Empty response text must raise VisualDescriptionError."""
         self._setup_client(mock_genai, "")
 
@@ -107,7 +108,7 @@ def test_no_segments_raises_error(
             describe_video(video_path, source_lang="en")
 
     def test_no_segments_unparseable_content_raises_error(
-        self, tmp_path, mock_genai, mock_get_gemini_api_key
+        self, tmp_path, mock_genai, mock_settings
     ):
         """Response with only unparseable lines must raise VisualDescriptionError."""
         self._setup_client(
@@ -124,7 +125,7 @@ def test_no_segments_unparseable_content_raises_error(
             describe_video(video_path, source_lang="en")
 
     def test_api_error_raises_visual_description_error(
-        self, tmp_path, mock_genai, mock_get_gemini_api_key
+        self, tmp_path, mock_genai, mock_settings
     ):
         """Exception from generate_content is wrapped into VisualDescriptionError."""
         mock_client = MagicMock()
@@ -145,16 +146,17 @@ def test_api_error_raises_visual_description_error(
             describe_video(video_path, source_lang="en")
 
     def test_missing_api_key_raises_value_error(self, tmp_path, mock_genai):
-        """ValueError from get_gemini_api_key propagates unchanged."""
+        """ValueError from _require_api_key propagates unchanged."""
         video_path = tmp_path / "test.mp4"
         video_path.write_bytes(b"fake video content")
 
+        mock = MagicMock()
+        mock.gemini_api_key = ""
+        mock.visual_description_model = "test-model"
         with (
             patch(
-                "bilingualsub.core.visual_describer.get_gemini_api_key",
-                side_effect=ValueError(
-                    "GEMINI_API_KEY environment variable is not set"
-                ),
+                "bilingualsub.core.visual_describer.get_settings",
+                return_value=mock,
             ),
             pytest.raises(ValueError, match="GEMINI_API_KEY"),
         ):
@@ -167,9 +169,7 @@ def test_file_not_exists_raises_value_error(self, tmp_path):
         with pytest.raises(ValueError, match="Video file not found"):
             describe_video(video_path, source_lang="en")
 
-    def test_malformed_lines_are_skipped(
-        self, tmp_path, mock_genai, mock_get_gemini_api_key
-    ):
+    def test_malformed_lines_are_skipped(self, tmp_path, mock_genai, mock_settings):
         """Lines that don't match the timestamp pattern are silently ignored."""
         response_text = (
             "00:00 - 00:10 | Valid first entry\n"
@@ -191,7 +191,7 @@ def test_malformed_lines_are_skipped(
         assert result.entries[1].text == "Valid second entry"
 
     def test_mixed_timestamp_formats_parsed_correctly(
-        self, tmp_path, mock_genai, mock_get_gemini_api_key
+        self, tmp_path, mock_genai, mock_settings
     ):
         """MM:SS and HH:MM:SS formats are both parsed correctly."""
         response_text = (
@@ -214,7 +214,7 @@ def test_mixed_timestamp_formats_parsed_correctly(
         assert result.entries[1].end == timedelta(hours=1, seconds=10)
 
     def test_reversed_and_equal_timestamps_are_skipped(
-        self, tmp_path, mock_genai, mock_get_gemini_api_key
+        self, tmp_path, mock_genai, mock_settings
     ):
         """Entries where start >= end are silently skipped."""
         response_text = (
@@ -232,9 +232,7 @@ def test_reversed_and_equal_timestamps_are_skipped(
         assert len(result.entries) == 1
         assert result.entries[0].text == "Valid entry"
 
-    def test_file_state_failed_raises_error(
-        self, tmp_path, mock_genai, mock_get_gemini_api_key
-    ):
+    def test_file_state_failed_raises_error(self, tmp_path, mock_genai, mock_settings):
         """Gemini file in FAILED state raises VisualDescriptionError."""
         mock_client = MagicMock()
         mock_genai.Client.return_value = mock_client
@@ -254,7 +252,7 @@ def test_file_state_failed_raises_error(
             describe_video(video_path, source_lang="en")
 
     def test_file_processing_timeout_raises_error(
-        self, tmp_path, mock_genai, mock_get_gemini_api_key
+        self, tmp_path, mock_genai, mock_settings
     ):
         """File stuck in PROCESSING state past timeout raises error."""
         mock_client = MagicMock()