From d4c71adfe248baceacf4339d7ba7292c5bf14d56 Mon Sep 17 00:00:00 2001
From: grider-transwithai <grider@transwith.ai>
Date: Fri, 26 Dec 2025 14:27:27 +0000
Subject: [PATCH 01/25] fix: Pin PyInstaller to 6.16.0 to avoid conda hook
 regression
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PyInstaller 6.17.0 has a regression in its conda hook that raises
KeyError: 'depends' in some conda environments. Pin to 6.16.0 until the
issue is resolved.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 environment-cuda118.yml | 3 ++-
 environment-cuda122.yml | 3 ++-
 environment-cuda128.yml | 3 ++-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/environment-cuda118.yml b/environment-cuda118.yml
index 7d66261..8d3c832 100644
--- a/environment-cuda118.yml
+++ b/environment-cuda118.yml
@@ -37,7 +37,8 @@ dependencies:
       - backports.functools-lru-cache  # Fix for PyInstaller ModuleNotFoundError
 
       # Build tools
-      - pyinstaller>=6.0.0
+      # PyInstaller 6.17.0 has a conda hook regression (KeyError: 'depends') in some conda environments
+      - pyinstaller==6.16.0
       - setuptools>=65.0.0
       - wheel>=0.38.0
       - build>=0.10.0
diff --git a/environment-cuda122.yml b/environment-cuda122.yml
index aeb91bf..6665cd3 100644
--- a/environment-cuda122.yml
+++ b/environment-cuda122.yml
@@ -34,7 +34,8 @@ dependencies:
       - backports.functools-lru-cache  # Fix for PyInstaller ModuleNotFoundError
 
       # Build tools
-      - pyinstaller>=6.0.0
+      # PyInstaller 6.17.0 has a conda hook regression (KeyError: 'depends') in some conda environments
+      - pyinstaller==6.16.0
       - setuptools>=65.0.0
       - wheel>=0.38.0
       - build>=0.10.0
diff --git a/environment-cuda128.yml b/environment-cuda128.yml
index b9ff41a..655dd6a 100644
--- a/environment-cuda128.yml
+++ b/environment-cuda128.yml
@@ -34,7 +34,8 @@ dependencies:
       - backports.functools-lru-cache  # Fix for PyInstaller ModuleNotFoundError
 
       # Build tools
-      - pyinstaller>=6.0.0
+      # PyInstaller 6.17.0 has a conda hook regression (KeyError: 'depends') in some conda environments
+      - pyinstaller==6.16.0
       - setuptools>=65.0.0
       - wheel>=0.38.0
       - build>=0.10.0

From 3e2fc63b1c438ca7957836e3a850ee4cc3334150 Mon Sep 17 00:00:00 2001
From: neo <randomless98@gmail.com>
Date: Tue, 30 Dec 2025 15:55:25 +0800
Subject: [PATCH 02/25] feat: Add Modal cloud GPU inference support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add modal_infer.py for cloud-based GPU inference using the Modal platform:
- Interactive CLI for GPU selection and configuration
- Support for multiple GPU types (T4, A10G, A100, H100, etc.)
- Automatic model download and caching on Modal Volume
- Batch processing support for accelerated transcription
- Micromamba-based image with conda environment from environment-cuda128.yml

Add environment-modal.yml for lightweight local client setup:
- Minimal dependencies (modal, questionary)
- Python 3.10 environment for running modal_infer.py locally

Update 使用说明.txt with Modal usage instructions:
- Environment setup guide
- Modal account registration and token configuration
- HuggingFace token setup for model downloads
- Step-by-step usage instructions

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 environment-modal.yml                         |  21 ++
 modal_infer.py                                | 216 ++++++++++++++++++
 ...7\347\224\250\350\257\264\346\230\216.txt" |   8 +-
 3 files changed, 242 insertions(+), 3 deletions(-)
 create mode 100644 environment-modal.yml

diff --git a/environment-modal.yml b/environment-modal.yml
new file mode 100644
index 0000000..2bef4a6
--- /dev/null
+++ b/environment-modal.yml
@@ -0,0 +1,21 @@
+# Conda environment for Modal inference (local client only)
+# This environment is for running modal_infer.py locally to submit jobs to Modal
+name: faster-whisper-modal
+channels:
+  - conda-forge
+  - defaults
+
+dependencies:
+  # Python version
+  - python=3.10
+
+  # Core dependencies
+  - pip
+
+  # Pip dependencies
+  - pip:
+      # Modal client for submitting jobs
+      - modal
+
+      # Interactive CLI prompts
+      - questionary
diff --git a/modal_infer.py b/modal_infer.py
index d6abd78..9978c44 100644
--- a/modal_infer.py
+++ b/modal_infer.py
@@ -9,7 +9,11 @@
 from datetime import datetime
 from pathlib import Path
 import subprocess
+<<<<<<< HEAD
 from typing import Dict, List, Optional, Sequence, Tuple
+=======
+from typing import Dict, Iterable, List, Optional, Sequence, Tuple
+>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
 from uuid import uuid4
 
 try:
@@ -27,11 +31,19 @@
 
 APP_NAME = "Faster-Whisper-TransWithAI-ChickenRice"
 REPO_URL = "https://github.com/TransWithAI/Faster-Whisper-TransWithAI-ChickenRice"
+<<<<<<< HEAD
 VOLUME_NAME = "Faster_Whisper"
 VOLUME_ROOT = Path("/Faster_Whisper")
 REMOTE_MOUNT = VOLUME_ROOT
 APP_ROOT_REL = Path(APP_NAME)
 SESSION_SUBDIR = Path("sessions")
+=======
+VOLUME_NAME = "agent_volume"
+VOLUME_ROOT = Path("/agent_volume")
+REMOTE_MOUNT = VOLUME_ROOT
+APP_ROOT_REL = Path(APP_NAME)
+SESSION_SUBDIR = APP_ROOT_REL / "sessions"
+>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
 REPO_VOLUME_DIR = VOLUME_ROOT / "repo"
 SUB_FORMATS = "srt,vtt,lrc"
 SUB_SUFFIXES = {".srt", ".vtt", ".lrc"}
@@ -43,6 +55,10 @@
     ".aac",
     ".ogg",
     ".wma",
+<<<<<<< HEAD
+=======
+    ".mp4",
+>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
     ".mkv",
     ".avi",
     ".mov",
@@ -50,10 +66,14 @@
     ".flv",
     ".wmv",
 }
+<<<<<<< HEAD
 VIDEO_NEED_CONVERT = {".mp4"}  # 需要用户手动转换的格式
 DEFAULT_GPU_CHOICES = [
     "T4",
     "L4",
+=======
+DEFAULT_GPU_CHOICES = [
+>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
     "L40S",
     "A10G",
     "A100-40GB",
@@ -61,6 +81,11 @@
     "H100",
     "H200",
     "B200",
+<<<<<<< HEAD
+=======
+    "L4",
+    "T4",
+>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
 ]
 
 
@@ -96,6 +121,7 @@ class UploadManifest:
     remote_output_rel: Path
     local_output_dir: Path
     remote_logs_rel: Path
+<<<<<<< HEAD
     original_filename: Optional[str] = None  # 原始文件名（用于恢复空格）
 
 
@@ -107,6 +133,14 @@ class ScanResult:
 
 class NoAudioFilesError(Exception):
     pass
+=======
+
+
+@dataclass
+class RemoteResult:
+    created_files: Dict[str, List[str]]
+    log_file: Optional[str]
+>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
 
 
 def rel_to_volume_path(path: Path) -> str:
@@ -190,6 +224,19 @@ def ensure_questionary():
 def ask_selection() -> UserSelection:
     ensure_questionary()
 
+<<<<<<< HEAD
+=======
+    run_mode = questionary.select(
+        "选择运行模式：",
+        choices=[
+            Choice(title="一次性运行（modal run）", value="once"),
+            Choice(title="持久化 App（modal deploy）", value="persistent"),
+        ],
+    ).ask()
+    if not run_mode:
+        raise KeyboardInterrupt
+
+>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
     gpu_choice = questionary.select(
         "选择 GPU",
         choices=DEFAULT_GPU_CHOICES,
@@ -250,7 +297,11 @@ def ask_selection() -> UserSelection:
     )
 
     return UserSelection(
+<<<<<<< HEAD
         run_mode="once",
+=======
+        run_mode=run_mode,
+>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
         gpu_choice=gpu_choice,
         input_path=input_path,
         model_profile=model_profile,
@@ -263,6 +314,7 @@ def ask_selection() -> UserSelection:
     )
 
 
+<<<<<<< HEAD
 def scan_audio_files(path: Path) -> ScanResult:
     """扫描目录，返回音频文件和需要转换的 mp4 文件"""
     audio_files: List[Path] = []
@@ -302,10 +354,28 @@ def validate_audio_path(path: Path) -> ScanResult:
         if not scan_result.audio_files:
             raise NoAudioFilesError(f"输入的文件夹内没有音频文件：{path}")
         return scan_result
+=======
+def iter_audio_files(path: Path) -> List[Path]:
+    files: List[Path] = []
+    for file in path.rglob("*"):
+        if file.is_file() and file.suffix.lower() in AUDIO_SUFFIXES:
+            files.append(file)
+    return files
+
+
+def validate_audio_path(path: Path) -> None:
+    if path.is_file():
+        if path.suffix.lower() not in AUDIO_SUFFIXES:
+            raise ValueError(f"文件 {path} 不属于支持的音/视频格式。")
+    elif path.is_dir():
+        if not iter_audio_files(path):
+            raise ValueError(f"文件夹 {path} 中没有支持的音/视频文件。")
+>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
     else:
         raise ValueError(f"路径 {path} 既不是文件也不是文件夹。")
 
 
+<<<<<<< HEAD
 def upload_single_file(
     volume: modal.Volume,
     selection: UserSelection,
@@ -345,6 +415,47 @@ def upload_single_file(
         local_output_dir=local_output_dir,
         remote_logs_rel=remote_logs_rel,
         original_filename=original_filename,  # 始终记录原始文件名
+=======
+def prepare_upload(
+    volume: modal.Volume,
+    selection: UserSelection,
+) -> UploadManifest:
+    validate_audio_path(selection.input_path)
+    session_id = f"{datetime.now().strftime('%Y%m%d-%H%M%S')}-{uuid4().hex[:6]}"
+    remote_session_rel = SESSION_SUBDIR / session_id
+    remote_logs_rel = remote_session_rel / "logs"
+    remote_inputs_rel: List[Path] = []
+
+    with volume.batch_upload(force=True) as batch:
+        if selection.input_path.is_file():
+            remote_rel = APP_ROOT_REL / selection.input_path.name
+            logging.info("上传文件 -> %s", rel_to_volume_path(remote_rel))
+            batch.put_file(str(selection.input_path), rel_to_volume_path(remote_rel))
+            remote_inputs_rel.append(remote_rel)
+            remote_output_rel = remote_session_rel
+            local_output_dir = selection.input_path.parent
+            source_type = "file"
+        else:
+            remote_input_dir_rel = remote_session_rel / selection.input_path.name
+            audio_files = iter_audio_files(selection.input_path)
+            for file in audio_files:
+                rel = remote_input_dir_rel / file.relative_to(selection.input_path)
+                logging.info("上传文件 -> %s", rel_to_volume_path(rel))
+                batch.put_file(str(file), rel_to_volume_path(rel))
+            remote_inputs_rel.append(remote_input_dir_rel)
+            remote_output_rel = remote_session_rel / f"{selection.input_path.name}_out"
+            local_output_dir = selection.input_path / f"{selection.input_path.name}_out"
+            source_type = "directory"
+
+    return UploadManifest(
+        session_id=session_id,
+        source_type=source_type,
+        local_source=selection.input_path,
+        remote_inputs_rel=remote_inputs_rel,
+        remote_output_rel=remote_output_rel,
+        local_output_dir=local_output_dir,
+        remote_logs_rel=remote_logs_rel,
+>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
     )
 
 
@@ -401,12 +512,18 @@ def run_remote_pipeline(
     selection: UserSelection,
     manifest: UploadManifest,
     payload: Dict,
+<<<<<<< HEAD
 ) -> Dict:
     logging.info("=== 开始构建 Modal 镜像 ===")
     image = build_modal_image()
     logging.info("✓ 镜像构建完成")
     logging.info("使用 GPU：%s", selection.gpu_choice)
     logging.info("超时时间：%d 分钟", selection.timeout_minutes)
+=======
+) -> RemoteResult:
+    image = build_modal_image()
+    logging.info("使用 GPU：%s", selection.gpu_choice)
+>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
     app = modal.App(APP_NAME)
 
     @app.function(
@@ -419,6 +536,7 @@ def run_remote_pipeline(
     def modal_pipeline(job_payload: Dict) -> Dict:
         return _remote_pipeline(job_payload)
 
+<<<<<<< HEAD
     logging.info("=== 开始远程执行 ===")
     logging.info("正在启动 GPU 容器并执行推理任务...")
     logging.info("（以下为远程容器输出）")
@@ -496,10 +614,19 @@ def modal_pipeline(job_payload: Dict) -> Dict:
                 continue  # 继续处理下一个文件
 
     return success_count, fail_count
+=======
+    with app.run():
+        result = modal_pipeline.remote(payload)
+    created = {
+        remote_dir: files for remote_dir, files in result.get("created", {}).items()
+    }
+    return RemoteResult(created_files=created, log_file=result.get("log_file"))
+>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
 
 
 def download_outputs(
     manifest: UploadManifest,
+<<<<<<< HEAD
     result: Dict,
 ) -> None:
     """从远程结果中提取文件内容并写入本地"""
@@ -535,15 +662,52 @@ def download_outputs(
 
 
 def summarize(manifest: UploadManifest, result: Dict) -> None:
+=======
+    result: RemoteResult,
+) -> None:
+    def modal_volume_get(remote_path: str, local_dest: Path) -> None:
+        local_dest.parent.mkdir(parents=True, exist_ok=True)
+        logging.info("下载 %s -> %s", remote_path, local_dest)
+        subprocess.run(
+            ["modal", "volume", "get", VOLUME_NAME, remote_path, str(local_dest)],
+            check=True,
+        )
+
+    for remote_dir, files in result.created_files.items():
+        base_rel = Path(remote_dir.lstrip("/"))
+        for remote_file in files:
+            file_rel = Path(remote_file.lstrip("/"))
+            try:
+                rel_inside_output = file_rel.relative_to(base_rel)
+            except Exception:
+                rel_inside_output = file_rel.name
+            local_path = manifest.local_output_dir / rel_inside_output
+            modal_volume_get(remote_file, local_path)
+
+    if result.log_file:
+        local_log = Path("logs") / Path(Path(result.log_file).name)
+        modal_volume_get(result.log_file, local_log)
+
+
+def summarize(manifest: UploadManifest, result: RemoteResult) -> None:
+>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
     logging.info("=== 运行完成 ===")
     logging.info("Session: %s", manifest.session_id)
     logging.info("源路径: %s", manifest.local_source)
     logging.info("输出路径: %s", manifest.local_output_dir if manifest.source_type == "directory" else manifest.local_source.parent)
+<<<<<<< HEAD
     created_files = result.get("created_files", {})
     if created_files:
         logging.info("新生成文件：")
         for filename in created_files.keys():
             logging.info("  %s", filename)
+=======
+    if result.created_files:
+        logging.info("新生成文件：")
+        for remote_dir, files in result.created_files.items():
+            for file in files:
+                logging.info("  %s", file)
+>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
 
 
 def parse_args() -> argparse.Namespace:
@@ -556,6 +720,7 @@ def parse_args() -> argparse.Namespace:
     return parser.parse_args()
 
 
+<<<<<<< HEAD
 def prompt_exit(enabled: bool) -> None:
     if not enabled:
         return
@@ -610,27 +775,59 @@ def main() -> int:
     prompt_exit(not args.non_interactive)
     return exit_code
 
+=======
+def main() -> None:
+    parse_args()
+    log_path = setup_logger()
+    try:
+        selection = ask_selection()
+        volume = modal.Volume.from_name(VOLUME_NAME, create_if_missing=True)
+        manifest = prepare_upload(volume, selection)
+        payload = build_job_payload(selection, manifest)
+        result = run_remote_pipeline(volume, selection, manifest, payload)
+        download_outputs(manifest, result)
+        summarize(manifest, result)
+        logging.info("✅ 请在上方输出路径查看字幕结果。")
+    except KeyboardInterrupt:
+        logging.warning("用户中断，未执行任何远程操作。")
+        sys.exit(1)
+    except Exception as exc:
+        logging.exception("运行失败：%s", exc)
+        logging.error("日志见：%s", log_path)
+        sys.exit(1)
+
+
+>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
 def _remote_pipeline(job: Dict) -> Dict:
     import subprocess
     from pathlib import Path
     import os
 
+<<<<<<< HEAD
     # 强制重新加载 Volume，确保看到最新上传的文件
     from modal import Volume
     volume = Volume.from_name("Faster_Whisper")
     volume.reload()
 
+=======
+>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
     def run(cmd: Sequence[str], cwd: Optional[str] = None, env: Optional[dict] = None) -> None:
         print(" ".join(cmd), flush=True)
         subprocess.run(cmd, check=True, cwd=cwd, env=env)
 
     mount_root = Path(job["mount_root"])
     repo_dir = REPO_VOLUME_DIR
+<<<<<<< HEAD
 
     # log 文件放在 session 目录下，而不是 logs 子目录
     session_dir = Path(job["remote_output_dir"])
     session_dir.mkdir(parents=True, exist_ok=True)
     log_file = session_dir / "modal_run.log"
+=======
+    logs_dir = Path(job["remote_logs_dir"])
+    logs_dir.mkdir(parents=True, exist_ok=True)
+    log_file = logs_dir / "modal_run.log"
+>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
 
     def log(msg: str) -> None:
         line = f"[modal_run] {msg}"
@@ -708,6 +905,7 @@ def snapshot(path: str) -> set:
 
     cmd.extend(job["remote_inputs"])
 
+<<<<<<< HEAD
     # 在执行推理前，等待文件同步完成
     import time
     log("等待文件同步...")
@@ -765,13 +963,21 @@ def snapshot(path: str) -> set:
 
         log(f"=== 调试信息结束 ===")
         raise
+=======
+    log(f"执行推理命令：{' '.join(cmd)}")
+    run(cmd, cwd=str(repo_dir))
+>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
 
     def to_volume_path(path_str: str) -> str:
         return container_to_volume_path(path_str)
 
+<<<<<<< HEAD
     # 收集生成的文件内容（直接返回，避免 volume 同步问题）
     import base64
     created_files = {}  # {filename: base64_content}
+=======
+    created = {}
+>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
     for target in job["output_targets"]:
         remote_dir = target["remote_dir"]
         after = snapshot(remote_dir)
@@ -781,6 +987,7 @@ def to_volume_path(path_str: str) -> str:
             for file in after - prev
             if Path(file).suffix.lower() in SUB_SUFFIXES
         )
+<<<<<<< HEAD
         for file_path in new_files:
             file_path = Path(file_path)
             if file_path.exists():
@@ -803,3 +1010,12 @@ def to_volume_path(path_str: str) -> str:
 
 if __name__ == "__main__":  # pragma: no cover
     sys.exit(main())
+=======
+        created[to_volume_path(remote_dir)] = [to_volume_path(path) for path in new_files]
+
+    return {"created": created, "log_file": to_volume_path(str(log_file))}
+
+
+if __name__ == "__main__":  # pragma: no cover
+    main()
+>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
diff --git "a/\344\275\277\347\224\250\350\257\264\346\230\216.txt" "b/\344\275\277\347\224\250\350\257\264\346\230\216.txt"
index ae7c8cd..c39a64a 100644
--- "a/\344\275\277\347\224\250\350\257\264\346\230\216.txt"
+++ "b/\344\275\277\347\224\250\350\257\264\346\230\216.txt"
@@ -100,13 +100,14 @@ GPU模式（仅限NVIDIA显卡）:
 
 1. 环境配置：
 
-使用现有的 Conda 环境（已包含 modal 支持）：
+使用 Conda 创建轻量级环境（仅需 modal 和 questionary 库）：
 
 ```bash
-conda activate faster-whisper-cu118  # 或 cu122, cu128
+conda env create -f environment-modal.yml
+conda activate faster-whisper-modal
 ```
 
-或在现有环境中手动安装：
+或手动安装：
 ```bash
 pip install modal questionary
 ```
@@ -150,6 +151,7 @@ python modal_infer.py
 ```
 
 程序会交互式询问：
+- 运行模式：一次性运行 或 持久化App
 - GPU 类型：T4（推荐）、A10G、A100、H100 等
 - 模型选择：基础版、海南鸡（日文转中文优化）、自定义模型
 - 输入文件：本地音频文件或文件夹路径

From 84bfe9f4888a5158464b9b76bdf8496c79cb9dea Mon Sep 17 00:00:00 2001
From: neo <randomless98@gmail.com>
Date: Wed, 31 Dec 2025 15:18:37 +0800
Subject: [PATCH 03/25] fix: Improve modal_infer.py usability and fix bugs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Fix file upload path to use session directory consistently
- Simplify volume path structure (remove APP_ROOT_REL layer)
- Remove run mode selection, default to one-time execution
- Add detailed logging for build, execution and download stages
- Fix download conflict by adding --force flag
- Reorder GPU choices (T4/L4 first for cost efficiency)
- Update volume name to Faster_Whisper

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 .gitignore     | 1 +
 modal_infer.py | 8 ++++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index 0d6661a..8ff3ddd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,6 +10,7 @@ __pycache__/
 # C extensions
 *.so
 
+node_modules/
 # Distribution / packaging
 .Python
 build/
diff --git a/modal_infer.py b/modal_infer.py
index 9978c44..f004c2a 100644
--- a/modal_infer.py
+++ b/modal_infer.py
@@ -428,7 +428,7 @@ def prepare_upload(
 
     with volume.batch_upload(force=True) as batch:
         if selection.input_path.is_file():
-            remote_rel = APP_ROOT_REL / selection.input_path.name
+            remote_rel = remote_session_rel / selection.input_path.name
             logging.info("上传文件 -> %s", rel_to_volume_path(remote_rel))
             batch.put_file(str(selection.input_path), rel_to_volume_path(remote_rel))
             remote_inputs_rel.append(remote_rel)
@@ -521,7 +521,9 @@ def run_remote_pipeline(
     logging.info("超时时间：%d 分钟", selection.timeout_minutes)
 =======
 ) -> RemoteResult:
+    logging.info("=== 开始构建 Modal 镜像 ===")
     image = build_modal_image()
+    logging.info("✓ 镜像构建完成")
     logging.info("使用 GPU：%s", selection.gpu_choice)
 >>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
     app = modal.App(APP_NAME)
@@ -617,6 +619,8 @@ def modal_pipeline(job_payload: Dict) -> Dict:
 =======
     with app.run():
         result = modal_pipeline.remote(payload)
+    logging.info("-" * 60)
+    logging.info("✓ 远程执行完成")
     created = {
         remote_dir: files for remote_dir, files in result.get("created", {}).items()
     }
@@ -669,7 +673,7 @@ def modal_volume_get(remote_path: str, local_dest: Path) -> None:
         local_dest.parent.mkdir(parents=True, exist_ok=True)
         logging.info("下载 %s -> %s", remote_path, local_dest)
         subprocess.run(
-            ["modal", "volume", "get", VOLUME_NAME, remote_path, str(local_dest)],
+            ["modal", "volume", "get", VOLUME_NAME, remote_path, str(local_dest), "--force"],
             check=True,
         )
 

From 5f1b6396e260ec798a58ab16992121118f9c5b62 Mon Sep 17 00:00:00 2001
From: neo <2418660459@qq.com>
Date: Sat, 3 Jan 2026 14:57:27 +0800
Subject: [PATCH 04/25] =?UTF-8?q?refactor:=20=E4=BC=98=E5=8C=96=E6=96=87?=
 =?UTF-8?q?=E4=BB=B6=E5=A4=B9=E5=A4=84=E7=90=86=E9=80=BB=E8=BE=91=EF=BC=8C?=
 =?UTF-8?q?=E9=80=90=E6=96=87=E4=BB=B6=E4=B8=8A=E4=BC=A0=E9=81=BF=E5=85=8D?=
 =?UTF-8?q?=E8=BF=9E=E6=8E=A5=E6=96=AD=E5=BC=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 文件夹模式下改为逐个上传和处理音频文件，容器复用
- 使用 min_containers=1 保持容器预热
- 排除 mp4 格式，发现时提示用户使用 ffmpeg 转换
- 文件夹模式下结果直接保存到源文件夹，不再创建 _out 子目录
- 单个文件处理失败时继续处理其他文件

注意：仅通过语法检查，未经手动测试验证

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 modal_infer.py | 209 ++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 172 insertions(+), 37 deletions(-)

diff --git a/modal_infer.py b/modal_infer.py
index f004c2a..01ed409 100644
--- a/modal_infer.py
+++ b/modal_infer.py
@@ -10,10 +10,14 @@
 from pathlib import Path
 import subprocess
 <<<<<<< HEAD
+<<<<<<< HEAD
 from typing import Dict, List, Optional, Sequence, Tuple
 =======
 from typing import Dict, Iterable, List, Optional, Sequence, Tuple
 >>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
+=======
+from typing import Dict, List, Optional, Sequence, Tuple
+>>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑，逐文件上传避免连接断开)
 from uuid import uuid4
 
 try:
@@ -56,9 +60,12 @@
     ".ogg",
     ".wma",
 <<<<<<< HEAD
+<<<<<<< HEAD
 =======
     ".mp4",
 >>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
+=======
+>>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑，逐文件上传避免连接断开)
     ".mkv",
     ".avi",
     ".mov",
@@ -67,6 +74,9 @@
     ".wmv",
 }
 <<<<<<< HEAD
+<<<<<<< HEAD
+=======
+>>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑，逐文件上传避免连接断开)
 VIDEO_NEED_CONVERT = {".mp4"}  # 需要用户手动转换的格式
 DEFAULT_GPU_CHOICES = [
     "T4",
@@ -136,6 +146,12 @@ class NoAudioFilesError(Exception):
 =======
 
 
+@dataclass
+class ScanResult:
+    audio_files: List[Path]
+    mp4_files: List[Path]
+
+
 @dataclass
 class RemoteResult:
     created_files: Dict[str, List[str]]
@@ -315,10 +331,14 @@ def ask_selection() -> UserSelection:
 
 
 <<<<<<< HEAD
+<<<<<<< HEAD
+=======
+>>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑，逐文件上传避免连接断开)
 def scan_audio_files(path: Path) -> ScanResult:
     """扫描目录，返回音频文件和需要转换的 mp4 文件"""
     audio_files: List[Path] = []
     mp4_files: List[Path] = []
+<<<<<<< HEAD
     for file in path.rglob("*"):
         if file.is_file():
             suffix = file.suffix.lower()
@@ -357,24 +377,52 @@ def validate_audio_path(path: Path) -> ScanResult:
 =======
 def iter_audio_files(path: Path) -> List[Path]:
     files: List[Path] = []
+=======
+>>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑，逐文件上传避免连接断开)
     for file in path.rglob("*"):
-        if file.is_file() and file.suffix.lower() in AUDIO_SUFFIXES:
-            files.append(file)
-    return files
+        if file.is_file():
+            suffix = file.suffix.lower()
+            if suffix in AUDIO_SUFFIXES:
+                audio_files.append(file)
+            elif suffix in VIDEO_NEED_CONVERT:
+                mp4_files.append(file)
+    return ScanResult(audio_files=audio_files, mp4_files=mp4_files)
 
 
-def validate_audio_path(path: Path) -> None:
+def validate_audio_path(path: Path) -> ScanResult:
+    """验证音频路径，返回扫描结果。如果发现 mp4 文件会打印警告。"""
     if path.is_file():
-        if path.suffix.lower() not in AUDIO_SUFFIXES:
+        suffix = path.suffix.lower()
+        if suffix in VIDEO_NEED_CONVERT:
+            raise ValueError(
+                f"文件 {path} 是 mp4 格式，请先使用 ffmpeg 转换为 mp3：\n"
+                f"  ffmpeg -i \"{path}\" -vn -acodec libmp3lame \"{path.with_suffix('.mp3')}\""
+            )
+        if suffix not in AUDIO_SUFFIXES:
             raise ValueError(f"文件 {path} 不属于支持的音/视频格式。")
+        return ScanResult(audio_files=[path], mp4_files=[])
     elif path.is_dir():
-        if not iter_audio_files(path):
+        scan_result = scan_audio_files(path)
+        if scan_result.mp4_files:
+            logging.warning("=" * 60)
+            logging.warning("发现 %d 个 mp4 文件，这些文件将被跳过：", len(scan_result.mp4_files))
+            for mp4_file in scan_result.mp4_files:
+                logging.warning("  - %s", mp4_file)
+            logging.warning("请使用 ffmpeg 转换为 mp3 后再处理，例如：")
+            logging.warning("  ffmpeg -i \"input.mp4\" -vn -acodec libmp3lame \"output.mp3\"")
+            logging.warning("=" * 60)
+        if not scan_result.audio_files:
             raise ValueError(f"文件夹 {path} 中没有支持的音/视频文件。")
+<<<<<<< HEAD
 >>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
+=======
+        return scan_result
+>>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑，逐文件上传避免连接断开)
     else:
         raise ValueError(f"路径 {path} 既不是文件也不是文件夹。")
 
 
+<<<<<<< HEAD
 <<<<<<< HEAD
 def upload_single_file(
     volume: modal.Volume,
@@ -417,42 +465,40 @@ def upload_single_file(
         original_filename=original_filename,  # 始终记录原始文件名
 =======
 def prepare_upload(
+=======
+def upload_single_file(
+>>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑，逐文件上传避免连接断开)
     volume: modal.Volume,
     selection: UserSelection,
+    audio_file: Path,
+    base_dir: Optional[Path] = None,
 ) -> UploadManifest:
-    validate_audio_path(selection.input_path)
+    """上传单个音频文件到 Modal Volume。
+
+    Args:
+        volume: Modal Volume 实例
+        selection: 用户选择配置
+        audio_file: 要上传的音频文件路径
+        base_dir: 基础目录（用于文件夹模式，输出到此目录）
+    """
     session_id = f"{datetime.now().strftime('%Y%m%d-%H%M%S')}-{uuid4().hex[:6]}"
     remote_session_rel = SESSION_SUBDIR / session_id
     remote_logs_rel = remote_session_rel / "logs"
-    remote_inputs_rel: List[Path] = []
 
     with volume.batch_upload(force=True) as batch:
-        if selection.input_path.is_file():
-            remote_rel = remote_session_rel / selection.input_path.name
-            logging.info("上传文件 -> %s", rel_to_volume_path(remote_rel))
-            batch.put_file(str(selection.input_path), rel_to_volume_path(remote_rel))
-            remote_inputs_rel.append(remote_rel)
-            remote_output_rel = remote_session_rel
-            local_output_dir = selection.input_path.parent
-            source_type = "file"
-        else:
-            remote_input_dir_rel = remote_session_rel / selection.input_path.name
-            audio_files = iter_audio_files(selection.input_path)
-            for file in audio_files:
-                rel = remote_input_dir_rel / file.relative_to(selection.input_path)
-                logging.info("上传文件 -> %s", rel_to_volume_path(rel))
-                batch.put_file(str(file), rel_to_volume_path(rel))
-            remote_inputs_rel.append(remote_input_dir_rel)
-            remote_output_rel = remote_session_rel / f"{selection.input_path.name}_out"
-            local_output_dir = selection.input_path / f"{selection.input_path.name}_out"
-            source_type = "directory"
+        remote_rel = remote_session_rel / audio_file.name
+        logging.info("上传文件 -> %s", rel_to_volume_path(remote_rel))
+        batch.put_file(str(audio_file), rel_to_volume_path(remote_rel))
+
+    # 如果指定了 base_dir（文件夹模式），输出到 base_dir；否则输出到文件所在目录
+    local_output_dir = base_dir if base_dir else audio_file.parent
 
     return UploadManifest(
         session_id=session_id,
-        source_type=source_type,
-        local_source=selection.input_path,
-        remote_inputs_rel=remote_inputs_rel,
-        remote_output_rel=remote_output_rel,
+        source_type="file",
+        local_source=audio_file,
+        remote_inputs_rel=[remote_rel],
+        remote_output_rel=remote_session_rel,
         local_output_dir=local_output_dir,
         remote_logs_rel=remote_logs_rel,
 >>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
@@ -628,6 +674,78 @@ def modal_pipeline(job_payload: Dict) -> Dict:
 >>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
 
 
+def process_directory_files(
+    volume: modal.Volume,
+    selection: UserSelection,
+    audio_files: List[Path],
+) -> Tuple[int, int]:
+    """处理文件夹中的所有音频文件，容器复用。
+
+    Args:
+        volume: Modal Volume 实例
+        selection: 用户选择配置
+        audio_files: 要处理的音频文件列表
+
+    Returns:
+        (成功数, 失败数) 元组
+    """
+    logging.info("=== 开始构建 Modal 镜像 ===")
+    image = build_modal_image()
+    logging.info("✓ 镜像构建完成")
+    logging.info("使用 GPU：%s", selection.gpu_choice)
+    logging.info("超时时间：%d 分钟", selection.timeout_minutes)
+    logging.info("待处理文件数：%d", len(audio_files))
+
+    app = modal.App(APP_NAME)
+
+    @app.function(
+        image=image,
+        gpu=selection.gpu_choice,
+        timeout=selection.timeout_minutes * 60,
+        volumes={str(REMOTE_MOUNT): volume},
+        serialized=True,
+        min_containers=1,  # 保持容器预热，复用容器
+    )
+    def modal_pipeline(job_payload: Dict) -> Dict:
+        return _remote_pipeline(job_payload)
+
+    success_count = 0
+    fail_count = 0
+    base_dir = selection.input_path  # 文件夹模式下，输出到源文件夹
+
+    with app.run():
+        for i, audio_file in enumerate(audio_files, 1):
+            logging.info("=" * 60)
+            logging.info("处理文件 [%d/%d]: %s", i, len(audio_files), audio_file.name)
+            logging.info("=" * 60)
+            try:
+                # 1. 上传单个文件
+                manifest = upload_single_file(volume, selection, audio_file, base_dir)
+
+                # 2. 构建 payload
+                payload = build_job_payload(selection, manifest)
+
+                # 3. 执行推理（复用容器）
+                logging.info("正在执行推理...")
+                result = modal_pipeline.remote(payload)
+
+                # 4. 下载结果到源文件夹
+                remote_result = RemoteResult(
+                    created_files=result.get("created", {}),
+                    log_file=result.get("log_file"),
+                )
+                download_outputs(manifest, remote_result)
+
+                logging.info("✓ 文件 %s 处理完成", audio_file.name)
+                success_count += 1
+            except Exception as e:
+                logging.error("✗ 文件 %s 处理失败: %s", audio_file.name, e)
+                fail_count += 1
+                continue  # 继续处理下一个文件
+
+    return success_count, fail_count
+
+
 def download_outputs(
     manifest: UploadManifest,
 <<<<<<< HEAD
@@ -786,12 +904,29 @@ def main() -> None:
     try:
         selection = ask_selection()
         volume = modal.Volume.from_name(VOLUME_NAME, create_if_missing=True)
-        manifest = prepare_upload(volume, selection)
-        payload = build_job_payload(selection, manifest)
-        result = run_remote_pipeline(volume, selection, manifest, payload)
-        download_outputs(manifest, result)
-        summarize(manifest, result)
-        logging.info("✅ 请在上方输出路径查看字幕结果。")
+
+        # 验证路径并获取扫描结果
+        scan_result = validate_audio_path(selection.input_path)
+
+        if selection.input_path.is_dir():
+            # 文件夹模式：逐个处理文件，容器复用
+            logging.info("检测到文件夹输入，将逐个处理 %d 个音频文件", len(scan_result.audio_files))
+            success_count, fail_count = process_directory_files(
+                volume, selection, scan_result.audio_files
+            )
+            logging.info("=" * 60)
+            logging.info("=== 批量处理完成 ===")
+            logging.info("成功: %d, 失败: %d", success_count, fail_count)
+            logging.info("输出路径: %s", selection.input_path)
+            logging.info("✅ 请在上方输出路径查看字幕结果。")
+        else:
+            # 单文件模式：保持原有逻辑
+            manifest = upload_single_file(volume, selection, selection.input_path)
+            payload = build_job_payload(selection, manifest)
+            result = run_remote_pipeline(volume, selection, manifest, payload)
+            download_outputs(manifest, result)
+            summarize(manifest, result)
+            logging.info("✅ 请在上方输出路径查看字幕结果。")
     except KeyboardInterrupt:
         logging.warning("用户中断，未执行任何远程操作。")
         sys.exit(1)

From 04fc0ddcf98cd9d40be910ff1184a568932fd655 Mon Sep 17 00:00:00 2001
From: neo <2418660459@qq.com>
Date: Sat, 3 Jan 2026 15:11:09 +0800
Subject: [PATCH 05/25] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E6=96=87?=
 =?UTF-8?q?=E4=BB=B6=E5=90=8D=E5=8C=85=E5=90=AB=E7=A9=BA=E6=A0=BC=E5=AF=BC?=
 =?UTF-8?q?=E8=87=B4=E8=BF=9C=E7=A8=8B=E6=89=A7=E8=A1=8C=E5=A4=B1=E8=B4=A5?=
 =?UTF-8?q?=E7=9A=84=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 上传时将文件名中的空格替换为下划线
- 在 UploadManifest 中记录原始文件名
- 下载字幕后恢复原始文件名（带空格）

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 modal_infer.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/modal_infer.py b/modal_infer.py
index 01ed409..ea007cf 100644
--- a/modal_infer.py
+++ b/modal_infer.py
@@ -802,10 +802,20 @@ def modal_volume_get(remote_path: str, local_dest: Path) -> None:
             try:
                 rel_inside_output = file_rel.relative_to(base_rel)
             except Exception:
-                rel_inside_output = file_rel.name
+                rel_inside_output = Path(file_rel.name)
             local_path = manifest.local_output_dir / rel_inside_output
             modal_volume_get(remote_file, local_path)
 
+            # 如果有原始文件名（包含空格），恢复原始文件名
+            if manifest.original_filename:
+                original_stem = Path(manifest.original_filename).stem
+                safe_stem = original_stem.replace(" ", "_")
+                if local_path.stem == safe_stem:
+                    new_name = original_stem + local_path.suffix
+                    new_path = local_path.parent / new_name
+                    logging.info("恢复原始文件名: %s -> %s", local_path.name, new_name)
+                    local_path.rename(new_path)
+
     if result.log_file:
         local_log = Path("logs") / Path(Path(result.log_file).name)
         modal_volume_get(result.log_file, local_log)

From d863ce14202a02b5c4466349698adfe51118572b Mon Sep 17 00:00:00 2001
From: neo <2418660459@qq.com>
Date: Sat, 3 Jan 2026 15:41:42 +0800
Subject: [PATCH 06/25] =?UTF-8?q?fix:=20=E4=BD=BF=E7=94=A8=E5=9B=BA?=
 =?UTF-8?q?=E5=AE=9A=E6=96=87=E4=BB=B6=E5=90=8D=20todo=20=E9=81=BF?=
 =?UTF-8?q?=E5=85=8D=E5=85=A8=E8=A7=92=E5=AD=97=E7=AC=A6=E5=AF=BC=E8=87=B4?=
 =?UTF-8?q?=E6=96=87=E4=BB=B6=E6=89=BE=E4=B8=8D=E5=88=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 上传时将文件名改为 todo + 扩展名，避免全角字符问题
- 下载后恢复原始文件名

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 modal_infer.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/modal_infer.py b/modal_infer.py
index ea007cf..08474e3 100644
--- a/modal_infer.py
+++ b/modal_infer.py
@@ -806,11 +806,10 @@ def modal_volume_get(remote_path: str, local_dest: Path) -> None:
             local_path = manifest.local_output_dir / rel_inside_output
             modal_volume_get(remote_file, local_path)
 
-            # 如果有原始文件名（包含空格），恢复原始文件名
+            # 如果有原始文件名，恢复原始文件名
             if manifest.original_filename:
                 original_stem = Path(manifest.original_filename).stem
-                safe_stem = original_stem.replace(" ", "_")
-                if local_path.stem == safe_stem:
+                if local_path.stem == "todo":  # 固定文件名
                     new_name = original_stem + local_path.suffix
                     new_path = local_path.parent / new_name
                     logging.info("恢复原始文件名: %s -> %s", local_path.name, new_name)

From 4a56d1e7762d152c5d73fbb375042c79963b8d1b Mon Sep 17 00:00:00 2001
From: neo <2418660459@qq.com>
Date: Sat, 3 Jan 2026 16:09:02 +0800
Subject: [PATCH 07/25] =?UTF-8?q?fix:=20=E6=B7=BB=E5=8A=A0=20Volume=20?=
 =?UTF-8?q?=E5=90=8C=E6=AD=A5=E7=AD=89=E5=BE=85=EF=BC=8C=E8=A7=A3=E5=86=B3?=
 =?UTF-8?q?=E6=96=87=E4=BB=B6=E4=B8=8A=E4=BC=A0=E5=90=8E=E5=AE=B9=E5=99=A8?=
 =?UTF-8?q?=E7=9C=8B=E4=B8=8D=E5=88=B0=E7=9A=84=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 上传后调用 volume.commit() 强制同步
- 远程执行前等待文件出现（最多 30 秒）（注：本补丁的 diff 仅包含上传后的 volume.commit() 调用，未包含该等待逻辑）

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 modal_infer.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/modal_infer.py b/modal_infer.py
index 08474e3..24c5eeb 100644
--- a/modal_infer.py
+++ b/modal_infer.py
@@ -451,6 +451,11 @@ def upload_single_file(
         logging.info("上传文件 -> %s", rel_to_volume_path(remote_rel))
         batch.put_file(str(audio_file), rel_to_volume_path(remote_rel))
 
+    # 强制同步 Volume，确保文件对容器可见
+    logging.info("等待 Volume 同步...")
+    volume.commit()
+    logging.info("Volume 同步完成")
+
     # 如果指定了 base_dir（文件夹模式），输出到 base_dir；否则输出到文件所在目录
     local_output_dir = base_dir if base_dir else audio_file.parent
 

From 00da127bcd76529dc6aa8e919ba7d7781f8b0440 Mon Sep 17 00:00:00 2001
From: neo <2418660459@qq.com>
Date: Sat, 3 Jan 2026 16:14:40 +0800
Subject: [PATCH 08/25] =?UTF-8?q?fix:=20=E7=A7=BB=E9=99=A4=E6=9C=AC?=
 =?UTF-8?q?=E5=9C=B0=20volume.commit()=20=E8=B0=83=E7=94=A8=EF=BC=88?=
 =?UTF-8?q?=E5=8F=AA=E8=83=BD=E5=9C=A8=E5=AE=B9=E5=99=A8=E5=86=85=E8=B0=83?=
 =?UTF-8?q?=E7=94=A8=EF=BC=89?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 modal_infer.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/modal_infer.py b/modal_infer.py
index 24c5eeb..08474e3 100644
--- a/modal_infer.py
+++ b/modal_infer.py
@@ -451,11 +451,6 @@ def upload_single_file(
         logging.info("上传文件 -> %s", rel_to_volume_path(remote_rel))
         batch.put_file(str(audio_file), rel_to_volume_path(remote_rel))
 
-    # 强制同步 Volume，确保文件对容器可见
-    logging.info("等待 Volume 同步...")
-    volume.commit()
-    logging.info("Volume 同步完成")
-
     # 如果指定了 base_dir（文件夹模式），输出到 base_dir；否则输出到文件所在目录
     local_output_dir = base_dir if base_dir else audio_file.parent
 

From bdaedcaf0b604d2b5f0308c7eb6097f1cb2be641 Mon Sep 17 00:00:00 2001
From: neo <2418660459@qq.com>
Date: Sat, 3 Jan 2026 17:15:56 +0800
Subject: [PATCH 09/25] =?UTF-8?q?refactor:=20=E9=80=9A=E8=BF=87=E5=87=BD?=
 =?UTF-8?q?=E6=95=B0=E8=BF=94=E5=9B=9E=E5=80=BC=E4=BC=A0=E8=BE=93=E7=BB=93?=
 =?UTF-8?q?=E6=9E=9C=E6=96=87=E4=BB=B6=EF=BC=8C=E9=81=BF=E5=85=8D=20volume?=
 =?UTF-8?q?=20=E5=90=8C=E6=AD=A5=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 远程函数直接返回文件内容（base64 编码）
- 本地直接写入文件，使用原始文件名
- 移除 modal volume get 下载逻辑
- 删除不再使用的 RemoteResult 类

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 modal_infer.py | 67 +++++++++++++++++++++++---------------------------
 1 file changed, 31 insertions(+), 36 deletions(-)

diff --git a/modal_infer.py b/modal_infer.py
index 08474e3..3b1477c 100644
--- a/modal_infer.py
+++ b/modal_infer.py
@@ -729,12 +729,8 @@ def modal_pipeline(job_payload: Dict) -> Dict:
                 logging.info("正在执行推理...")
                 result = modal_pipeline.remote(payload)
 
-                # 4. 下载结果到源文件夹
-                remote_result = RemoteResult(
-                    created_files=result.get("created", {}),
-                    log_file=result.get("log_file"),
-                )
-                download_outputs(manifest, remote_result)
+                # 4. 写入结果文件到本地
+                download_outputs(manifest, result)
 
                 logging.info("✓ 文件 %s 处理完成", audio_file.name)
                 success_count += 1
@@ -787,37 +783,36 @@ def summarize(manifest: UploadManifest, result: Dict) -> None:
 =======
     result: RemoteResult,
 ) -> None:
-    def modal_volume_get(remote_path: str, local_dest: Path) -> None:
-        local_dest.parent.mkdir(parents=True, exist_ok=True)
-        logging.info("下载 %s -> %s", remote_path, local_dest)
-        subprocess.run(
-            ["modal", "volume", "get", VOLUME_NAME, remote_path, str(local_dest), "--force"],
-            check=True,
-        )
+    """从远程结果中提取文件内容并写入本地"""
+    import base64
 
-    for remote_dir, files in result.created_files.items():
-        base_rel = Path(remote_dir.lstrip("/"))
-        for remote_file in files:
-            file_rel = Path(remote_file.lstrip("/"))
-            try:
-                rel_inside_output = file_rel.relative_to(base_rel)
-            except Exception:
-                rel_inside_output = Path(file_rel.name)
-            local_path = manifest.local_output_dir / rel_inside_output
-            modal_volume_get(remote_file, local_path)
-
-            # 如果有原始文件名，恢复原始文件名
-            if manifest.original_filename:
-                original_stem = Path(manifest.original_filename).stem
-                if local_path.stem == "todo":  # 固定文件名
-                    new_name = original_stem + local_path.suffix
-                    new_path = local_path.parent / new_name
-                    logging.info("恢复原始文件名: %s -> %s", local_path.name, new_name)
-                    local_path.rename(new_path)
-
-    if result.log_file:
-        local_log = Path("logs") / Path(Path(result.log_file).name)
-        modal_volume_get(result.log_file, local_log)
+    created_files = result.get("created_files", {})
+    log_content = result.get("log_content")
+
+    # 获取原始文件名的 stem（不含扩展名）
+    original_stem = Path(manifest.original_filename).stem if manifest.original_filename else "todo"
+
+    for filename, content_b64 in created_files.items():
+        content = base64.b64decode(content_b64)
+        # 将 todo.xxx 替换为原始文件名
+        if filename.startswith("todo."):
+            suffix = Path(filename).suffix
+            new_filename = original_stem + suffix
+        else:
+            new_filename = filename
+
+        local_path = manifest.local_output_dir / new_filename
+        local_path.parent.mkdir(parents=True, exist_ok=True)
+        local_path.write_bytes(content)
+        logging.info("写入文件: %s (%d bytes)", local_path, len(content))
+
+    # 写入 log 文件
+    if log_content:
+        log_dir = Path("logs")
+        log_dir.mkdir(exist_ok=True)
+        log_path = log_dir / f"modal_run_{manifest.session_id}.log"
+        log_path.write_bytes(base64.b64decode(log_content))
+        logging.info("写入日志: %s", log_path)
 
 
 def summarize(manifest: UploadManifest, result: RemoteResult) -> None:

From 56d1764a57727823fd2cdf87360f3fbb9f2e29ed Mon Sep 17 00:00:00 2001
From: neo <2418660459@qq.com>
Date: Sun, 11 Jan 2026 19:12:30 +0800
Subject: [PATCH 10/25] fix: add modal_infer to build and CI

---
 .github/workflows/build-release-conda.yml              |  5 +++++
 modal_infer.py                                         | 10 +++++++---
 "\344\275\277\347\224\250\350\257\264\346\230\216.txt" |  1 -
 3 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/build-release-conda.yml b/.github/workflows/build-release-conda.yml
index 195be49..e7fa697 100644
--- a/.github/workflows/build-release-conda.yml
+++ b/.github/workflows/build-release-conda.yml
@@ -212,6 +212,11 @@ jobs:
         python -c "import ctranslate2; print(f'CTranslate2 version: {ctranslate2.__version__}')"
         echo "Note: CUDA availability check skipped (no GPU on GitHub runners)"
 
+    - name: Install Modal dependencies
+      run: |
+        pip install modal questionary
+        python -c "import modal; print(f'Modal version: {modal.__version__}')"
+
     - name: Check cached models
       run: |
         echo "Checking for cached models..."
diff --git a/modal_infer.py b/modal_infer.py
index 3b1477c..3668de0 100644
--- a/modal_infer.py
+++ b/modal_infer.py
@@ -905,6 +905,7 @@ def main() -> int:
 def main() -> None:
     parse_args()
     log_path = setup_logger()
+    exit_code = 0
     try:
         selection = ask_selection()
         volume = modal.Volume.from_name(VOLUME_NAME, create_if_missing=True)
@@ -933,11 +934,14 @@ def main() -> None:
             logging.info("✅ 请在上方输出路径查看字幕结果。")
     except KeyboardInterrupt:
         logging.warning("用户中断，未执行任何远程操作。")
-        sys.exit(1)
+        exit_code = 1
     except Exception as exc:
-        logging.exception("运行失败：%s", exc)
+        if isinstance(exc, NoAudioFilesError):
+            logging.error("%s", exc)
+        else:
+            logging.exception("运行失败：%s", exc)
         logging.error("日志见：%s", log_path)
-        sys.exit(1)
+        exit_code = 1
 
 
 >>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
diff --git "a/\344\275\277\347\224\250\350\257\264\346\230\216.txt" "b/\344\275\277\347\224\250\350\257\264\346\230\216.txt"
index c39a64a..a55207f 100644
--- "a/\344\275\277\347\224\250\350\257\264\346\230\216.txt"
+++ "b/\344\275\277\347\224\250\350\257\264\346\230\216.txt"
@@ -151,7 +151,6 @@ python modal_infer.py
 ```
 
 程序会交互式询问：
-- 运行模式：一次性运行 或 持久化App
 - GPU 类型：T4（推荐）、A10G、A100、H100 等
 - 模型选择：基础版、海南鸡（日文转中文优化）、自定义模型
 - 输入文件：本地音频文件或文件夹路径

From 72b5e810c67e0dc1e19fe9b872ef556c7a495b01 Mon Sep 17 00:00:00 2001
From: neo <randomless98@gmail.com>
Date: Tue, 13 Jan 2026 16:23:45 +0800
Subject: [PATCH 11/25] =?UTF-8?q?=E6=8B=86=E5=88=86infer=E4=B8=8Emodal?=
 =?UTF-8?q?=E6=89=93=E5=8C=85=E5=B9=B6=E9=9A=94=E7=A6=BB=E4=BE=9D=E8=B5=96?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 project.spec | 114 +++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 101 insertions(+), 13 deletions(-)

diff --git a/project.spec b/project.spec
index efcb59a..24e99c0 100644
--- a/project.spec
+++ b/project.spec
@@ -345,12 +345,69 @@ datas += [
     ('locales', 'locales'),  # Include the locales directory with translations
 ]
 
-a = Analysis(
-    ['infer.py', 'modal_infer.py'],
+# Base collections (for infer.exe only)
+infer_datas = list(datas)
+infer_binaries = list(binaries)
+infer_hiddenimports = list(hiddenimports)
+
+# Extend collections for modal_infer.exe only
+modal_datas = list(datas)
+modal_binaries = list(binaries)
+modal_hiddenimports = list(hiddenimports)
+
+# Collect modal / questionary and their tricky deps explicitly (modal_infer only)
+try:
+    m_datas, m_binaries, m_hiddenimports = collect_all('modal')
+    modal_datas += m_datas
+    modal_binaries += m_binaries
+    modal_hiddenimports += m_hiddenimports
+    print("Collected modal successfully")
+except:
+    print("Warning: could not collect modal")
+
+try:
+    s_datas, s_binaries, s_hiddenimports = collect_all('synchronicity')
+    modal_datas += s_datas
+    modal_binaries += s_binaries
+    modal_hiddenimports += s_hiddenimports
+except:
+    print("Warning: could not collect synchronicity")
+
+try:
+    q_datas, q_binaries, q_hiddenimports = collect_all('questionary')
+    modal_datas += q_datas
+    modal_binaries += q_binaries
+    modal_hiddenimports += q_hiddenimports
+except:
+    print("Warning: could not collect questionary")
+
+modal_hiddenimports += [
+    'modal',
+    'modal.proto',
+    'synchronicity',
+    'grpclib',
+    'google.protobuf',
+    'google.protobuf.internal',
+    'toml',
+    'rich',
+    'typer',
+    'click',
+    'questionary',
+    'prompt_toolkit',
+    'prompt_toolkit.styles',
+    'prompt_toolkit.key_binding',
+    'prompt_toolkit.formatted_text',
+    'prompt_toolkit.shortcuts',
+    'prompt_toolkit.output',
+    'prompt_toolkit.input',
+]
+
+a_infer = Analysis(
+    ['infer.py'],
     pathex=[],
-    binaries=binaries,
-    datas=datas,
-    hiddenimports=hiddenimports,
+    binaries=infer_binaries,
+    datas=infer_datas,
+    hiddenimports=infer_hiddenimports,
     hookspath=[],  # PyInstaller hooks contrib should be auto-detected
     hooksconfig={},
     runtime_hooks=['runtime_hook.py'],  # Add runtime hook to set KMP_DUPLICATE_LIB_OK
@@ -372,11 +429,39 @@ a = Analysis(
     noarchive=False,
 )
 
-pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
+a_modal = Analysis(
+    ['modal_infer.py'],
+    pathex=[],
+    binaries=modal_binaries,
+    datas=modal_datas,
+    hiddenimports=modal_hiddenimports,
+    hookspath=[],
+    hooksconfig={},
+    runtime_hooks=['runtime_hook.py'],
+    excludes=[
+        'matplotlib',
+        'tkinter',
+        'PyQt5',
+        'PyQt6',
+        'PySide2',
+        'PySide6',
+        'notebook',
+        'jupyter',
+        'IPython',
+        'pytest',
+    ],
+    win_no_prefer_redirects=False,
+    win_private_assemblies=False,
+    cipher=block_cipher,
+    noarchive=False,
+)
+
+pyz_infer = PYZ(a_infer.pure, a_infer.zipped_data, cipher=block_cipher)
+pyz_modal = PYZ(a_modal.pure, a_modal.zipped_data, cipher=block_cipher)
 
 infer_exe = EXE(
-    pyz,
-    [a.scripts[0]],
+    pyz_infer,
+    [a_infer.scripts[0]],
     [],
     exclude_binaries=True,
     name='infer',
@@ -394,8 +479,8 @@ infer_exe = EXE(
 )
 
 modal_exe = EXE(
-    pyz,
-    [a.scripts[1]],
+    pyz_modal,
+    [a_modal.scripts[0]],
     [],
     exclude_binaries=True,
     name='modal_infer',
@@ -415,9 +500,12 @@ modal_exe = EXE(
 coll = COLLECT(
     infer_exe,
     modal_exe,
-    a.binaries,
-    a.zipfiles,
-    a.datas,
+    a_infer.binaries,
+    a_infer.zipfiles,
+    a_infer.datas,
+    a_modal.binaries,
+    a_modal.zipfiles,
+    a_modal.datas,
     strip=False,
     upx=False,
     upx_exclude=[],

From 1236802d3f31e2bdc3b274dfadeabba3f52c204c Mon Sep 17 00:00:00 2001
From: neo <randomless98@gmail.com>
Date: Tue, 13 Jan 2026 16:49:16 +0800
Subject: [PATCH 12/25] =?UTF-8?q?CI=E9=80=82=E9=85=8Dengine=E4=B8=8Eclient?=
 =?UTF-8?q?=E8=BE=93=E5=87=BA=E7=BB=93=E6=9E=84?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/build-release-conda.yml | 32 ++++++++++++++++-------
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/build-release-conda.yml b/.github/workflows/build-release-conda.yml
index e7fa697..08f5a79 100644
--- a/.github/workflows/build-release-conda.yml
+++ b/.github/workflows/build-release-conda.yml
@@ -285,6 +285,19 @@ jobs:
 
         python build_windows.py
 
+    - name: Prepare distribution layout
+      run: |
+        echo "Preparing distribution layout..."
+        mkdir -p dist/faster_whisper_transwithai_chickenrice
+        if [ -d "dist/engine" ]; then
+          mv dist/engine dist/faster_whisper_transwithai_chickenrice/engine
+        fi
+        if [ -d "dist/client" ]; then
+          mv dist/client dist/faster_whisper_transwithai_chickenrice/client
+        fi
+        echo "Distribution root:"
+        find dist/faster_whisper_transwithai_chickenrice -maxdepth 2 -type d -printf "%M %u %g %s %TY-%Tm-%Td %TH:%TM %p\n" 2>/dev/null || true
+
     - name: Copy models to distribution
       run: |
         echo "Copying models to distribution directory..."
@@ -294,19 +307,19 @@ jobs:
           find models -maxdepth 1 -printf "%M %u %g %s %TY-%Tm-%Td %TH:%TM %p\n" 2>/dev/null
           echo ""
 
-          # Create models directory in dist
-          mkdir -p dist/faster_whisper_transwithai_chickenrice/models
+          # Create models directory in engine dist
+          mkdir -p dist/faster_whisper_transwithai_chickenrice/engine/models
 
           # Copy VAD model files (always included)
           echo "Copying VAD models..."
-          cp models/*.onnx dist/faster_whisper_transwithai_chickenrice/models/ 2>/dev/null || true
-          cp models/*.json dist/faster_whisper_transwithai_chickenrice/models/ 2>/dev/null || true
+          cp models/*.onnx dist/faster_whisper_transwithai_chickenrice/engine/models/ 2>/dev/null || true
+          cp models/*.json dist/faster_whisper_transwithai_chickenrice/engine/models/ 2>/dev/null || true
 
           # Copy whisper-base for feature extractor (always included for offline usage)
           echo "Copying whisper-base for feature extractor..."
           if [ -d "models/whisper-base" ]; then
             echo "  Found whisper-base directory, copying..."
-            cp -r models/whisper-base dist/faster_whisper_transwithai_chickenrice/models/ 2>/dev/null || true
+            cp -r models/whisper-base dist/faster_whisper_transwithai_chickenrice/engine/models/ 2>/dev/null || true
             echo "  Whisper-base copied for offline feature extractor support"
           else
             echo "  WARNING: whisper-base directory not found"
@@ -322,9 +335,9 @@ jobs:
                 if [ "$model_name" != "whisper-base" ]; then
                   echo "  Copying model contents from: $model_name"
                   # Copy the contents of the model directory, not the directory itself
-                  cp -r "$dir"* dist/faster_whisper_transwithai_chickenrice/models/ 2>/dev/null || true
+                  cp -r "$dir"* dist/faster_whisper_transwithai_chickenrice/engine/models/ 2>/dev/null || true
                   # Also copy hidden files if any exist
-                  cp -r "$dir".* dist/faster_whisper_transwithai_chickenrice/models/ 2>/dev/null || true
+                  cp -r "$dir".* dist/faster_whisper_transwithai_chickenrice/engine/models/ 2>/dev/null || true
                 fi
               fi
             done
@@ -332,7 +345,7 @@ jobs:
 
           echo ""
           echo "Models in distribution:"
-          find dist/faster_whisper_transwithai_chickenrice/models -maxdepth 1 -printf "%M %u %g %s %TY-%Tm-%Td %TH:%TM %p\n" 2>/dev/null
+          find dist/faster_whisper_transwithai_chickenrice/engine/models -maxdepth 1 -printf "%M %u %g %s %TY-%Tm-%Td %TH:%TM %p\n" 2>/dev/null
           echo ""
           echo "Total distribution size:"
           du -sh dist/faster_whisper_transwithai_chickenrice/
@@ -380,8 +393,9 @@ jobs:
     - name: Test executable (CPU mode)
       shell: cmd /C CALL {0}
       run: |
-        cd dist\faster_whisper_transwithai_chickenrice
+        cd dist\faster_whisper_transwithai_chickenrice\engine
         infer.exe --help
+        cd ..\client
         modal_infer.exe --help
 
     - name: Upload artifact

From 1a00c3a4f886ac862cafeea7cc9f74da882926dc Mon Sep 17 00:00:00 2001
From: neo <randomless98@gmail.com>
Date: Tue, 13 Jan 2026 16:59:11 +0800
Subject: [PATCH 13/25] =?UTF-8?q?amend=20=E6=8A=8A.spec=20=E7=9A=84=20COLL?=
 =?UTF-8?q?ECT=20=E6=8B=86=E6=88=90=E4=B8=A4=E4=B8=AA=E8=BE=93=E5=87=BA?=
 =?UTF-8?q?=E7=9B=AE=E5=BD=95=EF=BC=88dist/engine=20=E5=92=8C=20dist/clien?=
 =?UTF-8?q?t=EF=BC=89=EF=BC=8C=E4=BB=A5=E5=BD=BB=E5=BA=95=E9=9A=94?=
 =?UTF-8?q?=E7=A6=BB=20DLL=20=E5=86=B2=E7=AA=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 project.spec | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/project.spec b/project.spec
index 24e99c0..9deb0c2 100644
--- a/project.spec
+++ b/project.spec
@@ -497,17 +497,24 @@ modal_exe = EXE(
     icon='transwithai.ico' if os.path.exists('transwithai.ico') else None,
 )
 
-coll = COLLECT(
+coll_infer = COLLECT(
     infer_exe,
-    modal_exe,
     a_infer.binaries,
     a_infer.zipfiles,
     a_infer.datas,
+    strip=False,
+    upx=False,
+    upx_exclude=[],
+    name='engine',
+)
+
+coll_modal = COLLECT(
+    modal_exe,
     a_modal.binaries,
     a_modal.zipfiles,
     a_modal.datas,
     strip=False,
     upx=False,
     upx_exclude=[],
-    name='faster_whisper_transwithai_chickenrice',
+    name='client',
 )

From c45c08c9f9c0ab230d3faa48ea6bb1406a0cce35 Mon Sep 17 00:00:00 2001
From: neo <2418660459@qq.com>
Date: Tue, 13 Jan 2026 18:09:00 +0800
Subject: [PATCH 14/25] =?UTF-8?q?=E4=BF=AE=E6=94=B9Verify=20build=20succee?=
 =?UTF-8?q?ded=E6=A3=80=E6=9F=A5=E7=9B=AE=E5=BD=95=E9=9A=94=E7=A6=BB?=
 =?UTF-8?q?=E4=B8=8B=E7=9A=84=E4=B8=A4=E4=B8=AA=E5=AD=90=E7=9B=AE=E5=BD=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 build_windows.py | 32 +++++++++++++++++++++++++-------
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/build_windows.py b/build_windows.py
index 83c3eb9..a58157e 100644
--- a/build_windows.py
+++ b/build_windows.py
@@ -209,8 +209,17 @@ def build():
 
     # Verify build succeeded and check for CUDA libraries
     if result.returncode == 0:
-        dist_dir = Path("dist/faster_whisper_transwithai_chickenrice")
+        dist_root = Path("dist")
+        dist_dir = dist_root / "faster_whisper_transwithai_chickenrice"
+        engine_dir = dist_root / "engine"
+        client_dir = dist_root / "client"
+
         if dist_dir.exists():
+            verify_dirs = [dist_dir]
+        else:
+            verify_dirs = [p for p in [engine_dir, client_dir] if p.exists()]
+
+        if verify_dirs:
             # Quick verification of critical libraries
             print("\nVerifying CUDA libraries in distribution...")
 
@@ -219,15 +228,20 @@ def build():
             missing_libs = []
 
             # Check in root directory and all subdirectories
-            all_dlls = list(dist_dir.glob("**/*.dll"))
+            all_dlls = []
+            for root_dir in verify_dirs:
+                all_dlls.extend(root_dir.glob("**/*.dll"))
 
             for critical in critical_libs:
                 found_in_locations = []
                 for dll_path in all_dlls:
                     if critical in dll_path.name.lower():
-                        # Get relative path from dist_dir
-                        rel_path = dll_path.relative_to(dist_dir)
-                        location = str(rel_path.parent) if str(rel_path.parent) != '.' else 'root'
+                        # Get relative path from dist root for consistent display
+                        try:
+                            rel_path = dll_path.relative_to(dist_root)
+                            location = str(rel_path.parent) if str(rel_path.parent) != '.' else 'root'
+                        except ValueError:
+                            location = str(dll_path.parent)
                         found_in_locations.append(location)
 
                 if found_in_locations:
@@ -247,9 +261,13 @@ def build():
                 print("     Note: The PyInstaller hooks should have included these.")
                 print("     If GPU acceleration doesn't work, check your conda environment.")
 
-            print(f"\nBuild complete! Output in: {dist_dir}")
+            if dist_dir.exists():
+                output_locations = [str(dist_dir)]
+            else:
+                output_locations = [str(p) for p in verify_dirs]
+            print(f"\nBuild complete! Output in: {', '.join(output_locations)}")
         else:
-            print("Error: dist/faster_whisper_transwithai_chickenrice directory not found after build")
+            print("Error: dist/engine or dist/client directory not found after build")
             return 1
     else:
         print("\nBuild failed!")

From aec0e5662261b4ba1193202fe22c1ada4e8f2484 Mon Sep 17 00:00:00 2001
From: neo <2418660459@qq.com>
Date: Tue, 13 Jan 2026 18:26:12 +0800
Subject: [PATCH 15/25] change back to v1.5 .spec and workflow

---
 .github/workflows/build-release-conda.yml |  73 ++---------
 project.spec                              | 145 +++-------------------
 2 files changed, 25 insertions(+), 193 deletions(-)

diff --git a/.github/workflows/build-release-conda.yml b/.github/workflows/build-release-conda.yml
index 08f5a79..6c377eb 100644
--- a/.github/workflows/build-release-conda.yml
+++ b/.github/workflows/build-release-conda.yml
@@ -28,45 +28,12 @@ jobs:
     strategy:
       matrix:
         include:
-          # CUDA 11.8 versions
-          - cuda: "11.8"
-            env_file: "environment-cuda118.yml"
-            env_name: "faster-whisper-cu118"
-            artifact_suffix: "cu118"
-            model_variant: "base"
-            hf_model: ""
-          - cuda: "11.8"
-            env_file: "environment-cuda118.yml"
-            env_name: "faster-whisper-cu118"
-            artifact_suffix: "cu118-chickenrice"
-            model_variant: "chickenrice"
-            hf_model: "--hf-model chickenrice0721/whisper-large-v2-translate-zh-v0.2-st-ct2"
-          # CUDA 12.2 versions
-          - cuda: "12.2"
-            env_file: "environment-cuda122.yml"
-            env_name: "faster-whisper-cu122"
-            artifact_suffix: "cu122"
-            model_variant: "base"
-            hf_model: ""
-          - cuda: "12.2"
-            env_file: "environment-cuda122.yml"
-            env_name: "faster-whisper-cu122"
-            artifact_suffix: "cu122-chickenrice"
-            model_variant: "chickenrice"
-            hf_model: "--hf-model chickenrice0721/whisper-large-v2-translate-zh-v0.2-st-ct2"
-          # CUDA 12.8 versions
           - cuda: "12.8"
             env_file: "environment-cuda128.yml"
             env_name: "faster-whisper-cu128"
             artifact_suffix: "cu128"
             model_variant: "base"
             hf_model: ""
-          - cuda: "12.8"
-            env_file: "environment-cuda128.yml"
-            env_name: "faster-whisper-cu128"
-            artifact_suffix: "cu128-chickenrice"
-            model_variant: "chickenrice"
-            hf_model: "--hf-model chickenrice0721/whisper-large-v2-translate-zh-v0.2-st-ct2"
 
     steps:
     - name: Checkout code
@@ -212,11 +179,6 @@ jobs:
         python -c "import ctranslate2; print(f'CTranslate2 version: {ctranslate2.__version__}')"
         echo "Note: CUDA availability check skipped (no GPU on GitHub runners)"
 
-    - name: Install Modal dependencies
-      run: |
-        pip install modal questionary
-        python -c "import modal; print(f'Modal version: {modal.__version__}')"
-
     - name: Check cached models
       run: |
         echo "Checking for cached models..."
@@ -285,19 +247,6 @@ jobs:
 
         python build_windows.py
 
-    - name: Prepare distribution layout
-      run: |
-        echo "Preparing distribution layout..."
-        mkdir -p dist/faster_whisper_transwithai_chickenrice
-        if [ -d "dist/engine" ]; then
-          mv dist/engine dist/faster_whisper_transwithai_chickenrice/engine
-        fi
-        if [ -d "dist/client" ]; then
-          mv dist/client dist/faster_whisper_transwithai_chickenrice/client
-        fi
-        echo "Distribution root:"
-        find dist/faster_whisper_transwithai_chickenrice -maxdepth 2 -type d -printf "%M %u %g %s %TY-%Tm-%Td %TH:%TM %p\n" 2>/dev/null || true
-
     - name: Copy models to distribution
       run: |
         echo "Copying models to distribution directory..."
@@ -307,19 +256,19 @@ jobs:
           find models -maxdepth 1 -printf "%M %u %g %s %TY-%Tm-%Td %TH:%TM %p\n" 2>/dev/null
           echo ""
 
-          # Create models directory in engine dist
-          mkdir -p dist/faster_whisper_transwithai_chickenrice/engine/models
+          # Create models directory in dist
+          mkdir -p dist/faster_whisper_transwithai_chickenrice/models
 
           # Copy VAD model files (always included)
           echo "Copying VAD models..."
-          cp models/*.onnx dist/faster_whisper_transwithai_chickenrice/engine/models/ 2>/dev/null || true
-          cp models/*.json dist/faster_whisper_transwithai_chickenrice/engine/models/ 2>/dev/null || true
+          cp models/*.onnx dist/faster_whisper_transwithai_chickenrice/models/ 2>/dev/null || true
+          cp models/*.json dist/faster_whisper_transwithai_chickenrice/models/ 2>/dev/null || true
 
           # Copy whisper-base for feature extractor (always included for offline usage)
           echo "Copying whisper-base for feature extractor..."
           if [ -d "models/whisper-base" ]; then
             echo "  Found whisper-base directory, copying..."
-            cp -r models/whisper-base dist/faster_whisper_transwithai_chickenrice/engine/models/ 2>/dev/null || true
+            cp -r models/whisper-base dist/faster_whisper_transwithai_chickenrice/models/ 2>/dev/null || true
             echo "  Whisper-base copied for offline feature extractor support"
           else
             echo "  WARNING: whisper-base directory not found"
@@ -335,9 +284,9 @@ jobs:
                 if [ "$model_name" != "whisper-base" ]; then
                   echo "  Copying model contents from: $model_name"
                   # Copy the contents of the model directory, not the directory itself
-                  cp -r "$dir"* dist/faster_whisper_transwithai_chickenrice/engine/models/ 2>/dev/null || true
+                  cp -r "$dir"* dist/faster_whisper_transwithai_chickenrice/models/ 2>/dev/null || true
                   # Also copy hidden files if any exist
-                  cp -r "$dir".* dist/faster_whisper_transwithai_chickenrice/engine/models/ 2>/dev/null || true
+                  cp -r "$dir".* dist/faster_whisper_transwithai_chickenrice/models/ 2>/dev/null || true
                 fi
               fi
             done
@@ -345,7 +294,7 @@ jobs:
 
           echo ""
           echo "Models in distribution:"
-          find dist/faster_whisper_transwithai_chickenrice/engine/models -maxdepth 1 -printf "%M %u %g %s %TY-%Tm-%Td %TH:%TM %p\n" 2>/dev/null
+          find dist/faster_whisper_transwithai_chickenrice/models -maxdepth 1 -printf "%M %u %g %s %TY-%Tm-%Td %TH:%TM %p\n" 2>/dev/null
           echo ""
           echo "Total distribution size:"
           du -sh dist/faster_whisper_transwithai_chickenrice/
@@ -393,10 +342,8 @@ jobs:
     - name: Test executable (CPU mode)
       shell: cmd /C CALL {0}
       run: |
-        cd dist\faster_whisper_transwithai_chickenrice\engine
+        cd dist\faster_whisper_transwithai_chickenrice
         infer.exe --help
-        cd ..\client
-        modal_infer.exe --help
 
     - name: Upload artifact
       uses: actions/upload-artifact@v4
@@ -667,4 +614,4 @@ jobs:
         repository: ${{ github.repository }}
         tag_name: ${{ github.ref }}
         files: faster_whisper_transwithai_windows_cu128-chickenrice.zip
-        token: ${{ secrets.GITHUB_TOKEN }}
+        token: ${{ secrets.GITHUB_TOKEN }}
\ No newline at end of file
diff --git a/project.spec b/project.spec
index 9deb0c2..b8078db 100644
--- a/project.spec
+++ b/project.spec
@@ -345,69 +345,12 @@ datas += [
     ('locales', 'locales'),  # Include the locales directory with translations
 ]
 
-# Base collections (for infer.exe only)
-infer_datas = list(datas)
-infer_binaries = list(binaries)
-infer_hiddenimports = list(hiddenimports)
-
-# Extend collections for modal_infer.exe only
-modal_datas = list(datas)
-modal_binaries = list(binaries)
-modal_hiddenimports = list(hiddenimports)
-
-# Collect modal / questionary and their tricky deps explicitly (modal_infer only)
-try:
-    m_datas, m_binaries, m_hiddenimports = collect_all('modal')
-    modal_datas += m_datas
-    modal_binaries += m_binaries
-    modal_hiddenimports += m_hiddenimports
-    print("Collected modal successfully")
-except:
-    print("Warning: could not collect modal")
-
-try:
-    s_datas, s_binaries, s_hiddenimports = collect_all('synchronicity')
-    modal_datas += s_datas
-    modal_binaries += s_binaries
-    modal_hiddenimports += s_hiddenimports
-except:
-    print("Warning: could not collect synchronicity")
-
-try:
-    q_datas, q_binaries, q_hiddenimports = collect_all('questionary')
-    modal_datas += q_datas
-    modal_binaries += q_binaries
-    modal_hiddenimports += q_hiddenimports
-except:
-    print("Warning: could not collect questionary")
-
-modal_hiddenimports += [
-    'modal',
-    'modal.proto',
-    'synchronicity',
-    'grpclib',
-    'google.protobuf',
-    'google.protobuf.internal',
-    'toml',
-    'rich',
-    'typer',
-    'click',
-    'questionary',
-    'prompt_toolkit',
-    'prompt_toolkit.styles',
-    'prompt_toolkit.key_binding',
-    'prompt_toolkit.formatted_text',
-    'prompt_toolkit.shortcuts',
-    'prompt_toolkit.output',
-    'prompt_toolkit.input',
-]
-
-a_infer = Analysis(
+a = Analysis(
     ['infer.py'],
     pathex=[],
-    binaries=infer_binaries,
-    datas=infer_datas,
-    hiddenimports=infer_hiddenimports,
+    binaries=binaries,
+    datas=datas,
+    hiddenimports=hiddenimports,
     hookspath=[],  # PyInstaller hooks contrib should be auto-detected
     hooksconfig={},
     runtime_hooks=['runtime_hook.py'],  # Add runtime hook to set KMP_DUPLICATE_LIB_OK
@@ -429,39 +372,11 @@ a_infer = Analysis(
     noarchive=False,
 )
 
-a_modal = Analysis(
-    ['modal_infer.py'],
-    pathex=[],
-    binaries=modal_binaries,
-    datas=modal_datas,
-    hiddenimports=modal_hiddenimports,
-    hookspath=[],
-    hooksconfig={},
-    runtime_hooks=['runtime_hook.py'],
-    excludes=[
-        'matplotlib',
-        'tkinter',
-        'PyQt5',
-        'PyQt6',
-        'PySide2',
-        'PySide6',
-        'notebook',
-        'jupyter',
-        'IPython',
-        'pytest',
-    ],
-    win_no_prefer_redirects=False,
-    win_private_assemblies=False,
-    cipher=block_cipher,
-    noarchive=False,
-)
+pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
 
-pyz_infer = PYZ(a_infer.pure, a_infer.zipped_data, cipher=block_cipher)
-pyz_modal = PYZ(a_modal.pure, a_modal.zipped_data, cipher=block_cipher)
-
-infer_exe = EXE(
-    pyz_infer,
-    [a_infer.scripts[0]],
+exe = EXE(
+    pyz,
+    a.scripts,
     [],
     exclude_binaries=True,
     name='infer',
@@ -478,43 +393,13 @@ infer_exe = EXE(
     icon='transwithai.ico' if os.path.exists('transwithai.ico') else None,
 )
 
-modal_exe = EXE(
-    pyz_modal,
-    [a_modal.scripts[0]],
-    [],
-    exclude_binaries=True,
-    name='modal_infer',
-    debug=False,
-    bootloader_ignore_signals=False,
-    strip=False,
-    upx=False,
-    console=True,
-    disable_windowed_traceback=False,
-    argv_emulation=False,
-    target_arch=None,
-    codesign_identity=None,
-    entitlements_file=None,
-    icon='transwithai.ico' if os.path.exists('transwithai.ico') else None,
-)
-
-coll_infer = COLLECT(
-    infer_exe,
-    a_infer.binaries,
-    a_infer.zipfiles,
-    a_infer.datas,
+coll = COLLECT(
+    exe,
+    a.binaries,
+    a.zipfiles,
+    a.datas,
     strip=False,
     upx=False,
     upx_exclude=[],
-    name='engine',
-)
-
-coll_modal = COLLECT(
-    modal_exe,
-    a_modal.binaries,
-    a_modal.zipfiles,
-    a_modal.datas,
-    strip=False,
-    upx=False,
-    upx_exclude=[],
-    name='client',
-)
+    name='faster_whisper_transwithai_chickenrice',
+)
\ No newline at end of file

From 7fd3f7eea6be7f322003f1d69fc44a32a249e746 Mon Sep 17 00:00:00 2001
From: neo <2418660459@qq.com>
Date: Tue, 13 Jan 2026 19:27:03 +0800
Subject: [PATCH 16/25] standalone modal build process

---
 .github/workflows/build-release-conda.yml | 34 ++++++++++++++
 build_windows.py                          | 17 ++++++-
 modal.spec                                | 54 +++++++++++++++++++++++
 modal_infer.py                            | 15 ++++---
 4 files changed, 114 insertions(+), 6 deletions(-)
 create mode 100644 modal.spec

diff --git a/.github/workflows/build-release-conda.yml b/.github/workflows/build-release-conda.yml
index 6c377eb..3d22b0c 100644
--- a/.github/workflows/build-release-conda.yml
+++ b/.github/workflows/build-release-conda.yml
@@ -28,12 +28,45 @@ jobs:
     strategy:
       matrix:
         include:
+          # CUDA 11.8 versions
+          - cuda: "11.8"
+            env_file: "environment-cuda118.yml"
+            env_name: "faster-whisper-cu118"
+            artifact_suffix: "cu118"
+            model_variant: "base"
+            hf_model: ""
+          - cuda: "11.8"
+            env_file: "environment-cuda118.yml"
+            env_name: "faster-whisper-cu118"
+            artifact_suffix: "cu118-chickenrice"
+            model_variant: "chickenrice"
+            hf_model: "--hf-model chickenrice0721/whisper-large-v2-translate-zh-v0.2-st-ct2"
+          # CUDA 12.2 versions
+          - cuda: "12.2"
+            env_file: "environment-cuda122.yml"
+            env_name: "faster-whisper-cu122"
+            artifact_suffix: "cu122"
+            model_variant: "base"
+            hf_model: ""
+          - cuda: "12.2"
+            env_file: "environment-cuda122.yml"
+            env_name: "faster-whisper-cu122"
+            artifact_suffix: "cu122-chickenrice"
+            model_variant: "chickenrice"
+            hf_model: "--hf-model chickenrice0721/whisper-large-v2-translate-zh-v0.2-st-ct2"
+          # CUDA 12.8 versions
           - cuda: "12.8"
             env_file: "environment-cuda128.yml"
             env_name: "faster-whisper-cu128"
             artifact_suffix: "cu128"
             model_variant: "base"
             hf_model: ""
+          - cuda: "12.8"
+            env_file: "environment-cuda128.yml"
+            env_name: "faster-whisper-cu128"
+            artifact_suffix: "cu128-chickenrice"
+            model_variant: "chickenrice"
+            hf_model: "--hf-model chickenrice0721/whisper-large-v2-translate-zh-v0.2-st-ct2"
 
     steps:
     - name: Checkout code
@@ -344,6 +377,7 @@ jobs:
       run: |
         cd dist\faster_whisper_transwithai_chickenrice
         infer.exe --help
+        modal_infer.exe --help
 
     - name: Upload artifact
       uses: actions/upload-artifact@v4
diff --git a/build_windows.py b/build_windows.py
index a58157e..cba704b 100644
--- a/build_windows.py
+++ b/build_windows.py
@@ -209,6 +209,21 @@ def build():
 
     # Verify build succeeded and check for CUDA libraries
     if result.returncode == 0:
+        # Build modal_infer if modal.spec is present (separate target).
+        modal_spec = Path("modal.spec")
+        if modal_spec.exists():
+            modal_cmd = [
+                sys.executable, "-m", "PyInstaller",
+                "--clean",
+                "--noconfirm",
+                str(modal_spec),
+            ]
+            print(f"\nRunning: {' '.join(modal_cmd)}")
+            modal_result = subprocess.run(modal_cmd, capture_output=False)
+            if modal_result.returncode != 0:
+                print("\nModal build failed!")
+                return 1
+
         dist_root = Path("dist")
         dist_dir = dist_root / "faster_whisper_transwithai_chickenrice"
         engine_dir = dist_root / "engine"
@@ -276,4 +291,4 @@ def build():
     return 0
 
 if __name__ == "__main__":
-    sys.exit(build())
\ No newline at end of file
+    sys.exit(build())
diff --git a/modal.spec b/modal.spec
new file mode 100644
index 0000000..a9ad761
--- /dev/null
+++ b/modal.spec
@@ -0,0 +1,54 @@
+# -*- mode: python ; coding: utf-8 -*-
+import os
+from PyInstaller.utils.hooks import collect_all
+
+block_cipher = None
+
+datas = [("environment-cuda128.yml", ".")]
+binaries = []
+hiddenimports = []
+
+for package in ["modal", "questionary", "prompt_toolkit", "rich", "typer", "click"]:
+    try:
+        pkg_datas, pkg_binaries, pkg_hiddenimports = collect_all(package)
+        datas += pkg_datas
+        binaries += pkg_binaries
+        hiddenimports += pkg_hiddenimports
+    except Exception:
+        pass
+
+a = Analysis(
+    ["modal_infer.py"],
+    pathex=[],
+    binaries=binaries,
+    datas=datas,
+    hiddenimports=hiddenimports,
+    hookspath=[],
+    hooksconfig={},
+    runtime_hooks=[],
+    excludes=[],
+    noarchive=False,
+)
+
+pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
+
+exe = EXE(
+    pyz,
+    a.scripts,
+    a.binaries,
+    a.zipfiles,
+    a.datas,
+    exclude_binaries=False,
+    name="modal_infer",
+    debug=False,
+    bootloader_ignore_signals=False,
+    strip=False,
+    upx=False,
+    console=True,
+    disable_windowed_traceback=False,
+    argv_emulation=False,
+    target_arch=None,
+    codesign_identity=None,
+    entitlements_file=None,
+    icon="transwithai.ico" if os.path.exists("transwithai.ico") else None,
+)
diff --git a/modal_infer.py b/modal_infer.py
index 3668de0..75d066c 100644
--- a/modal_infer.py
+++ b/modal_infer.py
@@ -7,7 +7,7 @@
 import sys
 from dataclasses import dataclass
 from datetime import datetime
-from pathlib import Path
+from pathlib import Path, PurePosixPath
 import subprocess
 <<<<<<< HEAD
 <<<<<<< HEAD
@@ -37,7 +37,7 @@
 REPO_URL = "https://github.com/TransWithAI/Faster-Whisper-TransWithAI-ChickenRice"
 <<<<<<< HEAD
 VOLUME_NAME = "Faster_Whisper"
-VOLUME_ROOT = Path("/Faster_Whisper")
+VOLUME_ROOT = "/Faster_Whisper"
 REMOTE_MOUNT = VOLUME_ROOT
 APP_ROOT_REL = Path(APP_NAME)
 SESSION_SUBDIR = Path("sessions")
@@ -98,6 +98,10 @@
 >>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
 ]
 
+def resolve_resource_path(filename: str) -> Path:
+    base_dir = Path(getattr(sys, "_MEIPASS", Path(__file__).resolve().parent))
+    return base_dir / filename
+
 
 @dataclass
 class ModelProfile:
@@ -167,7 +171,8 @@ def rel_to_volume_path(path: Path) -> str:
 
 
 def rel_to_container_path(path: Path) -> str:
-    return str((REMOTE_MOUNT / path).as_posix())
+    base = PurePosixPath(REMOTE_MOUNT)
+    return str((base / path.as_posix()).as_posix())
 
 
 def volume_path_to_relative(path: str) -> Path:
@@ -439,7 +444,7 @@ def upload_single_file(
         base_dir: 基础目录（用于文件夹模式，输出到此目录）
     """
     session_id = f"{datetime.now().strftime('%Y%m%d-%H%M%S')}-{uuid4().hex[:6]}"
-    remote_session_rel = SESSION_SUBDIR / session_id
+    remote_session_rel = Path(SESSION_SUBDIR) / session_id
     remote_logs_rel = remote_session_rel / "logs"
 
     # 使用固定文件名避免全角字符等问题
@@ -546,7 +551,7 @@ def build_modal_image() -> modal.Image:
         modal.Image.micromamba(python_version="3.10")
         .apt_install("git")
         .micromamba_install(
-            spec_file="environment-cuda128.yml",
+            spec_file=str(resolve_resource_path("environment-cuda128.yml")),
             channels=["conda-forge", "defaults"],
         )
         .pip_install("modal", "questionary")

From 3c6d1129c2abdb316d13b5010eb0a698992055b7 Mon Sep 17 00:00:00 2001
From: neo <2418660459@qq.com>
Date: Tue, 13 Jan 2026 19:45:38 +0800
Subject: [PATCH 17/25] change modal_infer.exe output path

---
 build_windows.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/build_windows.py b/build_windows.py
index cba704b..c153f14 100644
--- a/build_windows.py
+++ b/build_windows.py
@@ -216,6 +216,8 @@ def build():
                 sys.executable, "-m", "PyInstaller",
                 "--clean",
                 "--noconfirm",
+                "--distpath", str(Path("dist") / "faster_whisper_transwithai_chickenrice"),
+                "--workpath", str(Path("build") / "modal"),
                 str(modal_spec),
             ]
             print(f"\nRunning: {' '.join(modal_cmd)}")

From d4c30e9802d7e871cacea16d6db4df6b9cd99c30 Mon Sep 17 00:00:00 2001
From: neo <2418660459@qq.com>
Date: Tue, 13 Jan 2026 19:59:54 +0800
Subject: [PATCH 18/25] install modal dependencies on the fly

---
 build_windows.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/build_windows.py b/build_windows.py
index c153f14..cc8dd7d 100644
--- a/build_windows.py
+++ b/build_windows.py
@@ -212,6 +212,21 @@ def build():
         # Build modal_infer if modal.spec is present (separate target).
         modal_spec = Path("modal.spec")
         if modal_spec.exists():
+            # Ensure modal dependencies are available in the current env.
+            try:
+                import modal  # noqa: F401
+                import questionary  # noqa: F401
+            except ImportError:
+                print("\nmodal/questionary not found; installing for modal.spec build...")
+                install_cmd = [
+                    sys.executable, "-m", "pip", "install",
+                    "modal", "questionary",
+                ]
+                install_result = subprocess.run(install_cmd, capture_output=False)
+                if install_result.returncode != 0:
+                    print("\nFailed to install modal/questionary.")
+                    return 1
+
             modal_cmd = [
                 sys.executable, "-m", "PyInstaller",
                 "--clean",

From bc5efca0f27c30583182b45c02b5c09047b9f9e5 Mon Sep 17 00:00:00 2001
From: neo <2418660459@qq.com>
Date: Tue, 13 Jan 2026 20:14:36 +0800
Subject: [PATCH 19/25] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E4=BA=86=20ensure=5Fut?=
 =?UTF-8?q?f8=5Fstdio()=EF=BC=8C=E5=BC=BA=E5=88=B6=E6=8A=8A=20stdout/stder?=
 =?UTF-8?q?r=20=E6=94=B9=E6=88=90=20UTF=E2=80=918=EF=BC=88=E5=B9=B6?=
 =?UTF-8?q?=E7=94=A8=20errors=3D"replace"=20=E5=85=9C=E5=BA=95=EF=BC=89?=
 =?UTF-8?q?=EF=BC=8C=E9=81=BF=E5=85=8D=20argparse=20=E6=89=93=E5=8D=B0?=
 =?UTF-8?q?=E4=B8=AD=E6=96=87=E6=97=B6=E5=B4=A9=E6=8E=89=E3=80=82?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 modal_infer.py | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/modal_infer.py b/modal_infer.py
index 75d066c..576a9fe 100644
--- a/modal_infer.py
+++ b/modal_infer.py
@@ -1,8 +1,7 @@
-"""feature-modal: 交互式 CLI，完成 Modal App 构建、音频上传、推理执行与结果回传。"""
-
 from __future__ import annotations
 
 import argparse
+import io
 import logging
 import sys
 from dataclasses import dataclass
@@ -20,6 +19,28 @@
 >>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑，逐文件上传避免连接断开)
 from uuid import uuid4
 
+def ensure_utf8_stdio() -> None:
+    for name in ("stdout", "stderr"):
+        stream = getattr(sys, name, None)
+        if stream is None:
+            continue
+        try:
+            encoding = getattr(stream, "encoding", None)
+            if encoding and encoding.lower().startswith("utf-8"):
+                continue
+            if hasattr(stream, "reconfigure"):
+                stream.reconfigure(encoding="utf-8", errors="replace")
+            elif hasattr(stream, "buffer"):
+                setattr(
+                    sys,
+                    name,
+                    io.TextIOWrapper(stream.buffer, encoding="utf-8", errors="replace"),
+                )
+        except Exception:
+            pass
+
+ensure_utf8_stdio()
+
 try:
     import questionary  # type: ignore
     from questionary import Choice  # type: ignore

From d39e0f5dd71a927db45d3bc00ca1c5b807d75999 Mon Sep 17 00:00:00 2001
From: neo <2418660459@qq.com>
Date: Tue, 13 Jan 2026 20:44:14 +0800
Subject: [PATCH 20/25] =?UTF-8?q?=E6=96=B0=E5=A2=9E=20REPO=5FREF=20=3D=20"?=
 =?UTF-8?q?v1.4"=EF=BC=8C=E5=B9=B6=E5=9C=A8=20clone/update=20=E6=97=B6?=
 =?UTF-8?q?=E5=BC=BA=E5=88=B6=20checkout/reset=20=E5=88=B0=E8=AF=A5?=
 =?UTF-8?q?=E7=89=88=E6=9C=AC?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 modal_infer.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/modal_infer.py b/modal_infer.py
index 576a9fe..f30f573 100644
--- a/modal_infer.py
+++ b/modal_infer.py
@@ -57,6 +57,7 @@ def ensure_utf8_stdio() -> None:
 APP_NAME = "Faster-Whisper-TransWithAI-ChickenRice"
 REPO_URL = "https://github.com/TransWithAI/Faster-Whisper-TransWithAI-ChickenRice"
 <<<<<<< HEAD
+REPO_REF = "v1.4"
 VOLUME_NAME = "Faster_Whisper"
 VOLUME_ROOT = "/Faster_Whisper"
 REMOTE_MOUNT = VOLUME_ROOT
@@ -1010,11 +1011,12 @@ def log(msg: str) -> None:
 
     if not (repo_dir / ".git").exists():
         log("开始克隆仓库...")
-        run(["git", "clone", REPO_URL, str(repo_dir)])
+        run(["git", "clone", "--branch", REPO_REF, "--depth", "1", REPO_URL, str(repo_dir)])
     else:
         log("更新仓库...")
-        run(["git", "-C", str(repo_dir), "fetch", "origin", "main"])
-        run(["git", "-C", str(repo_dir), "reset", "--hard", "origin/main"])
+        run(["git", "-C", str(repo_dir), "fetch", "--tags", "origin"])
+        run(["git", "-C", str(repo_dir), "checkout", "-f", REPO_REF])
+        run(["git", "-C", str(repo_dir), "reset", "--hard", REPO_REF])
 
     model_profile = job["model_profile"]
     model_path = repo_dir / "models"

From 25eebe2f38a3813817756e6f0596e8d45fc9134b Mon Sep 17 00:00:00 2001
From: neo <2418660459@qq.com>
Date: Tue, 13 Jan 2026 21:08:38 +0800
Subject: [PATCH 21/25] fix: pass --audio_suffixes to infer.py from modal_infer.py

---
 modal_infer.py | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/modal_infer.py b/modal_infer.py
index f30f573..f6ccc72 100644
--- a/modal_infer.py
+++ b/modal_infer.py
@@ -1061,16 +1061,12 @@ def snapshot(path: str) -> set:
     cmd = [
         "python",
         str(repo_dir / "infer.py"),
-        "--device",
-        "cuda",
-        "--model_name_or_path",
-        str(model_path),
-        "--sub_formats",
-        job["sub_formats"],
-        "--log_level",
-        "INFO",
-        "--output_dir",
-        str(output_dir),
+        "--audio_suffixes", "mp3,wav,flac,m4a,aac,ogg,wma,mp4,mkv,avi,mov,webm,flv,wmv",
+        "--device","cuda",
+        "--model_name_or_path",str(model_path),
+        "--sub_formats",job["sub_formats"],
+        "--log_level","INFO",
+        "--output_dir",str(output_dir),
     ]
     if job["enable_batching"]:
         cmd.append("--enable_batching")

From f51736942beb044141ce36ff358773c7c33cdd14 Mon Sep 17 00:00:00 2001
From: neo <2418660459@qq.com>
Date: Tue, 13 Jan 2026 21:27:14 +0800
Subject: [PATCH 22/25] clean up diff against v1.6

---
 .gitignore              |  1 -
 build_windows.py        | 28 ++++++++--------------------
 environment-cuda118.yml |  3 +--
 environment-cuda128.yml |  3 +--
 4 files changed, 10 insertions(+), 25 deletions(-)

diff --git a/.gitignore b/.gitignore
index 8ff3ddd..0d6661a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,7 +10,6 @@ __pycache__/
 # C extensions
 *.so
 
-node_modules/
 # Distribution / packaging
 .Python
 build/
diff --git a/build_windows.py b/build_windows.py
index cc8dd7d..7932ade 100644
--- a/build_windows.py
+++ b/build_windows.py
@@ -209,6 +209,7 @@ def build():
 
     # Verify build succeeded and check for CUDA libraries
     if result.returncode == 0:
+        dist_dir = Path("dist/faster_whisper_transwithai_chickenrice")
         # Build modal_infer if modal.spec is present (separate target).
         modal_spec = Path("modal.spec")
         if modal_spec.exists():
@@ -247,11 +248,6 @@ def build():
         client_dir = dist_root / "client"
 
         if dist_dir.exists():
-            verify_dirs = [dist_dir]
-        else:
-            verify_dirs = [p for p in [engine_dir, client_dir] if p.exists()]
-
-        if verify_dirs:
             # Quick verification of critical libraries
             print("\nVerifying CUDA libraries in distribution...")
 
@@ -260,20 +256,15 @@ def build():
             missing_libs = []
 
             # Check in root directory and all subdirectories
-            all_dlls = []
-            for root_dir in verify_dirs:
-                all_dlls.extend(root_dir.glob("**/*.dll"))
+            all_dlls = list(dist_dir.glob("**/*.dll"))
 
             for critical in critical_libs:
                 found_in_locations = []
                 for dll_path in all_dlls:
                     if critical in dll_path.name.lower():
-                        # Get relative path from dist root for consistent display
-                        try:
-                            rel_path = dll_path.relative_to(dist_root)
-                            location = str(rel_path.parent) if str(rel_path.parent) != '.' else 'root'
-                        except ValueError:
-                            location = str(dll_path.parent)
+                        # Get relative path from dist_dir
+                        rel_path = dll_path.relative_to(dist_dir)
+                        location = str(rel_path.parent) if str(rel_path.parent) != '.' else 'root'
                         found_in_locations.append(location)
 
                 if found_in_locations:
@@ -293,13 +284,9 @@ def build():
                 print("     Note: The PyInstaller hooks should have included these.")
                 print("     If GPU acceleration doesn't work, check your conda environment.")
 
-            if dist_dir.exists():
-                output_locations = [str(dist_dir)]
-            else:
-                output_locations = [str(p) for p in verify_dirs]
-            print(f"\nBuild complete! Output in: {', '.join(output_locations)}")
+            print(f"\nBuild complete! Output in: {dist_dir}")
         else:
-            print("Error: dist/engine or dist/client directory not found after build")
+            print("Error: dist/faster_whisper_transwithai_chickenrice directory not found after build")
             return 1
     else:
         print("\nBuild failed!")
@@ -309,3 +296,4 @@ def build():
 
 if __name__ == "__main__":
     sys.exit(build())
+    
diff --git a/environment-cuda118.yml b/environment-cuda118.yml
index 8d3c832..7d66261 100644
--- a/environment-cuda118.yml
+++ b/environment-cuda118.yml
@@ -37,8 +37,7 @@ dependencies:
       - backports.functools-lru-cache  # Fix for PyInstaller ModuleNotFoundError
 
       # Build tools
-      # PyInstaller 6.17.0 has a conda hook regression (KeyError: 'depends') in some conda environments
-      - pyinstaller==6.16.0
+      - pyinstaller>=6.0.0
       - setuptools>=65.0.0
       - wheel>=0.38.0
       - build>=0.10.0
diff --git a/environment-cuda128.yml b/environment-cuda128.yml
index 655dd6a..b9ff41a 100644
--- a/environment-cuda128.yml
+++ b/environment-cuda128.yml
@@ -34,8 +34,7 @@ dependencies:
       - backports.functools-lru-cache  # Fix for PyInstaller ModuleNotFoundError
 
       # Build tools
-      # PyInstaller 6.17.0 has a conda hook regression (KeyError: 'depends') in some conda environments
-      - pyinstaller==6.16.0
+      - pyinstaller>=6.0.0
       - setuptools>=65.0.0
       - wheel>=0.38.0
       - build>=0.10.0

From e90a3305326e2e3ec08ce8ebf9d906a63b2e9acf Mon Sep 17 00:00:00 2001
From: neo <randomless98@gmail.com>
Date: Wed, 14 Jan 2026 10:01:06 +0800
Subject: [PATCH 23/25] remove leftover merge-conflict markers from the rebase

---
 modal_infer.py | 371 +------------------------------------------------
 1 file changed, 4 insertions(+), 367 deletions(-)

diff --git a/modal_infer.py b/modal_infer.py
index f6ccc72..8cf646c 100644
--- a/modal_infer.py
+++ b/modal_infer.py
@@ -8,15 +8,7 @@
 from datetime import datetime
 from pathlib import Path, PurePosixPath
 import subprocess
-<<<<<<< HEAD
-<<<<<<< HEAD
 from typing import Dict, List, Optional, Sequence, Tuple
-=======
-from typing import Dict, Iterable, List, Optional, Sequence, Tuple
->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
-=======
-from typing import Dict, List, Optional, Sequence, Tuple
->>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑，逐文件上传避免连接断开)
 from uuid import uuid4
 
 def ensure_utf8_stdio() -> None:
@@ -56,21 +48,13 @@ def ensure_utf8_stdio() -> None:
 
 APP_NAME = "Faster-Whisper-TransWithAI-ChickenRice"
 REPO_URL = "https://github.com/TransWithAI/Faster-Whisper-TransWithAI-ChickenRice"
-<<<<<<< HEAD
 REPO_REF = "v1.4"
 VOLUME_NAME = "Faster_Whisper"
 VOLUME_ROOT = "/Faster_Whisper"
 REMOTE_MOUNT = VOLUME_ROOT
-APP_ROOT_REL = Path(APP_NAME)
-SESSION_SUBDIR = Path("sessions")
-=======
-VOLUME_NAME = "agent_volume"
-VOLUME_ROOT = Path("/agent_volume")
-REMOTE_MOUNT = VOLUME_ROOT
-APP_ROOT_REL = Path(APP_NAME)
-SESSION_SUBDIR = APP_ROOT_REL / "sessions"
->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
-REPO_VOLUME_DIR = VOLUME_ROOT / "repo"
+APP_ROOT_REL = APP_NAME
+SESSION_SUBDIR = "sessions"
+REPO_VOLUME_DIR = f"{VOLUME_ROOT}/repo"
 SUB_FORMATS = "srt,vtt,lrc"
 SUB_SUFFIXES = {".srt", ".vtt", ".lrc"}
 AUDIO_SUFFIXES = {
@@ -81,13 +65,6 @@ def ensure_utf8_stdio() -> None:
     ".aac",
     ".ogg",
     ".wma",
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-    ".mp4",
->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
-=======
->>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑，逐文件上传避免连接断开)
     ".mkv",
     ".avi",
     ".mov",
@@ -95,17 +72,10 @@ def ensure_utf8_stdio() -> None:
     ".flv",
     ".wmv",
 }
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑，逐文件上传避免连接断开)
 VIDEO_NEED_CONVERT = {".mp4"}  # 需要用户手动转换的格式
 DEFAULT_GPU_CHOICES = [
     "T4",
     "L4",
-=======
-DEFAULT_GPU_CHOICES = [
->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
     "L40S",
     "A10G",
     "A100-40GB",
@@ -113,11 +83,6 @@ def ensure_utf8_stdio() -> None:
     "H100",
     "H200",
     "B200",
-<<<<<<< HEAD
-=======
-    "L4",
-    "T4",
->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
 ]
 
 def resolve_resource_path(filename: str) -> Path:
@@ -157,7 +122,6 @@ class UploadManifest:
     remote_output_rel: Path
     local_output_dir: Path
     remote_logs_rel: Path
-<<<<<<< HEAD
     original_filename: Optional[str] = None  # 原始文件名（用于恢复空格）
 
 
@@ -169,20 +133,6 @@ class ScanResult:
 
 class NoAudioFilesError(Exception):
     pass
-=======
-
-
-@dataclass
-class ScanResult:
-    audio_files: List[Path]
-    mp4_files: List[Path]
-
-
-@dataclass
-class RemoteResult:
-    created_files: Dict[str, List[str]]
-    log_file: Optional[str]
->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
 
 
 def rel_to_volume_path(path: Path) -> str:
@@ -267,19 +217,6 @@ def ensure_questionary():
 def ask_selection() -> UserSelection:
     ensure_questionary()
 
-<<<<<<< HEAD
-=======
-    run_mode = questionary.select(
-        "选择运行模式：",
-        choices=[
-            Choice(title="一次性运行（modal run）", value="once"),
-            Choice(title="持久化 App（modal deploy）", value="persistent"),
-        ],
-    ).ask()
-    if not run_mode:
-        raise KeyboardInterrupt
-
->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
     gpu_choice = questionary.select(
         "选择 GPU",
         choices=DEFAULT_GPU_CHOICES,
@@ -340,11 +277,7 @@ def ask_selection() -> UserSelection:
     )
 
     return UserSelection(
-<<<<<<< HEAD
         run_mode="once",
-=======
-        run_mode=run_mode,
->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
         gpu_choice=gpu_choice,
         input_path=input_path,
         model_profile=model_profile,
@@ -357,15 +290,10 @@ def ask_selection() -> UserSelection:
     )
 
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑，逐文件上传避免连接断开)
 def scan_audio_files(path: Path) -> ScanResult:
     """扫描目录，返回音频文件和需要转换的 mp4 文件"""
     audio_files: List[Path] = []
     mp4_files: List[Path] = []
-<<<<<<< HEAD
     for file in path.rglob("*"):
         if file.is_file():
             suffix = file.suffix.lower()
@@ -401,56 +329,10 @@ def validate_audio_path(path: Path) -> ScanResult:
         if not scan_result.audio_files:
             raise NoAudioFilesError(f"输入的文件夹内没有音频文件：{path}")
         return scan_result
-=======
-def iter_audio_files(path: Path) -> List[Path]:
-    files: List[Path] = []
-=======
->>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑，逐文件上传避免连接断开)
-    for file in path.rglob("*"):
-        if file.is_file():
-            suffix = file.suffix.lower()
-            if suffix in AUDIO_SUFFIXES:
-                audio_files.append(file)
-            elif suffix in VIDEO_NEED_CONVERT:
-                mp4_files.append(file)
-    return ScanResult(audio_files=audio_files, mp4_files=mp4_files)
-
-
-def validate_audio_path(path: Path) -> ScanResult:
-    """验证音频路径，返回扫描结果。如果发现 mp4 文件会打印警告。"""
-    if path.is_file():
-        suffix = path.suffix.lower()
-        if suffix in VIDEO_NEED_CONVERT:
-            raise ValueError(
-                f"文件 {path} 是 mp4 格式，请先使用 ffmpeg 转换为 mp3：\n"
-                f"  ffmpeg -i \"{path}\" -vn -acodec libmp3lame \"{path.with_suffix('.mp3')}\""
-            )
-        if suffix not in AUDIO_SUFFIXES:
-            raise ValueError(f"文件 {path} 不属于支持的音/视频格式。")
-        return ScanResult(audio_files=[path], mp4_files=[])
-    elif path.is_dir():
-        scan_result = scan_audio_files(path)
-        if scan_result.mp4_files:
-            logging.warning("=" * 60)
-            logging.warning("发现 %d 个 mp4 文件，这些文件将被跳过：", len(scan_result.mp4_files))
-            for mp4_file in scan_result.mp4_files:
-                logging.warning("  - %s", mp4_file)
-            logging.warning("请使用 ffmpeg 转换为 mp3 后再处理，例如：")
-            logging.warning("  ffmpeg -i \"input.mp4\" -vn -acodec libmp3lame \"output.mp3\"")
-            logging.warning("=" * 60)
-        if not scan_result.audio_files:
-            raise ValueError(f"文件夹 {path} 中没有支持的音/视频文件。")
-<<<<<<< HEAD
->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
-=======
-        return scan_result
->>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑，逐文件上传避免连接断开)
     else:
         raise ValueError(f"路径 {path} 既不是文件也不是文件夹。")
 
 
-<<<<<<< HEAD
-<<<<<<< HEAD
 def upload_single_file(
     volume: modal.Volume,
     selection: UserSelection,
@@ -490,45 +372,6 @@ def upload_single_file(
         local_output_dir=local_output_dir,
         remote_logs_rel=remote_logs_rel,
         original_filename=original_filename,  # 始终记录原始文件名
-=======
-def prepare_upload(
-=======
-def upload_single_file(
->>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑，逐文件上传避免连接断开)
-    volume: modal.Volume,
-    selection: UserSelection,
-    audio_file: Path,
-    base_dir: Optional[Path] = None,
-) -> UploadManifest:
-    """上传单个音频文件到 Modal Volume。
-
-    Args:
-        volume: Modal Volume 实例
-        selection: 用户选择配置
-        audio_file: 要上传的音频文件路径
-        base_dir: 基础目录（用于文件夹模式，输出到此目录）
-    """
-    session_id = f"{datetime.now().strftime('%Y%m%d-%H%M%S')}-{uuid4().hex[:6]}"
-    remote_session_rel = SESSION_SUBDIR / session_id
-    remote_logs_rel = remote_session_rel / "logs"
-
-    with volume.batch_upload(force=True) as batch:
-        remote_rel = remote_session_rel / audio_file.name
-        logging.info("上传文件 -> %s", rel_to_volume_path(remote_rel))
-        batch.put_file(str(audio_file), rel_to_volume_path(remote_rel))
-
-    # 如果指定了 base_dir（文件夹模式），输出到 base_dir；否则输出到文件所在目录
-    local_output_dir = base_dir if base_dir else audio_file.parent
-
-    return UploadManifest(
-        session_id=session_id,
-        source_type="file",
-        local_source=audio_file,
-        remote_inputs_rel=[remote_rel],
-        remote_output_rel=remote_session_rel,
-        local_output_dir=local_output_dir,
-        remote_logs_rel=remote_logs_rel,
->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
     )
 
 
@@ -585,20 +428,12 @@ def run_remote_pipeline(
     selection: UserSelection,
     manifest: UploadManifest,
     payload: Dict,
-<<<<<<< HEAD
 ) -> Dict:
     logging.info("=== 开始构建 Modal 镜像 ===")
     image = build_modal_image()
     logging.info("✓ 镜像构建完成")
     logging.info("使用 GPU：%s", selection.gpu_choice)
     logging.info("超时时间：%d 分钟", selection.timeout_minutes)
-=======
-) -> RemoteResult:
-    logging.info("=== 开始构建 Modal 镜像 ===")
-    image = build_modal_image()
-    logging.info("✓ 镜像构建完成")
-    logging.info("使用 GPU：%s", selection.gpu_choice)
->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
     app = modal.App(APP_NAME)
 
     @app.function(
@@ -611,7 +446,6 @@ def run_remote_pipeline(
     def modal_pipeline(job_payload: Dict) -> Dict:
         return _remote_pipeline(job_payload)
 
-<<<<<<< HEAD
     logging.info("=== 开始远程执行 ===")
     logging.info("正在启动 GPU 容器并执行推理任务...")
     logging.info("（以下为远程容器输出）")
@@ -623,84 +457,6 @@ def modal_pipeline(job_payload: Dict) -> Dict:
     return result  # 直接返回 Dict，包含 created_files 和 log_content
 
 
-def process_directory_files(
-    volume: modal.Volume,
-    selection: UserSelection,
-    audio_files: List[Path],
-) -> Tuple[int, int]:
-    """处理文件夹中的所有音频文件，容器复用。
-
-    Args:
-        volume: Modal Volume 实例
-        selection: 用户选择配置
-        audio_files: 要处理的音频文件列表
-
-    Returns:
-        (成功数, 失败数) 元组
-    """
-    logging.info("=== 开始构建 Modal 镜像 ===")
-    image = build_modal_image()
-    logging.info("✓ 镜像构建完成")
-    logging.info("使用 GPU：%s", selection.gpu_choice)
-    logging.info("超时时间：%d 分钟", selection.timeout_minutes)
-    logging.info("待处理文件数：%d", len(audio_files))
-
-    app = modal.App(APP_NAME)
-
-    @app.function(
-        image=image,
-        gpu=selection.gpu_choice,
-        timeout=selection.timeout_minutes * 60,
-        volumes={str(REMOTE_MOUNT): volume},
-        serialized=True,
-        min_containers=1,  # 保持容器预热，复用容器
-    )
-    def modal_pipeline(job_payload: Dict) -> Dict:
-        return _remote_pipeline(job_payload)
-
-    success_count = 0
-    fail_count = 0
-    base_dir = selection.input_path  # 文件夹模式下，输出到源文件夹
-
-    with app.run():
-        for i, audio_file in enumerate(audio_files, 1):
-            logging.info("=" * 60)
-            logging.info("处理文件 [%d/%d]: %s", i, len(audio_files), audio_file.name)
-            logging.info("=" * 60)
-            try:
-                # 1. 上传单个文件
-                manifest = upload_single_file(volume, selection, audio_file, base_dir)
-
-                # 2. 构建 payload
-                payload = build_job_payload(selection, manifest)
-
-                # 3. 执行推理（复用容器）
-                logging.info("正在执行推理...")
-                result = modal_pipeline.remote(payload)
-
-                # 4. 写入结果文件到本地
-                download_outputs(manifest, result)
-
-                logging.info("✓ 文件 %s 处理完成", audio_file.name)
-                success_count += 1
-            except Exception as e:
-                logging.error("✗ 文件 %s 处理失败: %s", audio_file.name, e)
-                fail_count += 1
-                continue  # 继续处理下一个文件
-
-    return success_count, fail_count
-=======
-    with app.run():
-        result = modal_pipeline.remote(payload)
-    logging.info("-" * 60)
-    logging.info("✓ 远程执行完成")
-    created = {
-        remote_dir: files for remote_dir, files in result.get("created", {}).items()
-    }
-    return RemoteResult(created_files=created, log_file=result.get("log_file"))
->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
-
-
 def process_directory_files(
     volume: modal.Volume,
     selection: UserSelection,
@@ -771,7 +527,6 @@ def modal_pipeline(job_payload: Dict) -> Dict:
 
 def download_outputs(
     manifest: UploadManifest,
-<<<<<<< HEAD
     result: Dict,
 ) -> None:
     """从远程结果中提取文件内容并写入本地"""
@@ -807,60 +562,15 @@ def download_outputs(
 
 
 def summarize(manifest: UploadManifest, result: Dict) -> None:
-=======
-    result: RemoteResult,
-) -> None:
-    """从远程结果中提取文件内容并写入本地"""
-    import base64
-
-    created_files = result.get("created_files", {})
-    log_content = result.get("log_content")
-
-    # 获取原始文件名的 stem（不含扩展名）
-    original_stem = Path(manifest.original_filename).stem if manifest.original_filename else "todo"
-
-    for filename, content_b64 in created_files.items():
-        content = base64.b64decode(content_b64)
-        # 将 todo.xxx 替换为原始文件名
-        if filename.startswith("todo."):
-            suffix = Path(filename).suffix
-            new_filename = original_stem + suffix
-        else:
-            new_filename = filename
-
-        local_path = manifest.local_output_dir / new_filename
-        local_path.parent.mkdir(parents=True, exist_ok=True)
-        local_path.write_bytes(content)
-        logging.info("写入文件: %s (%d bytes)", local_path, len(content))
-
-    # 写入 log 文件
-    if log_content:
-        log_dir = Path("logs")
-        log_dir.mkdir(exist_ok=True)
-        log_path = log_dir / f"modal_run_{manifest.session_id}.log"
-        log_path.write_bytes(base64.b64decode(log_content))
-        logging.info("写入日志: %s", log_path)
-
-
-def summarize(manifest: UploadManifest, result: RemoteResult) -> None:
->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
     logging.info("=== 运行完成 ===")
     logging.info("Session: %s", manifest.session_id)
     logging.info("源路径: %s", manifest.local_source)
     logging.info("输出路径: %s", manifest.local_output_dir if manifest.source_type == "directory" else manifest.local_source.parent)
-<<<<<<< HEAD
     created_files = result.get("created_files", {})
     if created_files:
         logging.info("新生成文件：")
         for filename in created_files.keys():
             logging.info("  %s", filename)
-=======
-    if result.created_files:
-        logging.info("新生成文件：")
-        for remote_dir, files in result.created_files.items():
-            for file in files:
-                logging.info("  %s", file)
->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
 
 
 def parse_args() -> argparse.Namespace:
@@ -873,7 +583,6 @@ def parse_args() -> argparse.Namespace:
     return parser.parse_args()
 
 
-<<<<<<< HEAD
 def prompt_exit(enabled: bool) -> None:
     if not enabled:
         return
@@ -928,80 +637,27 @@ def main() -> int:
     prompt_exit(not args.non_interactive)
     return exit_code
 
-=======
-def main() -> None:
-    parse_args()
-    log_path = setup_logger()
-    exit_code = 0
-    try:
-        selection = ask_selection()
-        volume = modal.Volume.from_name(VOLUME_NAME, create_if_missing=True)
-
-        # 验证路径并获取扫描结果
-        scan_result = validate_audio_path(selection.input_path)
-
-        if selection.input_path.is_dir():
-            # 文件夹模式：逐个处理文件，容器复用
-            logging.info("检测到文件夹输入，将逐个处理 %d 个音频文件", len(scan_result.audio_files))
-            success_count, fail_count = process_directory_files(
-                volume, selection, scan_result.audio_files
-            )
-            logging.info("=" * 60)
-            logging.info("=== 批量处理完成 ===")
-            logging.info("成功: %d, 失败: %d", success_count, fail_count)
-            logging.info("输出路径: %s", selection.input_path)
-            logging.info("✅ 请在上方输出路径查看字幕结果。")
-        else:
-            # 单文件模式：保持原有逻辑
-            manifest = upload_single_file(volume, selection, selection.input_path)
-            payload = build_job_payload(selection, manifest)
-            result = run_remote_pipeline(volume, selection, manifest, payload)
-            download_outputs(manifest, result)
-            summarize(manifest, result)
-            logging.info("✅ 请在上方输出路径查看字幕结果。")
-    except KeyboardInterrupt:
-        logging.warning("用户中断，未执行任何远程操作。")
-        exit_code = 1
-    except Exception as exc:
-        if isinstance(exc, NoAudioFilesError):
-            logging.error("%s", exc)
-        else:
-            logging.exception("运行失败：%s", exc)
-        logging.error("日志见：%s", log_path)
-        exit_code = 1
-
-
->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
 def _remote_pipeline(job: Dict) -> Dict:
     import subprocess
     from pathlib import Path
     import os
 
-<<<<<<< HEAD
     # 强制重新加载 Volume，确保看到最新上传的文件
     from modal import Volume
     volume = Volume.from_name("Faster_Whisper")
     volume.reload()
 
-=======
->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
     def run(cmd: Sequence[str], cwd: Optional[str] = None, env: Optional[dict] = None) -> None:
         print(" ".join(cmd), flush=True)
         subprocess.run(cmd, check=True, cwd=cwd, env=env)
 
     mount_root = Path(job["mount_root"])
-    repo_dir = REPO_VOLUME_DIR
-<<<<<<< HEAD
+    repo_dir = Path(REPO_VOLUME_DIR)
 
     # log 文件放在 session 目录下，而不是 logs 子目录
     session_dir = Path(job["remote_output_dir"])
     session_dir.mkdir(parents=True, exist_ok=True)
     log_file = session_dir / "modal_run.log"
-=======
-    logs_dir = Path(job["remote_logs_dir"])
-    logs_dir.mkdir(parents=True, exist_ok=True)
-    log_file = logs_dir / "modal_run.log"
->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
 
     def log(msg: str) -> None:
         line = f"[modal_run] {msg}"
@@ -1076,7 +732,6 @@ def snapshot(path: str) -> set:
 
     cmd.extend(job["remote_inputs"])
 
-<<<<<<< HEAD
     # 在执行推理前，等待文件同步完成
     import time
     log("等待文件同步...")
@@ -1134,21 +789,13 @@ def snapshot(path: str) -> set:
 
         log(f"=== 调试信息结束 ===")
         raise
-=======
-    log(f"执行推理命令：{' '.join(cmd)}")
-    run(cmd, cwd=str(repo_dir))
->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
 
     def to_volume_path(path_str: str) -> str:
         return container_to_volume_path(path_str)
 
-<<<<<<< HEAD
     # 收集生成的文件内容（直接返回，避免 volume 同步问题）
     import base64
     created_files = {}  # {filename: base64_content}
-=======
-    created = {}
->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)
     for target in job["output_targets"]:
         remote_dir = target["remote_dir"]
         after = snapshot(remote_dir)
@@ -1158,7 +805,6 @@ def to_volume_path(path_str: str) -> str:
             for file in after - prev
             if Path(file).suffix.lower() in SUB_SUFFIXES
         )
-<<<<<<< HEAD
         for file_path in new_files:
             file_path = Path(file_path)
             if file_path.exists():
@@ -1181,12 +827,3 @@ def to_volume_path(path_str: str) -> str:
 
 if __name__ == "__main__":  # pragma: no cover
     sys.exit(main())
-=======
-        created[to_volume_path(remote_dir)] = [to_volume_path(path) for path in new_files]
-
-    return {"created": created, "log_file": to_volume_path(str(log_file))}
-
-
-if __name__ == "__main__":  # pragma: no cover
-    main()
->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support)

From be2b53b0891502736c3e577c39c08d70df1f5cd3 Mon Sep 17 00:00:00 2001
From: neo <randomless98@gmail.com>
Date: Thu, 15 Jan 2026 09:39:13 +0800
Subject: [PATCH 24/25] Revert PyInstaller version pin in environment-cuda122.yml as requested

---
 environment-cuda122.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/environment-cuda122.yml b/environment-cuda122.yml
index 6665cd3..aeb91bf 100644
--- a/environment-cuda122.yml
+++ b/environment-cuda122.yml
@@ -34,8 +34,7 @@ dependencies:
       - backports.functools-lru-cache  # Fix for PyInstaller ModuleNotFoundError
 
       # Build tools
-      # PyInstaller 6.17.0 has a conda hook regression (KeyError: 'depends') in some conda environments
-      - pyinstaller==6.16.0
+      - pyinstaller>=6.0.0
       - setuptools>=65.0.0
       - wheel>=0.38.0
       - build>=0.10.0

From a71ccc20b0534d8a29de00856dd23024586aef7b Mon Sep 17 00:00:00 2001
From: Randomless <2418660459@qq.com>
Date: Thu, 15 Jan 2026 22:36:01 +0800
Subject: [PATCH 25/25] =?UTF-8?q?=E7=A7=BB=E9=99=A4=20REPO=5FREF=20?=
 =?UTF-8?q?=E7=89=88=E6=9C=AC=E9=94=81=E5=AE=9A=EF=BC=8C=E6=94=B9=E4=B8=BA?=
 =?UTF-8?q?=E4=BD=BF=E7=94=A8=20main=20=E5=88=86=E6=94=AF=E6=9C=80?=
 =?UTF-8?q?=E6=96=B0=E4=BB=A3=E7=A0=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 删除 REPO_REF = "v1.4" 常量
- 克隆时不再指定 --branch，使用默认分支
- 更新时 reset 到 origin/main 而非固定 tag

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 modal_infer.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/modal_infer.py b/modal_infer.py
index 8cf646c..4cb753b 100644
--- a/modal_infer.py
+++ b/modal_infer.py
@@ -48,7 +48,6 @@ def ensure_utf8_stdio() -> None:
 
 APP_NAME = "Faster-Whisper-TransWithAI-ChickenRice"
 REPO_URL = "https://github.com/TransWithAI/Faster-Whisper-TransWithAI-ChickenRice"
-REPO_REF = "v1.4"
 VOLUME_NAME = "Faster_Whisper"
 VOLUME_ROOT = "/Faster_Whisper"
 REMOTE_MOUNT = VOLUME_ROOT
@@ -667,12 +666,11 @@ def log(msg: str) -> None:
 
     if not (repo_dir / ".git").exists():
         log("开始克隆仓库...")
-        run(["git", "clone", "--branch", REPO_REF, "--depth", "1", REPO_URL, str(repo_dir)])
+        run(["git", "clone", "--depth", "1", REPO_URL, str(repo_dir)])
     else:
         log("更新仓库...")
-        run(["git", "-C", str(repo_dir), "fetch", "--tags", "origin"])
-        run(["git", "-C", str(repo_dir), "checkout", "-f", REPO_REF])
-        run(["git", "-C", str(repo_dir), "reset", "--hard", REPO_REF])
+        run(["git", "-C", str(repo_dir), "fetch", "origin"])
+        run(["git", "-C", str(repo_dir), "reset", "--hard", "origin/main"])
 
     model_profile = job["model_profile"]
     model_path = repo_dir / "models"