From d4c71adfe248baceacf4339d7ba7292c5bf14d56 Mon Sep 17 00:00:00 2001 From: grider-transwithai Date: Fri, 26 Dec 2025 14:27:27 +0000 Subject: [PATCH 01/25] fix: Pin PyInstaller to 6.16.0 to avoid conda hook regression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PyInstaller 6.17.0 has a KeyError: 'depends' regression in some conda environments. Pin to 6.16.0 until the issue is resolved. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- environment-cuda118.yml | 3 ++- environment-cuda122.yml | 3 ++- environment-cuda128.yml | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/environment-cuda118.yml b/environment-cuda118.yml index 7d66261..8d3c832 100644 --- a/environment-cuda118.yml +++ b/environment-cuda118.yml @@ -37,7 +37,8 @@ dependencies: - backports.functools-lru-cache # Fix for PyInstaller ModuleNotFoundError # Build tools - - pyinstaller>=6.0.0 + # PyInstaller 6.17.0 has a conda hook regression (KeyError: 'depends') in some conda environments + - pyinstaller==6.16.0 - setuptools>=65.0.0 - wheel>=0.38.0 - build>=0.10.0 diff --git a/environment-cuda122.yml b/environment-cuda122.yml index aeb91bf..6665cd3 100644 --- a/environment-cuda122.yml +++ b/environment-cuda122.yml @@ -34,7 +34,8 @@ dependencies: - backports.functools-lru-cache # Fix for PyInstaller ModuleNotFoundError # Build tools - - pyinstaller>=6.0.0 + # PyInstaller 6.17.0 has a conda hook regression (KeyError: 'depends') in some conda environments + - pyinstaller==6.16.0 - setuptools>=65.0.0 - wheel>=0.38.0 - build>=0.10.0 diff --git a/environment-cuda128.yml b/environment-cuda128.yml index b9ff41a..655dd6a 100644 --- a/environment-cuda128.yml +++ b/environment-cuda128.yml @@ -34,7 +34,8 @@ dependencies: - backports.functools-lru-cache # Fix for PyInstaller ModuleNotFoundError # Build tools - - pyinstaller>=6.0.0 + # PyInstaller 6.17.0 has a conda hook regression (KeyError: 'depends') in some conda environments + - pyinstaller==6.16.0 - setuptools>=65.0.0 - wheel>=0.38.0 - build>=0.10.0 From 3e2fc63b1c438ca7957836e3a850ee4cc3334150 Mon Sep 17 00:00:00 2001 From: neo Date: Tue, 30 Dec 2025 15:55:25 +0800 Subject: [PATCH 02/25] feat: Add Modal cloud GPU inference support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add modal_infer.py for cloud-based GPU inference using Modal platform: - Interactive CLI for GPU selection and configuration - Support for multiple GPU types (T4, A10G, A100, H100, etc.) - Automatic model download and caching on Modal Volume - Batch processing support for accelerated transcription - Micromamba-based image with conda environment from environment-cuda128.yml Add environment-modal.yml for lightweight local client setup: - Minimal dependencies (modal, questionary) - Python 3.10 environment for running modal_infer.py locally Update 使用说明.txt with Modal usage instructions: - Environment setup guide - Modal account registration and token configuration - HuggingFace token setup for model downloads - Step-by-step usage instructions 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- environment-modal.yml | 21 ++ modal_infer.py | 216 ++++++++++++++++++ ...7\347\224\250\350\257\264\346\230\216.txt" | 8 +- 3 files changed, 242 insertions(+), 3 deletions(-) create mode 100644 environment-modal.yml diff --git a/environment-modal.yml b/environment-modal.yml new file mode 100644 index 0000000..2bef4a6 --- /dev/null +++ b/environment-modal.yml @@ -0,0 +1,21 @@ +# Conda environment for Modal inference (local client only) +# This environment is for running modal_infer.py locally to submit jobs to Modal +name: faster-whisper-modal +channels: + - conda-forge + - defaults + +dependencies: + # Python version + - python=3.10 + + # Core dependencies + - pip + + # Pip dependencies + - pip: + # Modal client for submitting jobs + - modal + + # Interactive CLI prompts + - questionary diff --git a/modal_infer.py b/modal_infer.py index d6abd78..9978c44 100644 --- a/modal_infer.py +++ b/modal_infer.py @@ -9,7 +9,11 @@ from datetime import datetime from pathlib import Path import subprocess +<<<<<<< HEAD from typing import Dict, List, Optional, Sequence, Tuple +======= +from typing import Dict, Iterable, List, Optional, Sequence, Tuple +>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) from uuid import uuid4 try: @@ -27,11 +31,19 @@ APP_NAME = "Faster-Whisper-TransWithAI-ChickenRice" REPO_URL = "https://github.com/TransWithAI/Faster-Whisper-TransWithAI-ChickenRice" +<<<<<<< HEAD VOLUME_NAME = "Faster_Whisper" VOLUME_ROOT = Path("/Faster_Whisper") REMOTE_MOUNT = VOLUME_ROOT APP_ROOT_REL = Path(APP_NAME) SESSION_SUBDIR = Path("sessions") +======= +VOLUME_NAME = "agent_volume" +VOLUME_ROOT = Path("/agent_volume") +REMOTE_MOUNT = VOLUME_ROOT +APP_ROOT_REL = Path(APP_NAME) +SESSION_SUBDIR = APP_ROOT_REL / "sessions" +>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) REPO_VOLUME_DIR = VOLUME_ROOT / "repo" SUB_FORMATS = "srt,vtt,lrc" SUB_SUFFIXES = {".srt", ".vtt", ".lrc"} @@ -43,6 +55,10 @@ ".aac", ".ogg", ".wma", +<<<<<<< HEAD +======= + ".mp4", +>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) ".mkv", ".avi", ".mov", @@ -50,10 +66,14 @@ ".flv", ".wmv", } +<<<<<<< HEAD VIDEO_NEED_CONVERT = {".mp4"} # 需要用户手动转换的格式 DEFAULT_GPU_CHOICES = [ "T4", "L4", +======= +DEFAULT_GPU_CHOICES = [ +>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) "L40S", "A10G", "A100-40GB", @@ -61,6 +81,11 @@ "H100", "H200", "B200", +<<<<<<< HEAD +======= + "L4", + "T4", +>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) ] @@ -96,6 +121,7 @@ class UploadManifest: remote_output_rel: Path local_output_dir: Path remote_logs_rel: Path +<<<<<<< HEAD original_filename: Optional[str] = None # 原始文件名(用于恢复空格) @@ -107,6 +133,14 @@ class ScanResult: class NoAudioFilesError(Exception): pass +======= + + +@dataclass +class RemoteResult: + created_files: Dict[str, List[str]] + log_file: Optional[str] +>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) def rel_to_volume_path(path: Path) -> str: @@ -190,6 +224,19 @@ def ensure_questionary(): def ask_selection() -> UserSelection: ensure_questionary() +<<<<<<< HEAD +======= + run_mode = questionary.select( + "选择运行模式:", + choices=[ + Choice(title="一次性运行(modal run)", value="once"), + Choice(title="持久化 App(modal deploy)", value="persistent"), + ], + ).ask() + if not run_mode: + raise KeyboardInterrupt + +>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) gpu_choice = questionary.select( "选择 GPU", choices=DEFAULT_GPU_CHOICES, @@ -250,7 +297,11 @@ def ask_selection() -> UserSelection: ) return UserSelection( +<<<<<<< HEAD run_mode="once", +======= + run_mode=run_mode, +>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) gpu_choice=gpu_choice, input_path=input_path, model_profile=model_profile, @@ -263,6 +314,7 @@ def ask_selection() -> UserSelection: ) +<<<<<<< HEAD def scan_audio_files(path: Path) -> ScanResult: """扫描目录,返回音频文件和需要转换的 mp4 文件""" audio_files: List[Path] = [] @@ -302,10 +354,28 @@ def validate_audio_path(path: Path) -> ScanResult: if not scan_result.audio_files: raise NoAudioFilesError(f"输入的文件夹内没有音频文件:{path}") return scan_result +======= +def iter_audio_files(path: Path) -> List[Path]: + files: List[Path] = [] + for file in path.rglob("*"): + if file.is_file() and file.suffix.lower() in AUDIO_SUFFIXES: + files.append(file) + return files + + +def validate_audio_path(path: Path) -> None: + if path.is_file(): + if path.suffix.lower() not in AUDIO_SUFFIXES: + raise ValueError(f"文件 {path} 不属于支持的音/视频格式。") + elif path.is_dir(): + if not iter_audio_files(path): + raise ValueError(f"文件夹 {path} 中没有支持的音/视频文件。") +>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) else: raise ValueError(f"路径 {path} 既不是文件也不是文件夹。") +<<<<<<< HEAD def upload_single_file( volume: modal.Volume, selection: UserSelection, @@ -345,6 +415,47 @@ def upload_single_file( local_output_dir=local_output_dir, remote_logs_rel=remote_logs_rel, original_filename=original_filename, # 始终记录原始文件名 +======= +def prepare_upload( + volume: modal.Volume, + selection: UserSelection, +) -> UploadManifest: + validate_audio_path(selection.input_path) + session_id = f"{datetime.now().strftime('%Y%m%d-%H%M%S')}-{uuid4().hex[:6]}" + remote_session_rel = SESSION_SUBDIR / session_id + remote_logs_rel = remote_session_rel / "logs" + remote_inputs_rel: List[Path] = [] + + with volume.batch_upload(force=True) as batch: + if selection.input_path.is_file(): + remote_rel = APP_ROOT_REL / selection.input_path.name + logging.info("上传文件 -> %s", rel_to_volume_path(remote_rel)) + batch.put_file(str(selection.input_path), rel_to_volume_path(remote_rel)) + remote_inputs_rel.append(remote_rel) + remote_output_rel = remote_session_rel + local_output_dir = selection.input_path.parent + source_type = "file" + else: + remote_input_dir_rel = remote_session_rel / selection.input_path.name + audio_files = iter_audio_files(selection.input_path) + for file in audio_files: + rel = remote_input_dir_rel / file.relative_to(selection.input_path) + logging.info("上传文件 -> %s", rel_to_volume_path(rel)) + batch.put_file(str(file), rel_to_volume_path(rel)) + remote_inputs_rel.append(remote_input_dir_rel) + remote_output_rel = remote_session_rel / f"{selection.input_path.name}_out" + local_output_dir = selection.input_path / f"{selection.input_path.name}_out" + source_type = "directory" + + return UploadManifest( + session_id=session_id, + source_type=source_type, + local_source=selection.input_path, + remote_inputs_rel=remote_inputs_rel, + remote_output_rel=remote_output_rel, + local_output_dir=local_output_dir, + remote_logs_rel=remote_logs_rel, +>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) ) @@ -401,12 +512,18 @@ def run_remote_pipeline( selection: UserSelection, manifest: UploadManifest, payload: Dict, +<<<<<<< HEAD ) -> Dict: logging.info("=== 开始构建 Modal 镜像 ===") image = build_modal_image() logging.info("✓ 镜像构建完成") logging.info("使用 GPU:%s", selection.gpu_choice) logging.info("超时时间:%d 分钟", selection.timeout_minutes) +======= +) -> RemoteResult: + image = build_modal_image() + logging.info("使用 GPU:%s", selection.gpu_choice) +>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) app = modal.App(APP_NAME) @app.function( @@ -419,6 +536,7 @@ def run_remote_pipeline( def modal_pipeline(job_payload: Dict) -> Dict: return _remote_pipeline(job_payload) +<<<<<<< HEAD logging.info("=== 开始远程执行 ===") logging.info("正在启动 GPU 容器并执行推理任务...") logging.info("(以下为远程容器输出)") @@ -496,10 +614,19 @@ def modal_pipeline(job_payload: Dict) -> Dict: continue # 继续处理下一个文件 return success_count, fail_count +======= + with app.run(): + result = modal_pipeline.remote(payload) + created = { + remote_dir: files for remote_dir, files in result.get("created", {}).items() + } + return RemoteResult(created_files=created, log_file=result.get("log_file")) +>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) def download_outputs( manifest: UploadManifest, +<<<<<<< HEAD result: Dict, ) -> None: """从远程结果中提取文件内容并写入本地""" @@ -535,15 +662,52 @@ def download_outputs( def summarize(manifest: UploadManifest, result: Dict) -> None: +======= + result: RemoteResult, +) -> None: + def modal_volume_get(remote_path: str, local_dest: Path) -> None: + local_dest.parent.mkdir(parents=True, exist_ok=True) + logging.info("下载 %s -> %s", remote_path, local_dest) + subprocess.run( + ["modal", "volume", "get", VOLUME_NAME, remote_path, str(local_dest)], + check=True, + ) + + for remote_dir, files in result.created_files.items(): + base_rel = Path(remote_dir.lstrip("/")) + for remote_file in files: + file_rel = Path(remote_file.lstrip("/")) + try: + rel_inside_output = file_rel.relative_to(base_rel) + except Exception: + rel_inside_output = file_rel.name + local_path = manifest.local_output_dir / rel_inside_output + modal_volume_get(remote_file, local_path) + + if result.log_file: + local_log = Path("logs") / Path(Path(result.log_file).name) + modal_volume_get(result.log_file, local_log) + + +def summarize(manifest: UploadManifest, result: RemoteResult) -> None: +>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) logging.info("=== 运行完成 ===") logging.info("Session: %s", manifest.session_id) logging.info("源路径: %s", manifest.local_source) logging.info("输出路径: %s", manifest.local_output_dir if manifest.source_type == "directory" else manifest.local_source.parent) +<<<<<<< HEAD created_files = result.get("created_files", {}) if created_files: logging.info("新生成文件:") for filename in created_files.keys(): logging.info(" %s", filename) +======= + if result.created_files: + logging.info("新生成文件:") + for remote_dir, files in result.created_files.items(): + for file in files: + logging.info(" %s", file) +>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) def parse_args() -> argparse.Namespace: @@ -556,6 +720,7 @@ def parse_args() -> argparse.Namespace: return parser.parse_args() +<<<<<<< HEAD def prompt_exit(enabled: bool) -> None: if not enabled: return @@ -610,27 +775,59 @@ def main() -> int: prompt_exit(not args.non_interactive) return exit_code +======= +def main() -> None: + parse_args() + log_path = setup_logger() + try: + selection = ask_selection() + volume = modal.Volume.from_name(VOLUME_NAME, create_if_missing=True) + manifest = prepare_upload(volume, selection) + payload = build_job_payload(selection, manifest) + result = run_remote_pipeline(volume, selection, manifest, payload) + download_outputs(manifest, result) + summarize(manifest, result) + logging.info("✅ 请在上方输出路径查看字幕结果。") + except KeyboardInterrupt: + logging.warning("用户中断,未执行任何远程操作。") + sys.exit(1) + except Exception as exc: + logging.exception("运行失败:%s", exc) + logging.error("日志见:%s", log_path) + sys.exit(1) + + +>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) def _remote_pipeline(job: Dict) -> Dict: import subprocess from pathlib import Path import os +<<<<<<< HEAD # 强制重新加载 Volume,确保看到最新上传的文件 from modal import Volume volume = Volume.from_name("Faster_Whisper") volume.reload() +======= +>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) def run(cmd: Sequence[str], cwd: Optional[str] = None, env: Optional[dict] = None) -> None: print(" ".join(cmd), flush=True) subprocess.run(cmd, check=True, cwd=cwd, env=env) mount_root = Path(job["mount_root"]) repo_dir = REPO_VOLUME_DIR +<<<<<<< HEAD # log 文件放在 session 目录下,而不是 logs 子目录 session_dir = Path(job["remote_output_dir"]) session_dir.mkdir(parents=True, exist_ok=True) log_file = session_dir / "modal_run.log" +======= + logs_dir = Path(job["remote_logs_dir"]) + logs_dir.mkdir(parents=True, exist_ok=True) + log_file = logs_dir / "modal_run.log" +>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) def log(msg: str) -> None: line = f"[modal_run] {msg}" @@ -708,6 +905,7 @@ def snapshot(path: str) -> set: cmd.extend(job["remote_inputs"]) +<<<<<<< HEAD # 在执行推理前,等待文件同步完成 import time log("等待文件同步...") @@ -765,13 +963,21 @@ def snapshot(path: str) -> set: log(f"=== 调试信息结束 ===") raise +======= + log(f"执行推理命令:{' '.join(cmd)}") + run(cmd, cwd=str(repo_dir)) +>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) def to_volume_path(path_str: str) -> str: return container_to_volume_path(path_str) +<<<<<<< HEAD # 收集生成的文件内容(直接返回,避免 volume 同步问题) import base64 created_files = {} # {filename: base64_content} +======= + created = {} +>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) for target in job["output_targets"]: remote_dir = target["remote_dir"] after = snapshot(remote_dir) @@ -781,6 +987,7 @@ def to_volume_path(path_str: str) -> str: for file in after - prev if Path(file).suffix.lower() in SUB_SUFFIXES ) +<<<<<<< HEAD for file_path in new_files: file_path = Path(file_path) if file_path.exists(): @@ -803,3 +1010,12 @@ def to_volume_path(path_str: str) -> str: if __name__ == "__main__": # pragma: no cover sys.exit(main()) +======= + created[to_volume_path(remote_dir)] = [to_volume_path(path) for path in new_files] + + return {"created": created, "log_file": to_volume_path(str(log_file))} + + +if __name__ == "__main__": # pragma: no cover + main() +>>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) diff --git "a/\344\275\277\347\224\250\350\257\264\346\230\216.txt" "b/\344\275\277\347\224\250\350\257\264\346\230\216.txt" index ae7c8cd..c39a64a 100644 --- "a/\344\275\277\347\224\250\350\257\264\346\230\216.txt" +++ "b/\344\275\277\347\224\250\350\257\264\346\230\216.txt" @@ -100,13 +100,14 @@ GPU模式(仅限NVIDIA显卡): 1. 环境配置: -使用现有的 Conda 环境(已包含 modal 支持): +使用 Conda 创建轻量级环境(仅需 modal 和 questionary 库): ```bash -conda activate faster-whisper-cu118 # 或 cu122, cu128 +conda env create -f environment-modal.yml +conda activate faster-whisper-modal ``` -或在现有环境中手动安装: +或手动安装: ```bash pip install modal questionary ``` @@ -150,6 +151,7 @@ python modal_infer.py ``` 程序会交互式询问: +- 运行模式:一次性运行 或 持久化App - GPU 类型:T4(推荐)、A10G、A100、H100 等 - 模型选择:基础版、海南鸡(日文转中文优化)、自定义模型 - 输入文件:本地音频文件或文件夹路径 From 84bfe9f4888a5158464b9b76bdf8496c79cb9dea Mon Sep 17 00:00:00 2001 From: neo Date: Wed, 31 Dec 2025 15:18:37 +0800 Subject: [PATCH 03/25] fix: Improve modal_infer.py usability and fix bugs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix file upload path to use session directory consistently - Simplify volume path structure (remove APP_ROOT_REL layer) - Remove run mode selection, default to one-time execution - Add detailed logging for build, execution and download stages - Fix download conflict by adding --force flag - Reorder GPU choices (T4/L4 first for cost efficiency) - Update volume name to Faster_Whisper 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- .gitignore | 1 + modal_infer.py | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 0d6661a..8ff3ddd 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ __pycache__/ # C extensions *.so +node_modules/ # Distribution / packaging .Python build/ diff --git a/modal_infer.py b/modal_infer.py index 9978c44..f004c2a 100644 --- a/modal_infer.py +++ b/modal_infer.py @@ -428,7 +428,7 @@ def prepare_upload( with volume.batch_upload(force=True) as batch: if selection.input_path.is_file(): - remote_rel = APP_ROOT_REL / selection.input_path.name + remote_rel = remote_session_rel / selection.input_path.name logging.info("上传文件 -> %s", rel_to_volume_path(remote_rel)) batch.put_file(str(selection.input_path), rel_to_volume_path(remote_rel)) remote_inputs_rel.append(remote_rel) @@ -521,7 +521,9 @@ def run_remote_pipeline( logging.info("超时时间:%d 分钟", selection.timeout_minutes) ======= ) -> RemoteResult: + logging.info("=== 开始构建 Modal 镜像 ===") image = build_modal_image() + logging.info("✓ 镜像构建完成") logging.info("使用 GPU:%s", selection.gpu_choice) >>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) app = modal.App(APP_NAME) @@ -617,6 +619,8 @@ def modal_pipeline(job_payload: Dict) -> Dict: ======= with app.run(): result = modal_pipeline.remote(payload) + logging.info("-" * 60) + logging.info("✓ 远程执行完成") created = { remote_dir: files for remote_dir, files in result.get("created", {}).items() } @@ -669,7 +673,7 @@ def modal_volume_get(remote_path: str, local_dest: Path) -> None: local_dest.parent.mkdir(parents=True, exist_ok=True) logging.info("下载 %s -> %s", remote_path, local_dest) subprocess.run( - ["modal", "volume", "get", VOLUME_NAME, remote_path, str(local_dest)], + ["modal", "volume", "get", VOLUME_NAME, remote_path, str(local_dest), "--force"], check=True, ) From 5f1b6396e260ec798a58ab16992121118f9c5b62 Mon Sep 17 00:00:00 2001 From: neo <2418660459@qq.com> Date: Sat, 3 Jan 2026 14:57:27 +0800 Subject: [PATCH 04/25] =?UTF-8?q?refactor:=20=E4=BC=98=E5=8C=96=E6=96=87?= =?UTF-8?q?=E4=BB=B6=E5=A4=B9=E5=A4=84=E7=90=86=E9=80=BB=E8=BE=91=EF=BC=8C?= =?UTF-8?q?=E9=80=90=E6=96=87=E4=BB=B6=E4=B8=8A=E4=BC=A0=E9=81=BF=E5=85=8D?= =?UTF-8?q?=E8=BF=9E=E6=8E=A5=E6=96=AD=E5=BC=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 文件夹模式下改为逐个上传和处理音频文件,容器复用 - 使用 min_containers=1 保持容器预热 - 排除 mp4 格式,发现时提示用户使用 ffmpeg 转换 - 文件夹模式下结果直接保存到源文件夹,不再创建 _out 子目录 - 单个文件处理失败时继续处理其他文件 注意:仅通过语法检查,未经手动测试验证 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- modal_infer.py | 209 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 172 insertions(+), 37 deletions(-) diff --git a/modal_infer.py b/modal_infer.py index f004c2a..01ed409 100644 --- a/modal_infer.py +++ b/modal_infer.py @@ -10,10 +10,14 @@ from pathlib import Path import subprocess <<<<<<< HEAD +<<<<<<< HEAD from typing import Dict, List, Optional, Sequence, Tuple ======= from typing import Dict, Iterable, List, Optional, Sequence, Tuple >>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) +======= +from typing import Dict, List, Optional, Sequence, Tuple +>>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑,逐文件上传避免连接断开) from uuid import uuid4 try: @@ -56,9 +60,12 @@ ".ogg", ".wma", <<<<<<< HEAD +<<<<<<< HEAD ======= ".mp4", >>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) +======= +>>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑,逐文件上传避免连接断开) ".mkv", ".avi", ".mov", @@ -67,6 +74,9 @@ ".wmv", } <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑,逐文件上传避免连接断开) VIDEO_NEED_CONVERT = {".mp4"} # 需要用户手动转换的格式 DEFAULT_GPU_CHOICES = [ "T4", @@ -136,6 +146,12 @@ class NoAudioFilesError(Exception): ======= +@dataclass +class ScanResult: + audio_files: List[Path] + mp4_files: List[Path] + + @dataclass class RemoteResult: created_files: Dict[str, List[str]] @@ -315,10 +331,14 @@ def ask_selection() -> UserSelection: <<<<<<< HEAD +<<<<<<< HEAD +======= +>>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑,逐文件上传避免连接断开) def scan_audio_files(path: Path) -> ScanResult: """扫描目录,返回音频文件和需要转换的 mp4 文件""" audio_files: List[Path] = [] mp4_files: List[Path] = [] +<<<<<<< HEAD for file in path.rglob("*"): if file.is_file(): suffix = file.suffix.lower() @@ -357,24 +377,52 @@ def validate_audio_path(path: Path) -> ScanResult: ======= def iter_audio_files(path: Path) -> List[Path]: files: List[Path] = [] +======= +>>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑,逐文件上传避免连接断开) for file in path.rglob("*"): - if file.is_file() and file.suffix.lower() in AUDIO_SUFFIXES: - files.append(file) - return files + if file.is_file(): + suffix = file.suffix.lower() + if suffix in AUDIO_SUFFIXES: + audio_files.append(file) + elif suffix in VIDEO_NEED_CONVERT: + mp4_files.append(file) + return ScanResult(audio_files=audio_files, mp4_files=mp4_files) -def validate_audio_path(path: Path) -> None: +def validate_audio_path(path: Path) -> ScanResult: + """验证音频路径,返回扫描结果。如果发现 mp4 文件会打印警告。""" if path.is_file(): - if path.suffix.lower() not in AUDIO_SUFFIXES: + suffix = path.suffix.lower() + if suffix in VIDEO_NEED_CONVERT: + raise ValueError( + f"文件 {path} 是 mp4 格式,请先使用 ffmpeg 转换为 mp3:\n" + f" ffmpeg -i \"{path}\" -vn -acodec libmp3lame \"{path.with_suffix('.mp3')}\"" + ) + if suffix not in AUDIO_SUFFIXES: raise ValueError(f"文件 {path} 不属于支持的音/视频格式。") + return ScanResult(audio_files=[path], mp4_files=[]) elif path.is_dir(): - if not iter_audio_files(path): + scan_result = scan_audio_files(path) + if scan_result.mp4_files: + logging.warning("=" * 60) + logging.warning("发现 %d 个 mp4 文件,这些文件将被跳过:", len(scan_result.mp4_files)) + for mp4_file in scan_result.mp4_files: + logging.warning(" - %s", mp4_file) + logging.warning("请使用 ffmpeg 转换为 mp3 后再处理,例如:") + logging.warning(" ffmpeg -i \"input.mp4\" -vn -acodec libmp3lame \"output.mp3\"") + logging.warning("=" * 60) + if not scan_result.audio_files: raise ValueError(f"文件夹 {path} 中没有支持的音/视频文件。") +<<<<<<< HEAD >>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) +======= + return scan_result +>>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑,逐文件上传避免连接断开) else: raise ValueError(f"路径 {path} 既不是文件也不是文件夹。") +<<<<<<< HEAD <<<<<<< HEAD def upload_single_file( volume: modal.Volume, @@ -417,42 +465,40 @@ def upload_single_file( original_filename=original_filename, # 始终记录原始文件名 ======= def prepare_upload( +======= +def upload_single_file( +>>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑,逐文件上传避免连接断开) volume: modal.Volume, selection: UserSelection, + audio_file: Path, + base_dir: Optional[Path] = None, ) -> UploadManifest: - validate_audio_path(selection.input_path) + """上传单个音频文件到 Modal Volume。 + + Args: + volume: Modal Volume 实例 + selection: 用户选择配置 + audio_file: 要上传的音频文件路径 + base_dir: 基础目录(用于文件夹模式,输出到此目录) + """ session_id = f"{datetime.now().strftime('%Y%m%d-%H%M%S')}-{uuid4().hex[:6]}" remote_session_rel = SESSION_SUBDIR / session_id remote_logs_rel = remote_session_rel / "logs" - remote_inputs_rel: List[Path] = [] with volume.batch_upload(force=True) as batch: - if selection.input_path.is_file(): - remote_rel = remote_session_rel / selection.input_path.name - logging.info("上传文件 -> %s", rel_to_volume_path(remote_rel)) - batch.put_file(str(selection.input_path), rel_to_volume_path(remote_rel)) - remote_inputs_rel.append(remote_rel) - remote_output_rel = remote_session_rel - local_output_dir = selection.input_path.parent - source_type = "file" - else: - remote_input_dir_rel = remote_session_rel / selection.input_path.name - audio_files = iter_audio_files(selection.input_path) - for file in audio_files: - rel = remote_input_dir_rel / file.relative_to(selection.input_path) - logging.info("上传文件 -> %s", rel_to_volume_path(rel)) - batch.put_file(str(file), rel_to_volume_path(rel)) - remote_inputs_rel.append(remote_input_dir_rel) - remote_output_rel = remote_session_rel / f"{selection.input_path.name}_out" - local_output_dir = selection.input_path / f"{selection.input_path.name}_out" - source_type = "directory" + remote_rel = remote_session_rel / audio_file.name + logging.info("上传文件 -> %s", rel_to_volume_path(remote_rel)) + batch.put_file(str(audio_file), rel_to_volume_path(remote_rel)) + + # 如果指定了 base_dir(文件夹模式),输出到 base_dir;否则输出到文件所在目录 + local_output_dir = base_dir if base_dir else audio_file.parent return UploadManifest( session_id=session_id, - source_type=source_type, - local_source=selection.input_path, - remote_inputs_rel=remote_inputs_rel, - remote_output_rel=remote_output_rel, + source_type="file", + local_source=audio_file, + remote_inputs_rel=[remote_rel], + remote_output_rel=remote_session_rel, local_output_dir=local_output_dir, remote_logs_rel=remote_logs_rel, >>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) @@ -628,6 +674,78 @@ def modal_pipeline(job_payload: Dict) -> Dict: >>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) +def process_directory_files( + volume: modal.Volume, + selection: UserSelection, + audio_files: List[Path], +) -> Tuple[int, int]: + """处理文件夹中的所有音频文件,容器复用。 + + Args: + volume: Modal Volume 实例 + selection: 用户选择配置 + audio_files: 要处理的音频文件列表 + + Returns: + (成功数, 失败数) 元组 + """ + logging.info("=== 开始构建 Modal 镜像 ===") + image = build_modal_image() + logging.info("✓ 镜像构建完成") + logging.info("使用 GPU:%s", selection.gpu_choice) + logging.info("超时时间:%d 分钟", selection.timeout_minutes) + logging.info("待处理文件数:%d", len(audio_files)) + + app = modal.App(APP_NAME) + + @app.function( + image=image, + gpu=selection.gpu_choice, + timeout=selection.timeout_minutes * 60, + volumes={str(REMOTE_MOUNT): volume}, + serialized=True, + min_containers=1, # 保持容器预热,复用容器 + ) + def modal_pipeline(job_payload: Dict) -> Dict: + return _remote_pipeline(job_payload) + + success_count = 0 + fail_count = 0 + base_dir = selection.input_path # 文件夹模式下,输出到源文件夹 + + with app.run(): + for i, audio_file in enumerate(audio_files, 1): + logging.info("=" * 60) + logging.info("处理文件 [%d/%d]: %s", i, len(audio_files), audio_file.name) + logging.info("=" * 60) + try: + # 1. 上传单个文件 + manifest = upload_single_file(volume, selection, audio_file, base_dir) + + # 2. 构建 payload + payload = build_job_payload(selection, manifest) + + # 3. 执行推理(复用容器) + logging.info("正在执行推理...") + result = modal_pipeline.remote(payload) + + # 4. 下载结果到源文件夹 + remote_result = RemoteResult( + created_files=result.get("created", {}), + log_file=result.get("log_file"), + ) + download_outputs(manifest, remote_result) + + logging.info("✓ 文件 %s 处理完成", audio_file.name) + success_count += 1 + except Exception as e: + logging.error("✗ 文件 %s 处理失败: %s", audio_file.name, e) + fail_count += 1 + continue # 继续处理下一个文件 + + return success_count, fail_count + + def download_outputs( manifest: UploadManifest, <<<<<<< HEAD @@ -786,12 +904,29 @@ def main() -> None: try: selection = ask_selection() volume = modal.Volume.from_name(VOLUME_NAME, create_if_missing=True) - manifest = prepare_upload(volume, selection) - payload = build_job_payload(selection, manifest) - result = run_remote_pipeline(volume, selection, manifest, payload) - download_outputs(manifest, result) - summarize(manifest, result) - logging.info("✅ 请在上方输出路径查看字幕结果。") + + # 验证路径并获取扫描结果 + scan_result = validate_audio_path(selection.input_path) + + if selection.input_path.is_dir(): + # 文件夹模式:逐个处理文件,容器复用 + logging.info("检测到文件夹输入,将逐个处理 %d 个音频文件", len(scan_result.audio_files)) + success_count, fail_count = process_directory_files( + volume, selection, scan_result.audio_files + ) + logging.info("=" * 60) + logging.info("=== 批量处理完成 ===") + logging.info("成功: %d, 失败: %d", success_count, fail_count) + logging.info("输出路径: %s", selection.input_path) + logging.info("✅ 请在上方输出路径查看字幕结果。") + else: + # 单文件模式:保持原有逻辑 + manifest = upload_single_file(volume, selection, selection.input_path) + payload = build_job_payload(selection, manifest) + result = run_remote_pipeline(volume, selection, manifest, payload) + download_outputs(manifest, result) + summarize(manifest, result) + logging.info("✅ 请在上方输出路径查看字幕结果。") except KeyboardInterrupt: logging.warning("用户中断,未执行任何远程操作。") sys.exit(1) From 04fc0ddcf98cd9d40be910ff1184a568932fd655 Mon Sep 17 00:00:00 2001 From: neo <2418660459@qq.com> Date: Sat, 3 Jan 2026 15:11:09 +0800 Subject: [PATCH 05/25] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E6=96=87?= =?UTF-8?q?=E4=BB=B6=E5=90=8D=E5=8C=85=E5=90=AB=E7=A9=BA=E6=A0=BC=E5=AF=BC?= =?UTF-8?q?=E8=87=B4=E8=BF=9C=E7=A8=8B=E6=89=A7=E8=A1=8C=E5=A4=B1=E8=B4=A5?= =?UTF-8?q?=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 上传时将文件名中的空格替换为下划线 - 在 UploadManifest 中记录原始文件名 - 下载字幕后恢复原始文件名(带空格) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- modal_infer.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/modal_infer.py b/modal_infer.py index 01ed409..ea007cf 100644 --- a/modal_infer.py +++ b/modal_infer.py @@ -802,10 +802,20 @@ def modal_volume_get(remote_path: str, local_dest: Path) -> None: try: rel_inside_output = file_rel.relative_to(base_rel) except Exception: - rel_inside_output = file_rel.name + rel_inside_output = Path(file_rel.name) local_path = manifest.local_output_dir / rel_inside_output modal_volume_get(remote_file, local_path) + # 如果有原始文件名(包含空格),恢复原始文件名 + if manifest.original_filename: + original_stem = Path(manifest.original_filename).stem + safe_stem = original_stem.replace(" ", "_") + if local_path.stem == safe_stem: + new_name = original_stem + local_path.suffix + new_path = local_path.parent / new_name + logging.info("恢复原始文件名: %s -> %s", local_path.name, new_name) + local_path.rename(new_path) + if result.log_file: local_log = Path("logs") / Path(Path(result.log_file).name) modal_volume_get(result.log_file, local_log) From d863ce14202a02b5c4466349698adfe51118572b Mon Sep 17 00:00:00 2001 From: neo <2418660459@qq.com> Date: Sat, 3 Jan 2026 15:41:42 +0800 Subject: [PATCH 06/25] =?UTF-8?q?fix:=20=E4=BD=BF=E7=94=A8=E5=9B=BA?= =?UTF-8?q?=E5=AE=9A=E6=96=87=E4=BB=B6=E5=90=8D=20todo=20=E9=81=BF?= =?UTF-8?q?=E5=85=8D=E5=85=A8=E8=A7=92=E5=AD=97=E7=AC=A6=E5=AF=BC=E8=87=B4?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E6=89=BE=E4=B8=8D=E5=88=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 上传时将文件名改为 todo + 扩展名,避免全角字符问题 - 下载后恢复原始文件名 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- modal_infer.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/modal_infer.py b/modal_infer.py index ea007cf..08474e3 100644 --- a/modal_infer.py +++ b/modal_infer.py @@ -806,11 +806,10 @@ def modal_volume_get(remote_path: str, local_dest: Path) -> None: local_path = manifest.local_output_dir / rel_inside_output modal_volume_get(remote_file, local_path) - # 如果有原始文件名(包含空格),恢复原始文件名 + # 如果有原始文件名,恢复原始文件名 if manifest.original_filename: original_stem = Path(manifest.original_filename).stem - safe_stem = original_stem.replace(" ", "_") - if local_path.stem == safe_stem: + if local_path.stem == "todo": # 固定文件名 new_name = original_stem + local_path.suffix new_path = local_path.parent / new_name logging.info("恢复原始文件名: %s -> %s", local_path.name, new_name) From 4a56d1e7762d152c5d73fbb375042c79963b8d1b Mon Sep 17 00:00:00 2001 From: neo <2418660459@qq.com> Date: Sat, 3 Jan 2026 16:09:02 +0800 Subject: [PATCH 07/25] =?UTF-8?q?fix:=20=E6=B7=BB=E5=8A=A0=20Volume=20?= =?UTF-8?q?=E5=90=8C=E6=AD=A5=E7=AD=89=E5=BE=85=EF=BC=8C=E8=A7=A3=E5=86=B3?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E4=B8=8A=E4=BC=A0=E5=90=8E=E5=AE=B9=E5=99=A8?= =?UTF-8?q?=E7=9C=8B=E4=B8=8D=E5=88=B0=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 上传后调用 volume.commit() 强制同步 - 远程执行前等待文件出现(最多 30 秒) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- modal_infer.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modal_infer.py b/modal_infer.py index 08474e3..24c5eeb 100644 --- a/modal_infer.py +++ b/modal_infer.py @@ -451,6 +451,11 @@ def upload_single_file( logging.info("上传文件 -> %s", rel_to_volume_path(remote_rel)) batch.put_file(str(audio_file), rel_to_volume_path(remote_rel)) + # 强制同步 Volume,确保文件对容器可见 + logging.info("等待 Volume 同步...") + volume.commit() + logging.info("Volume 同步完成") + # 如果指定了 base_dir(文件夹模式),输出到 base_dir;否则输出到文件所在目录 local_output_dir = base_dir if base_dir else audio_file.parent From 00da127bcd76529dc6aa8e919ba7d7781f8b0440 Mon Sep 17 00:00:00 2001 From: neo <2418660459@qq.com> Date: Sat, 3 Jan 2026 16:14:40 +0800 Subject: [PATCH 08/25] =?UTF-8?q?fix:=20=E7=A7=BB=E9=99=A4=E6=9C=AC?= =?UTF-8?q?=E5=9C=B0=20volume.commit()=20=E8=B0=83=E7=94=A8=EF=BC=88?= =?UTF-8?q?=E5=8F=AA=E8=83=BD=E5=9C=A8=E5=AE=B9=E5=99=A8=E5=86=85=E8=B0=83?= =?UTF-8?q?=E7=94=A8=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- modal_infer.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/modal_infer.py b/modal_infer.py index 24c5eeb..08474e3 100644 --- a/modal_infer.py +++ b/modal_infer.py @@ -451,11 +451,6 @@ def upload_single_file( logging.info("上传文件 -> %s", rel_to_volume_path(remote_rel)) batch.put_file(str(audio_file), rel_to_volume_path(remote_rel)) - # 强制同步 Volume,确保文件对容器可见 - logging.info("等待 Volume 同步...") - volume.commit() - logging.info("Volume 同步完成") - # 如果指定了 base_dir(文件夹模式),输出到 base_dir;否则输出到文件所在目录 local_output_dir = base_dir if base_dir else audio_file.parent From bdaedcaf0b604d2b5f0308c7eb6097f1cb2be641 Mon Sep 17 00:00:00 2001 From: neo <2418660459@qq.com> Date: Sat, 3 Jan 2026 17:15:56 +0800 Subject: [PATCH 09/25] =?UTF-8?q?refactor:=20=E9=80=9A=E8=BF=87=E5=87=BD?= =?UTF-8?q?=E6=95=B0=E8=BF=94=E5=9B=9E=E5=80=BC=E4=BC=A0=E8=BE=93=E7=BB=93?= =?UTF-8?q?=E6=9E=9C=E6=96=87=E4=BB=B6=EF=BC=8C=E9=81=BF=E5=85=8D=20volume?= =?UTF-8?q?=20=E5=90=8C=E6=AD=A5=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 远程函数直接返回文件内容(base64 编码) - 本地直接写入文件,使用原始文件名 - 移除 modal volume get 下载逻辑 - 删除不再使用的 RemoteResult 类 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- modal_infer.py | 67 +++++++++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 36 deletions(-) diff --git a/modal_infer.py b/modal_infer.py index 08474e3..3b1477c 100644 --- a/modal_infer.py +++ b/modal_infer.py @@ -729,12 +729,8 @@ def modal_pipeline(job_payload: Dict) -> Dict: logging.info("正在执行推理...") result = modal_pipeline.remote(payload) - # 4. 下载结果到源文件夹 - remote_result = RemoteResult( - created_files=result.get("created", {}), - log_file=result.get("log_file"), - ) - download_outputs(manifest, remote_result) + # 4. 写入结果文件到本地 + download_outputs(manifest, result) logging.info("✓ 文件 %s 处理完成", audio_file.name) success_count += 1 @@ -787,37 +783,36 @@ def summarize(manifest: UploadManifest, result: Dict) -> None: ======= result: RemoteResult, ) -> None: - def modal_volume_get(remote_path: str, local_dest: Path) -> None: - local_dest.parent.mkdir(parents=True, exist_ok=True) - logging.info("下载 %s -> %s", remote_path, local_dest) - subprocess.run( - ["modal", "volume", "get", VOLUME_NAME, remote_path, str(local_dest), "--force"], - check=True, - ) + """从远程结果中提取文件内容并写入本地""" + import base64 - for remote_dir, files in result.created_files.items(): - base_rel = Path(remote_dir.lstrip("/")) - for remote_file in files: - file_rel = Path(remote_file.lstrip("/")) - try: - rel_inside_output = file_rel.relative_to(base_rel) - except Exception: - rel_inside_output = Path(file_rel.name) - local_path = manifest.local_output_dir / rel_inside_output - modal_volume_get(remote_file, local_path) - - # 如果有原始文件名,恢复原始文件名 - if manifest.original_filename: - original_stem = Path(manifest.original_filename).stem - if local_path.stem == "todo": # 固定文件名 - new_name = original_stem + local_path.suffix - new_path = local_path.parent / new_name - logging.info("恢复原始文件名: %s -> %s", local_path.name, new_name) - local_path.rename(new_path) - - if result.log_file: - local_log = Path("logs") / Path(Path(result.log_file).name) - modal_volume_get(result.log_file, local_log) + created_files = result.get("created_files", {}) + log_content = result.get("log_content") + + # 获取原始文件名的 stem(不含扩展名) + original_stem = Path(manifest.original_filename).stem if manifest.original_filename else "todo" + + for filename, content_b64 in created_files.items(): + content = base64.b64decode(content_b64) + # 将 todo.xxx 替换为原始文件名 + if filename.startswith("todo."): + suffix = Path(filename).suffix + new_filename = original_stem + suffix + else: + new_filename = filename + + local_path = manifest.local_output_dir / new_filename + local_path.parent.mkdir(parents=True, exist_ok=True) + local_path.write_bytes(content) + logging.info("写入文件: %s (%d bytes)", local_path, len(content)) + + # 写入 log 文件 + if log_content: + log_dir = Path("logs") + log_dir.mkdir(exist_ok=True) + log_path = log_dir / f"modal_run_{manifest.session_id}.log" + log_path.write_bytes(base64.b64decode(log_content)) + logging.info("写入日志: %s", log_path) def summarize(manifest: UploadManifest, result: RemoteResult) -> None: From 56d1764a57727823fd2cdf87360f3fbb9f2e29ed Mon Sep 17 00:00:00 2001 From: neo <2418660459@qq.com> Date: Sun, 11 Jan 2026 19:12:30 +0800 Subject: [PATCH 10/25] fix: add modal_infer to build and CI --- .github/workflows/build-release-conda.yml | 5 +++++ modal_infer.py | 10 +++++++--- "\344\275\277\347\224\250\350\257\264\346\230\216.txt" | 1 - 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-release-conda.yml b/.github/workflows/build-release-conda.yml index 195be49..e7fa697 100644 --- a/.github/workflows/build-release-conda.yml +++ b/.github/workflows/build-release-conda.yml @@ -212,6 +212,11 @@ jobs: python -c "import ctranslate2; print(f'CTranslate2 version: {ctranslate2.__version__}')" echo "Note: CUDA availability check skipped (no GPU on GitHub runners)" + - name: Install Modal dependencies + run: | + pip install modal questionary + python -c "import modal; print(f'Modal version: {modal.__version__}')" + - name: Check cached models run: | echo "Checking for cached models..." diff --git a/modal_infer.py b/modal_infer.py index 3b1477c..3668de0 100644 --- a/modal_infer.py +++ b/modal_infer.py @@ -905,6 +905,7 @@ def main() -> int: def main() -> None: parse_args() log_path = setup_logger() + exit_code = 0 try: selection = ask_selection() volume = modal.Volume.from_name(VOLUME_NAME, create_if_missing=True) @@ -933,11 +934,14 @@ def main() -> None: logging.info("✅ 请在上方输出路径查看字幕结果。") except KeyboardInterrupt: logging.warning("用户中断,未执行任何远程操作。") - sys.exit(1) + exit_code = 1 except Exception as exc: - logging.exception("运行失败:%s", exc) + if isinstance(exc, NoAudioFilesError): + logging.error("%s", exc) + else: + logging.exception("运行失败:%s", exc) logging.error("日志见:%s", log_path) - sys.exit(1) + exit_code = 1 >>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) diff --git "a/\344\275\277\347\224\250\350\257\264\346\230\216.txt" "b/\344\275\277\347\224\250\350\257\264\346\230\216.txt" index c39a64a..a55207f 100644 --- "a/\344\275\277\347\224\250\350\257\264\346\230\216.txt" +++ "b/\344\275\277\347\224\250\350\257\264\346\230\216.txt" @@ -151,7 +151,6 @@ python modal_infer.py ``` 程序会交互式询问: -- 运行模式:一次性运行 或 持久化App - GPU 类型:T4(推荐)、A10G、A100、H100 等 - 模型选择:基础版、海南鸡(日文转中文优化)、自定义模型 - 输入文件:本地音频文件或文件夹路径 From 72b5e810c67e0dc1e19fe9b872ef556c7a495b01 Mon Sep 17 00:00:00 2001 From: neo Date: Tue, 13 Jan 2026 16:23:45 +0800 Subject: [PATCH 11/25] =?UTF-8?q?=E6=8B=86=E5=88=86infer=E4=B8=8Emodal?= =?UTF-8?q?=E6=89=93=E5=8C=85=E5=B9=B6=E9=9A=94=E7=A6=BB=E4=BE=9D=E8=B5=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- project.spec | 114 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 101 insertions(+), 13 deletions(-) diff --git a/project.spec b/project.spec index efcb59a..24e99c0 100644 --- a/project.spec +++ b/project.spec @@ -345,12 +345,69 @@ datas += [ ('locales', 'locales'), # Include the locales directory with translations ] -a = Analysis( - ['infer.py', 'modal_infer.py'], +# Base collections (for infer.exe only) +infer_datas = list(datas) +infer_binaries = list(binaries) +infer_hiddenimports = list(hiddenimports) + +# Extend collections for modal_infer.exe only +modal_datas = list(datas) +modal_binaries = list(binaries) +modal_hiddenimports = list(hiddenimports) + +# Collect modal / questionary and their tricky deps explicitly (modal_infer only) +try: + m_datas, m_binaries, m_hiddenimports = collect_all('modal') + modal_datas += m_datas + modal_binaries += m_binaries + modal_hiddenimports += m_hiddenimports + print("Collected modal successfully") +except: + print("Warning: could not collect modal") + +try: + s_datas, s_binaries, s_hiddenimports = collect_all('synchronicity') + modal_datas += s_datas + modal_binaries += s_binaries + modal_hiddenimports += s_hiddenimports +except: + print("Warning: could not collect synchronicity") + +try: + q_datas, q_binaries, q_hiddenimports = collect_all('questionary') + modal_datas += q_datas + modal_binaries += q_binaries + modal_hiddenimports += q_hiddenimports +except: + print("Warning: could not collect questionary") + +modal_hiddenimports += [ + 'modal', + 'modal.proto', + 'synchronicity', + 'grpclib', + 'google.protobuf', + 'google.protobuf.internal', + 'toml', + 'rich', + 'typer', + 'click', + 'questionary', + 'prompt_toolkit', + 'prompt_toolkit.styles', + 'prompt_toolkit.key_binding', + 'prompt_toolkit.formatted_text', + 'prompt_toolkit.shortcuts', + 'prompt_toolkit.output', + 'prompt_toolkit.input', +] + +a_infer = Analysis( + ['infer.py'], pathex=[], - binaries=binaries, - datas=datas, - hiddenimports=hiddenimports, + binaries=infer_binaries, + datas=infer_datas, + hiddenimports=infer_hiddenimports, hookspath=[], # PyInstaller hooks contrib should be auto-detected hooksconfig={}, runtime_hooks=['runtime_hook.py'], # Add runtime hook to set KMP_DUPLICATE_LIB_OK @@ -372,11 +429,39 @@ a = Analysis( noarchive=False, ) -pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher) +a_modal = Analysis( + ['modal_infer.py'], + pathex=[], + binaries=modal_binaries, + datas=modal_datas, + hiddenimports=modal_hiddenimports, + hookspath=[], + hooksconfig={}, + runtime_hooks=['runtime_hook.py'], + excludes=[ + 'matplotlib', + 'tkinter', + 'PyQt5', + 'PyQt6', + 'PySide2', + 'PySide6', + 'notebook', + 'jupyter', + 'IPython', + 'pytest', + ], + win_no_prefer_redirects=False, + win_private_assemblies=False, + cipher=block_cipher, + noarchive=False, +) + +pyz_infer = PYZ(a_infer.pure, a_infer.zipped_data, cipher=block_cipher) +pyz_modal = PYZ(a_modal.pure, a_modal.zipped_data, cipher=block_cipher) infer_exe = EXE( - pyz, - [a.scripts[0]], + pyz_infer, + [a_infer.scripts[0]], [], exclude_binaries=True, name='infer', @@ -394,8 +479,8 @@ infer_exe = EXE( ) modal_exe = EXE( - pyz, - [a.scripts[1]], + pyz_modal, + [a_modal.scripts[0]], [], exclude_binaries=True, name='modal_infer', @@ -415,9 +500,12 @@ modal_exe = EXE( coll = COLLECT( infer_exe, modal_exe, - a.binaries, - a.zipfiles, - a.datas, + a_infer.binaries, + a_infer.zipfiles, + a_infer.datas, + a_modal.binaries, + a_modal.zipfiles, + a_modal.datas, strip=False, upx=False, upx_exclude=[], From 1236802d3f31e2bdc3b274dfadeabba3f52c204c Mon Sep 17 00:00:00 2001 From: neo Date: Tue, 13 Jan 2026 16:49:16 +0800 Subject: [PATCH 12/25] =?UTF-8?q?CI=E9=80=82=E9=85=8Dengine=E4=B8=8Eclient?= =?UTF-8?q?=E8=BE=93=E5=87=BA=E7=BB=93=E6=9E=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/build-release-conda.yml | 32 ++++++++++++++++------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/.github/workflows/build-release-conda.yml b/.github/workflows/build-release-conda.yml index e7fa697..08f5a79 100644 --- a/.github/workflows/build-release-conda.yml +++ b/.github/workflows/build-release-conda.yml @@ -285,6 +285,19 @@ jobs: python build_windows.py + - name: Prepare distribution layout + run: | + echo "Preparing distribution layout..." + mkdir -p dist/faster_whisper_transwithai_chickenrice + if [ -d "dist/engine" ]; then + mv dist/engine dist/faster_whisper_transwithai_chickenrice/engine + fi + if [ -d "dist/client" ]; then + mv dist/client dist/faster_whisper_transwithai_chickenrice/client + fi + echo "Distribution root:" + find dist/faster_whisper_transwithai_chickenrice -maxdepth 2 -type d -printf "%M %u %g %s %TY-%Tm-%Td %TH:%TM %p\n" 2>/dev/null || true + - name: Copy models to distribution run: | echo "Copying models to distribution directory..." @@ -294,19 +307,19 @@ jobs: find models -maxdepth 1 -printf "%M %u %g %s %TY-%Tm-%Td %TH:%TM %p\n" 2>/dev/null echo "" - # Create models directory in dist - mkdir -p dist/faster_whisper_transwithai_chickenrice/models + # Create models directory in engine dist + mkdir -p dist/faster_whisper_transwithai_chickenrice/engine/models # Copy VAD model files (always included) echo "Copying VAD models..." - cp models/*.onnx dist/faster_whisper_transwithai_chickenrice/models/ 2>/dev/null || true - cp models/*.json dist/faster_whisper_transwithai_chickenrice/models/ 2>/dev/null || true + cp models/*.onnx dist/faster_whisper_transwithai_chickenrice/engine/models/ 2>/dev/null || true + cp models/*.json dist/faster_whisper_transwithai_chickenrice/engine/models/ 2>/dev/null || true # Copy whisper-base for feature extractor (always included for offline usage) echo "Copying whisper-base for feature extractor..." if [ -d "models/whisper-base" ]; then echo " Found whisper-base directory, copying..." - cp -r models/whisper-base dist/faster_whisper_transwithai_chickenrice/models/ 2>/dev/null || true + cp -r models/whisper-base dist/faster_whisper_transwithai_chickenrice/engine/models/ 2>/dev/null || true echo " Whisper-base copied for offline feature extractor support" else echo " WARNING: whisper-base directory not found" @@ -322,9 +335,9 @@ jobs: if [ "$model_name" != "whisper-base" ]; then echo " Copying model contents from: $model_name" # Copy the contents of the model directory, not the directory itself - cp -r "$dir"* dist/faster_whisper_transwithai_chickenrice/models/ 2>/dev/null || true + cp -r "$dir"* dist/faster_whisper_transwithai_chickenrice/engine/models/ 2>/dev/null || true # Also copy hidden files if any exist - cp -r "$dir".* dist/faster_whisper_transwithai_chickenrice/models/ 2>/dev/null || true + cp -r "$dir".* dist/faster_whisper_transwithai_chickenrice/engine/models/ 2>/dev/null || true fi fi done @@ -332,7 +345,7 @@ jobs: echo "" echo "Models in distribution:" - find dist/faster_whisper_transwithai_chickenrice/models -maxdepth 1 -printf "%M %u %g %s %TY-%Tm-%Td %TH:%TM %p\n" 2>/dev/null + find dist/faster_whisper_transwithai_chickenrice/engine/models -maxdepth 1 -printf "%M %u %g %s %TY-%Tm-%Td %TH:%TM %p\n" 2>/dev/null echo "" echo "Total distribution size:" du -sh dist/faster_whisper_transwithai_chickenrice/ @@ -380,8 +393,9 @@ jobs: - name: Test executable (CPU mode) shell: cmd /C CALL {0} run: | - cd dist\faster_whisper_transwithai_chickenrice + cd dist\faster_whisper_transwithai_chickenrice\engine infer.exe --help + cd ..\client modal_infer.exe --help - name: Upload artifact From 1a00c3a4f886ac862cafeea7cc9f74da882926dc Mon Sep 17 00:00:00 2001 From: neo Date: Tue, 13 Jan 2026 16:59:11 +0800 Subject: [PATCH 13/25] =?UTF-8?q?amend=20=E6=8A=8A.spec=20=E7=9A=84=20COLL?= =?UTF-8?q?ECT=20=E6=8B=86=E6=88=90=E4=B8=A4=E4=B8=AA=E8=BE=93=E5=87=BA?= =?UTF-8?q?=E7=9B=AE=E5=BD=95=EF=BC=88dist/engine=20=E5=92=8C=20dist/clien?= =?UTF-8?q?t=EF=BC=89=EF=BC=8C=E4=BB=A5=E5=BD=BB=E5=BA=95=E9=9A=94?= =?UTF-8?q?=E7=A6=BB=20DLL=20=E5=86=B2=E7=AA=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- project.spec | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/project.spec b/project.spec index 24e99c0..9deb0c2 100644 --- a/project.spec +++ b/project.spec @@ -497,17 +497,24 @@ modal_exe = EXE( icon='transwithai.ico' if os.path.exists('transwithai.ico') else None, ) -coll = COLLECT( +coll_infer = COLLECT( infer_exe, - modal_exe, a_infer.binaries, a_infer.zipfiles, a_infer.datas, + strip=False, + upx=False, + upx_exclude=[], + name='engine', +) + +coll_modal = COLLECT( + modal_exe, a_modal.binaries, a_modal.zipfiles, a_modal.datas, strip=False, upx=False, upx_exclude=[], - name='faster_whisper_transwithai_chickenrice', + name='client', ) From c45c08c9f9c0ab230d3faa48ea6bb1406a0cce35 Mon Sep 17 00:00:00 2001 From: neo <2418660459@qq.com> Date: Tue, 13 Jan 2026 18:09:00 +0800 Subject: [PATCH 14/25] =?UTF-8?q?=E4=BF=AE=E6=94=B9Verify=20build=20succee?= =?UTF-8?q?ded=E6=A3=80=E6=9F=A5=E7=9B=AE=E5=BD=95=E9=9A=94=E7=A6=BB?= =?UTF-8?q?=E4=B8=8B=E7=9A=84=E4=B8=A4=E4=B8=AA=E5=AD=90=E7=9B=AE=E5=BD=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- build_windows.py | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/build_windows.py b/build_windows.py index 83c3eb9..a58157e 100644 --- a/build_windows.py +++ b/build_windows.py @@ -209,8 +209,17 @@ def build(): # Verify build succeeded and check for CUDA libraries if result.returncode == 0: - dist_dir = Path("dist/faster_whisper_transwithai_chickenrice") + dist_root = Path("dist") + dist_dir = dist_root / "faster_whisper_transwithai_chickenrice" + engine_dir = dist_root / "engine" + client_dir = dist_root / "client" + if dist_dir.exists(): + verify_dirs = [dist_dir] + else: + verify_dirs = [p for p in [engine_dir, client_dir] if p.exists()] + + if verify_dirs: # Quick verification of critical libraries print("\nVerifying CUDA libraries in distribution...") @@ -219,15 +228,20 @@ def build(): missing_libs = [] # Check in root directory and all subdirectories - all_dlls = list(dist_dir.glob("**/*.dll")) + all_dlls = [] + for root_dir in verify_dirs: + all_dlls.extend(root_dir.glob("**/*.dll")) for critical in critical_libs: found_in_locations = [] for dll_path in all_dlls: if critical in dll_path.name.lower(): - # Get relative path from dist_dir - rel_path = dll_path.relative_to(dist_dir) - location = str(rel_path.parent) if str(rel_path.parent) != '.' else 'root' + # Get relative path from dist root for consistent display + try: + rel_path = dll_path.relative_to(dist_root) + location = str(rel_path.parent) if str(rel_path.parent) != '.' else 'root' + except ValueError: + location = str(dll_path.parent) found_in_locations.append(location) if found_in_locations: @@ -247,9 +261,13 @@ def build(): print(" Note: The PyInstaller hooks should have included these.") print(" If GPU acceleration doesn't work, check your conda environment.") - print(f"\nBuild complete! Output in: {dist_dir}") + if dist_dir.exists(): + output_locations = [str(dist_dir)] + else: + output_locations = [str(p) for p in verify_dirs] + print(f"\nBuild complete! Output in: {', '.join(output_locations)}") else: - print("Error: dist/faster_whisper_transwithai_chickenrice directory not found after build") + print("Error: dist/engine or dist/client directory not found after build") return 1 else: print("\nBuild failed!") From aec0e5662261b4ba1193202fe22c1ada4e8f2484 Mon Sep 17 00:00:00 2001 From: neo <2418660459@qq.com> Date: Tue, 13 Jan 2026 18:26:12 +0800 Subject: [PATCH 15/25] change back to v1.5 .spec and workflow --- .github/workflows/build-release-conda.yml | 73 ++--------- project.spec | 145 +++------------------- 2 files changed, 25 insertions(+), 193 deletions(-) diff --git a/.github/workflows/build-release-conda.yml b/.github/workflows/build-release-conda.yml index 08f5a79..6c377eb 100644 --- a/.github/workflows/build-release-conda.yml +++ b/.github/workflows/build-release-conda.yml @@ -28,45 +28,12 @@ jobs: strategy: matrix: include: - # CUDA 11.8 versions - - cuda: "11.8" - env_file: "environment-cuda118.yml" - env_name: "faster-whisper-cu118" - artifact_suffix: "cu118" - model_variant: "base" - hf_model: "" - - cuda: "11.8" - env_file: "environment-cuda118.yml" - env_name: "faster-whisper-cu118" - artifact_suffix: "cu118-chickenrice" - model_variant: "chickenrice" - hf_model: "--hf-model chickenrice0721/whisper-large-v2-translate-zh-v0.2-st-ct2" - # CUDA 12.2 versions - - cuda: "12.2" - env_file: "environment-cuda122.yml" - env_name: "faster-whisper-cu122" - artifact_suffix: "cu122" - model_variant: "base" - hf_model: "" - - cuda: "12.2" - env_file: "environment-cuda122.yml" - env_name: "faster-whisper-cu122" - artifact_suffix: "cu122-chickenrice" - model_variant: "chickenrice" - hf_model: "--hf-model chickenrice0721/whisper-large-v2-translate-zh-v0.2-st-ct2" - # CUDA 12.8 versions - cuda: "12.8" env_file: "environment-cuda128.yml" env_name: "faster-whisper-cu128" artifact_suffix: "cu128" model_variant: "base" hf_model: "" - - cuda: "12.8" - env_file: "environment-cuda128.yml" - env_name: "faster-whisper-cu128" - artifact_suffix: "cu128-chickenrice" - model_variant: "chickenrice" - hf_model: "--hf-model chickenrice0721/whisper-large-v2-translate-zh-v0.2-st-ct2" steps: - name: Checkout code @@ -212,11 +179,6 @@ jobs: python -c "import ctranslate2; print(f'CTranslate2 version: {ctranslate2.__version__}')" echo "Note: CUDA availability check skipped (no GPU on GitHub runners)" - - name: Install Modal dependencies - run: | - pip install modal questionary - python -c "import modal; print(f'Modal version: {modal.__version__}')" - - name: Check cached models run: | echo "Checking for cached models..." @@ -285,19 +247,6 @@ jobs: python build_windows.py - - name: Prepare distribution layout - run: | - echo "Preparing distribution layout..." - mkdir -p dist/faster_whisper_transwithai_chickenrice - if [ -d "dist/engine" ]; then - mv dist/engine dist/faster_whisper_transwithai_chickenrice/engine - fi - if [ -d "dist/client" ]; then - mv dist/client dist/faster_whisper_transwithai_chickenrice/client - fi - echo "Distribution root:" - find dist/faster_whisper_transwithai_chickenrice -maxdepth 2 -type d -printf "%M %u %g %s %TY-%Tm-%Td %TH:%TM %p\n" 2>/dev/null || true - - name: Copy models to distribution run: | echo "Copying models to distribution directory..." @@ -307,19 +256,19 @@ jobs: find models -maxdepth 1 -printf "%M %u %g %s %TY-%Tm-%Td %TH:%TM %p\n" 2>/dev/null echo "" - # Create models directory in engine dist - mkdir -p dist/faster_whisper_transwithai_chickenrice/engine/models + # Create models directory in dist + mkdir -p dist/faster_whisper_transwithai_chickenrice/models # Copy VAD model files (always included) echo "Copying VAD models..." - cp models/*.onnx dist/faster_whisper_transwithai_chickenrice/engine/models/ 2>/dev/null || true - cp models/*.json dist/faster_whisper_transwithai_chickenrice/engine/models/ 2>/dev/null || true + cp models/*.onnx dist/faster_whisper_transwithai_chickenrice/models/ 2>/dev/null || true + cp models/*.json dist/faster_whisper_transwithai_chickenrice/models/ 2>/dev/null || true # Copy whisper-base for feature extractor (always included for offline usage) echo "Copying whisper-base for feature extractor..." if [ -d "models/whisper-base" ]; then echo " Found whisper-base directory, copying..." - cp -r models/whisper-base dist/faster_whisper_transwithai_chickenrice/engine/models/ 2>/dev/null || true + cp -r models/whisper-base dist/faster_whisper_transwithai_chickenrice/models/ 2>/dev/null || true echo " Whisper-base copied for offline feature extractor support" else echo " WARNING: whisper-base directory not found" @@ -335,9 +284,9 @@ jobs: if [ "$model_name" != "whisper-base" ]; then echo " Copying model contents from: $model_name" # Copy the contents of the model directory, not the directory itself - cp -r "$dir"* dist/faster_whisper_transwithai_chickenrice/engine/models/ 2>/dev/null || true + cp -r "$dir"* dist/faster_whisper_transwithai_chickenrice/models/ 2>/dev/null || true # Also copy hidden files if any exist - cp -r "$dir".* dist/faster_whisper_transwithai_chickenrice/engine/models/ 2>/dev/null || true + cp -r "$dir".* dist/faster_whisper_transwithai_chickenrice/models/ 2>/dev/null || true fi fi done @@ -345,7 +294,7 @@ jobs: echo "" echo "Models in distribution:" - find dist/faster_whisper_transwithai_chickenrice/engine/models -maxdepth 1 -printf "%M %u %g %s %TY-%Tm-%Td %TH:%TM %p\n" 2>/dev/null + find dist/faster_whisper_transwithai_chickenrice/models -maxdepth 1 -printf "%M %u %g %s %TY-%Tm-%Td %TH:%TM %p\n" 2>/dev/null echo "" echo "Total distribution size:" du -sh dist/faster_whisper_transwithai_chickenrice/ @@ -393,10 +342,8 @@ jobs: - name: Test executable (CPU mode) shell: cmd /C CALL {0} run: | - cd dist\faster_whisper_transwithai_chickenrice\engine + cd dist\faster_whisper_transwithai_chickenrice infer.exe --help - cd ..\client - modal_infer.exe --help - name: Upload artifact uses: actions/upload-artifact@v4 @@ -667,4 +614,4 @@ jobs: repository: ${{ github.repository }} tag_name: ${{ github.ref }} files: faster_whisper_transwithai_windows_cu128-chickenrice.zip - token: ${{ secrets.GITHUB_TOKEN }} + token: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/project.spec b/project.spec index 9deb0c2..b8078db 100644 --- a/project.spec +++ b/project.spec @@ -345,69 +345,12 @@ datas += [ ('locales', 'locales'), # Include the locales directory with translations ] -# Base collections (for infer.exe only) -infer_datas = list(datas) -infer_binaries = list(binaries) -infer_hiddenimports = list(hiddenimports) - -# Extend collections for modal_infer.exe only -modal_datas = list(datas) -modal_binaries = list(binaries) -modal_hiddenimports = list(hiddenimports) - -# Collect modal / questionary and their tricky deps explicitly (modal_infer only) -try: - m_datas, m_binaries, m_hiddenimports = collect_all('modal') - modal_datas += m_datas - modal_binaries += m_binaries - modal_hiddenimports += m_hiddenimports - print("Collected modal successfully") -except: - print("Warning: could not collect modal") - -try: - s_datas, s_binaries, s_hiddenimports = collect_all('synchronicity') - modal_datas += s_datas - modal_binaries += s_binaries - modal_hiddenimports += s_hiddenimports -except: - print("Warning: could not collect synchronicity") - -try: - q_datas, q_binaries, q_hiddenimports = collect_all('questionary') - modal_datas += q_datas - modal_binaries += q_binaries - modal_hiddenimports += q_hiddenimports -except: - print("Warning: could not collect questionary") - -modal_hiddenimports += [ - 'modal', - 'modal.proto', - 'synchronicity', - 'grpclib', - 'google.protobuf', - 'google.protobuf.internal', - 'toml', - 'rich', - 'typer', - 'click', - 'questionary', - 'prompt_toolkit', - 'prompt_toolkit.styles', - 'prompt_toolkit.key_binding', - 'prompt_toolkit.formatted_text', - 'prompt_toolkit.shortcuts', - 'prompt_toolkit.output', - 'prompt_toolkit.input', -] - -a_infer = Analysis( +a = Analysis( ['infer.py'], pathex=[], - binaries=infer_binaries, - datas=infer_datas, - hiddenimports=infer_hiddenimports, + binaries=binaries, + datas=datas, + hiddenimports=hiddenimports, hookspath=[], # PyInstaller hooks contrib should be auto-detected hooksconfig={}, runtime_hooks=['runtime_hook.py'], # Add runtime hook to set KMP_DUPLICATE_LIB_OK @@ -429,39 +372,11 @@ a_infer = Analysis( noarchive=False, ) -a_modal = Analysis( - ['modal_infer.py'], - pathex=[], - binaries=modal_binaries, - datas=modal_datas, - hiddenimports=modal_hiddenimports, - hookspath=[], - hooksconfig={}, - runtime_hooks=['runtime_hook.py'], - excludes=[ - 'matplotlib', - 'tkinter', - 'PyQt5', - 'PyQt6', - 'PySide2', - 'PySide6', - 'notebook', - 'jupyter', - 'IPython', - 'pytest', - ], - win_no_prefer_redirects=False, - win_private_assemblies=False, - cipher=block_cipher, - noarchive=False, -) +pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher) -pyz_infer = PYZ(a_infer.pure, a_infer.zipped_data, cipher=block_cipher) -pyz_modal = PYZ(a_modal.pure, a_modal.zipped_data, cipher=block_cipher) - -infer_exe = EXE( - pyz_infer, - [a_infer.scripts[0]], +exe = EXE( + pyz, + a.scripts, [], exclude_binaries=True, name='infer', @@ -478,43 +393,13 @@ infer_exe = EXE( icon='transwithai.ico' if os.path.exists('transwithai.ico') else None, ) -modal_exe = EXE( - pyz_modal, - [a_modal.scripts[0]], - [], - exclude_binaries=True, - name='modal_infer', - debug=False, - bootloader_ignore_signals=False, - strip=False, - upx=False, - console=True, - disable_windowed_traceback=False, - argv_emulation=False, - target_arch=None, - codesign_identity=None, - entitlements_file=None, - icon='transwithai.ico' if os.path.exists('transwithai.ico') else None, -) - -coll_infer = COLLECT( - infer_exe, - a_infer.binaries, - a_infer.zipfiles, - a_infer.datas, +coll = COLLECT( + exe, + a.binaries, + a.zipfiles, + a.datas, strip=False, upx=False, upx_exclude=[], - name='engine', -) - -coll_modal = COLLECT( - modal_exe, - a_modal.binaries, - a_modal.zipfiles, - a_modal.datas, - strip=False, - upx=False, - upx_exclude=[], - name='client', -) + name='faster_whisper_transwithai_chickenrice', +) \ No newline at end of file From 7fd3f7eea6be7f322003f1d69fc44a32a249e746 Mon Sep 17 00:00:00 2001 From: neo <2418660459@qq.com> Date: Tue, 13 Jan 2026 19:27:03 +0800 Subject: [PATCH 16/25] standalone modal build process --- .github/workflows/build-release-conda.yml | 34 ++++++++++++++ build_windows.py | 17 ++++++- modal.spec | 54 +++++++++++++++++++++++ modal_infer.py | 15 ++++--- 4 files changed, 114 insertions(+), 6 deletions(-) create mode 100644 modal.spec diff --git a/.github/workflows/build-release-conda.yml b/.github/workflows/build-release-conda.yml index 6c377eb..3d22b0c 100644 --- a/.github/workflows/build-release-conda.yml +++ b/.github/workflows/build-release-conda.yml @@ -28,12 +28,45 @@ jobs: strategy: matrix: include: + # CUDA 11.8 versions + - cuda: "11.8" + env_file: "environment-cuda118.yml" + env_name: "faster-whisper-cu118" + artifact_suffix: "cu118" + model_variant: "base" + hf_model: "" + - cuda: "11.8" + env_file: "environment-cuda118.yml" + env_name: "faster-whisper-cu118" + artifact_suffix: "cu118-chickenrice" + model_variant: "chickenrice" + hf_model: "--hf-model chickenrice0721/whisper-large-v2-translate-zh-v0.2-st-ct2" + # CUDA 12.2 versions + - cuda: "12.2" + env_file: "environment-cuda122.yml" + env_name: "faster-whisper-cu122" + artifact_suffix: "cu122" + model_variant: "base" + hf_model: "" + - cuda: "12.2" + env_file: "environment-cuda122.yml" + env_name: "faster-whisper-cu122" + artifact_suffix: "cu122-chickenrice" + model_variant: "chickenrice" + hf_model: "--hf-model chickenrice0721/whisper-large-v2-translate-zh-v0.2-st-ct2" + # CUDA 12.8 versions - cuda: "12.8" env_file: "environment-cuda128.yml" env_name: "faster-whisper-cu128" artifact_suffix: "cu128" model_variant: "base" hf_model: "" + - cuda: "12.8" + env_file: "environment-cuda128.yml" + env_name: "faster-whisper-cu128" + artifact_suffix: "cu128-chickenrice" + model_variant: "chickenrice" + hf_model: "--hf-model chickenrice0721/whisper-large-v2-translate-zh-v0.2-st-ct2" steps: - name: Checkout code @@ -344,6 +377,7 @@ jobs: run: | cd dist\faster_whisper_transwithai_chickenrice infer.exe --help + modal_infer.exe --help - name: Upload artifact uses: actions/upload-artifact@v4 diff --git a/build_windows.py b/build_windows.py index a58157e..cba704b 100644 --- a/build_windows.py +++ b/build_windows.py @@ -209,6 +209,21 @@ def build(): # Verify build succeeded and check for CUDA libraries if result.returncode == 0: + # Build modal_infer if modal.spec is present (separate target). + modal_spec = Path("modal.spec") + if modal_spec.exists(): + modal_cmd = [ + sys.executable, "-m", "PyInstaller", + "--clean", + "--noconfirm", + str(modal_spec), + ] + print(f"\nRunning: {' '.join(modal_cmd)}") + modal_result = subprocess.run(modal_cmd, capture_output=False) + if modal_result.returncode != 0: + print("\nModal build failed!") + return 1 + dist_root = Path("dist") dist_dir = dist_root / "faster_whisper_transwithai_chickenrice" engine_dir = dist_root / "engine" @@ -276,4 +291,4 @@ def build(): return 0 if __name__ == "__main__": - sys.exit(build()) \ No newline at end of file + sys.exit(build()) diff --git a/modal.spec b/modal.spec new file mode 100644 index 0000000..a9ad761 --- /dev/null +++ b/modal.spec @@ -0,0 +1,54 @@ +# -*- mode: python ; coding: utf-8 -*- +import os +from PyInstaller.utils.hooks import collect_all + +block_cipher = None + +datas = [("environment-cuda128.yml", ".")] +binaries = [] +hiddenimports = [] + +for package in ["modal", "questionary", "prompt_toolkit", "rich", "typer", "click"]: + try: + pkg_datas, pkg_binaries, pkg_hiddenimports = collect_all(package) + datas += pkg_datas + binaries += pkg_binaries + hiddenimports += pkg_hiddenimports + except Exception: + pass + +a = Analysis( + ["modal_infer.py"], + pathex=[], + binaries=binaries, + datas=datas, + hiddenimports=hiddenimports, + hookspath=[], + hooksconfig={}, + runtime_hooks=[], + excludes=[], + noarchive=False, +) + +pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher) + +exe = EXE( + pyz, + a.scripts, + a.binaries, + a.zipfiles, + a.datas, + exclude_binaries=False, + name="modal_infer", + debug=False, + bootloader_ignore_signals=False, + strip=False, + upx=False, + console=True, + disable_windowed_traceback=False, + argv_emulation=False, + target_arch=None, + codesign_identity=None, + entitlements_file=None, + icon="transwithai.ico" if os.path.exists("transwithai.ico") else None, +) diff --git a/modal_infer.py b/modal_infer.py index 3668de0..75d066c 100644 --- a/modal_infer.py +++ b/modal_infer.py @@ -7,7 +7,7 @@ import sys from dataclasses import dataclass from datetime import datetime -from pathlib import Path +from pathlib import Path, PurePosixPath import subprocess <<<<<<< HEAD <<<<<<< HEAD @@ -37,7 +37,7 @@ REPO_URL = "https://github.com/TransWithAI/Faster-Whisper-TransWithAI-ChickenRice" <<<<<<< HEAD VOLUME_NAME = "Faster_Whisper" -VOLUME_ROOT = Path("/Faster_Whisper") +VOLUME_ROOT = "/Faster_Whisper" REMOTE_MOUNT = VOLUME_ROOT APP_ROOT_REL = Path(APP_NAME) SESSION_SUBDIR = Path("sessions") @@ -98,6 +98,10 @@ >>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) ] +def resolve_resource_path(filename: str) -> Path: + base_dir = Path(getattr(sys, "_MEIPASS", Path(__file__).resolve().parent)) + return base_dir / filename + @dataclass class ModelProfile: @@ -167,7 +171,8 @@ def rel_to_volume_path(path: Path) -> str: def rel_to_container_path(path: Path) -> str: - return str((REMOTE_MOUNT / path).as_posix()) + base = PurePosixPath(REMOTE_MOUNT) + return str((base / path.as_posix()).as_posix()) def volume_path_to_relative(path: str) -> Path: @@ -439,7 +444,7 @@ def upload_single_file( base_dir: 基础目录(用于文件夹模式,输出到此目录) """ session_id = f"{datetime.now().strftime('%Y%m%d-%H%M%S')}-{uuid4().hex[:6]}" - remote_session_rel = SESSION_SUBDIR / session_id + remote_session_rel = Path(SESSION_SUBDIR) / session_id remote_logs_rel = remote_session_rel / "logs" # 使用固定文件名避免全角字符等问题 @@ -546,7 +551,7 @@ def build_modal_image() -> modal.Image: modal.Image.micromamba(python_version="3.10") .apt_install("git") .micromamba_install( - spec_file="environment-cuda128.yml", + spec_file=str(resolve_resource_path("environment-cuda128.yml")), channels=["conda-forge", "defaults"], ) .pip_install("modal", "questionary") From 3c6d1129c2abdb316d13b5010eb0a698992055b7 Mon Sep 17 00:00:00 2001 From: neo <2418660459@qq.com> Date: Tue, 13 Jan 2026 19:45:38 +0800 Subject: [PATCH 17/25] change modal.exe output path --- build_windows.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/build_windows.py b/build_windows.py index cba704b..c153f14 100644 --- a/build_windows.py +++ b/build_windows.py @@ -216,6 +216,8 @@ def build(): sys.executable, "-m", "PyInstaller", "--clean", "--noconfirm", + "--distpath", str(Path("dist") / "faster_whisper_transwithai_chickenrice"), + "--workpath", str(Path("build") / "modal"), str(modal_spec), ] print(f"\nRunning: {' '.join(modal_cmd)}") From d4c30e9802d7e871cacea16d6db4df6b9cd99c30 Mon Sep 17 00:00:00 2001 From: neo <2418660459@qq.com> Date: Tue, 13 Jan 2026 19:59:54 +0800 Subject: [PATCH 18/25] install modal dependencies on the fly --- build_windows.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/build_windows.py b/build_windows.py index c153f14..cc8dd7d 100644 --- a/build_windows.py +++ b/build_windows.py @@ -212,6 +212,21 @@ def build(): # Build modal_infer if modal.spec is present (separate target). modal_spec = Path("modal.spec") if modal_spec.exists(): + # Ensure modal dependencies are available in the current env. + try: + import modal # noqa: F401 + import questionary # noqa: F401 + except ImportError: + print("\nmodal/questionary not found; installing for modal.spec build...") + install_cmd = [ + sys.executable, "-m", "pip", "install", + "modal", "questionary", + ] + install_result = subprocess.run(install_cmd, capture_output=False) + if install_result.returncode != 0: + print("\nFailed to install modal/questionary.") + return 1 + modal_cmd = [ sys.executable, "-m", "PyInstaller", "--clean", From bc5efca0f27c30583182b45c02b5c09047b9f9e5 Mon Sep 17 00:00:00 2001 From: neo <2418660459@qq.com> Date: Tue, 13 Jan 2026 20:14:36 +0800 Subject: [PATCH 19/25] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E4=BA=86=20ensure=5Fut?= =?UTF-8?q?f8=5Fstdio()=EF=BC=8C=E5=BC=BA=E5=88=B6=E6=8A=8A=20stdout/stder?= =?UTF-8?q?r=20=E6=94=B9=E6=88=90=20UTF=E2=80=918=EF=BC=88=E5=B9=B6?= =?UTF-8?q?=E7=94=A8=20errors=3D"replace"=20=E5=85=9C=E5=BA=95=EF=BC=89?= =?UTF-8?q?=EF=BC=8C=E9=81=BF=E5=85=8D=20argparse=20=E6=89=93=E5=8D=B0?= =?UTF-8?q?=E4=B8=AD=E6=96=87=E6=97=B6=E5=B4=A9=E6=8E=89=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modal_infer.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/modal_infer.py b/modal_infer.py index 75d066c..576a9fe 100644 --- a/modal_infer.py +++ b/modal_infer.py @@ -1,8 +1,7 @@ -"""feature-modal: 交互式 CLI,完成 Modal App 构建、音频上传、推理执行与结果回传。""" - from __future__ import annotations import argparse +import io import logging import sys from dataclasses import dataclass @@ -20,6 +19,28 @@ >>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑,逐文件上传避免连接断开) from uuid import uuid4 +def ensure_utf8_stdio() -> None: + for name in ("stdout", "stderr"): + stream = getattr(sys, name, None) + if stream is None: + continue + try: + encoding = getattr(stream, "encoding", None) + if encoding and encoding.lower().startswith("utf-8"): + continue + if hasattr(stream, "reconfigure"): + stream.reconfigure(encoding="utf-8", errors="replace") + elif hasattr(stream, "buffer"): + setattr( + sys, + name, + io.TextIOWrapper(stream.buffer, encoding="utf-8", errors="replace"), + ) + except Exception: + pass + +ensure_utf8_stdio() + try: import questionary # type: ignore from questionary import Choice # type: ignore From d39e0f5dd71a927db45d3bc00ca1c5b807d75999 Mon Sep 17 00:00:00 2001 From: neo <2418660459@qq.com> Date: Tue, 13 Jan 2026 20:44:14 +0800 Subject: [PATCH 20/25] =?UTF-8?q?=E6=96=B0=E5=A2=9E=20REPO=5FREF=20=3D=20"?= =?UTF-8?q?v1.4"=EF=BC=8C=E5=B9=B6=E5=9C=A8=20clone/update=20=E6=97=B6?= =?UTF-8?q?=E5=BC=BA=E5=88=B6=20checkout/reset=20=E5=88=B0=E8=AF=A5?= =?UTF-8?q?=E7=89=88=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modal_infer.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/modal_infer.py b/modal_infer.py index 576a9fe..f30f573 100644 --- a/modal_infer.py +++ b/modal_infer.py @@ -57,6 +57,7 @@ def ensure_utf8_stdio() -> None: APP_NAME = "Faster-Whisper-TransWithAI-ChickenRice" REPO_URL = "https://github.com/TransWithAI/Faster-Whisper-TransWithAI-ChickenRice" <<<<<<< HEAD +REPO_REF = "v1.4" VOLUME_NAME = "Faster_Whisper" VOLUME_ROOT = "/Faster_Whisper" REMOTE_MOUNT = VOLUME_ROOT @@ -1010,11 +1011,12 @@ def log(msg: str) -> None: if not (repo_dir / ".git").exists(): log("开始克隆仓库...") - run(["git", "clone", REPO_URL, str(repo_dir)]) + run(["git", "clone", "--branch", REPO_REF, "--depth", "1", REPO_URL, str(repo_dir)]) else: log("更新仓库...") - run(["git", "-C", str(repo_dir), "fetch", "origin", "main"]) - run(["git", "-C", str(repo_dir), "reset", "--hard", "origin/main"]) + run(["git", "-C", str(repo_dir), "fetch", "--tags", "origin"]) + run(["git", "-C", str(repo_dir), "checkout", "-f", REPO_REF]) + run(["git", "-C", str(repo_dir), "reset", "--hard", REPO_REF]) model_profile = job["model_profile"] model_path = repo_dir / "models" From 25eebe2f38a3813817756e6f0596e8d45fc9134b Mon Sep 17 00:00:00 2001 From: neo <2418660459@qq.com> Date: Tue, 13 Jan 2026 21:08:38 +0800 Subject: [PATCH 21/25] fix: audio_suffixes variable added to modal_infer.py --- modal_infer.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/modal_infer.py b/modal_infer.py index f30f573..f6ccc72 100644 --- a/modal_infer.py +++ b/modal_infer.py @@ -1061,16 +1061,12 @@ def snapshot(path: str) -> set: cmd = [ "python", str(repo_dir / "infer.py"), - "--device", - "cuda", - "--model_name_or_path", - str(model_path), - "--sub_formats", - job["sub_formats"], - "--log_level", - "INFO", - "--output_dir", - str(output_dir), + "--audio_suffixes", "mp3,wav,flac,m4a,aac,ogg,wma,mp4,mkv,avi,mov,webm,flv,wmv", + "--device","cuda", + "--model_name_or_path",str(model_path), + "--sub_formats",job["sub_formats"], + "--log_level","INFO", + "--output_dir",str(output_dir), ] if job["enable_batching"]: cmd.append("--enable_batching") From f51736942beb044141ce36ff358773c7c33cdd14 Mon Sep 17 00:00:00 2001 From: neo <2418660459@qq.com> Date: Tue, 13 Jan 2026 21:27:14 +0800 Subject: [PATCH 22/25] clean diff with v1.6 --- .gitignore | 1 - build_windows.py | 28 ++++++++-------------------- environment-cuda118.yml | 3 +-- environment-cuda128.yml | 3 +-- 4 files changed, 10 insertions(+), 25 deletions(-) diff --git a/.gitignore b/.gitignore index 8ff3ddd..0d6661a 100644 --- a/.gitignore +++ b/.gitignore @@ -10,7 +10,6 @@ __pycache__/ # C extensions *.so -node_modules/ # Distribution / packaging .Python build/ diff --git a/build_windows.py b/build_windows.py index cc8dd7d..7932ade 100644 --- a/build_windows.py +++ b/build_windows.py @@ -209,6 +209,7 @@ def build(): # Verify build succeeded and check for CUDA libraries if result.returncode == 0: + dist_dir = Path("dist/faster_whisper_transwithai_chickenrice") # Build modal_infer if modal.spec is present (separate target). modal_spec = Path("modal.spec") if modal_spec.exists(): @@ -247,11 +248,6 @@ def build(): client_dir = dist_root / "client" if dist_dir.exists(): - verify_dirs = [dist_dir] - else: - verify_dirs = [p for p in [engine_dir, client_dir] if p.exists()] - - if verify_dirs: # Quick verification of critical libraries print("\nVerifying CUDA libraries in distribution...") @@ -260,20 +256,15 @@ def build(): missing_libs = [] # Check in root directory and all subdirectories - all_dlls = [] - for root_dir in verify_dirs: - all_dlls.extend(root_dir.glob("**/*.dll")) + all_dlls = list(dist_dir.glob("**/*.dll")) for critical in critical_libs: found_in_locations = [] for dll_path in all_dlls: if critical in dll_path.name.lower(): - # Get relative path from dist root for consistent display - try: - rel_path = dll_path.relative_to(dist_root) - location = str(rel_path.parent) if str(rel_path.parent) != '.' else 'root' - except ValueError: - location = str(dll_path.parent) + # Get relative path from dist_dir + rel_path = dll_path.relative_to(dist_dir) + location = str(rel_path.parent) if str(rel_path.parent) != '.' else 'root' found_in_locations.append(location) if found_in_locations: @@ -293,13 +284,9 @@ def build(): print(" Note: The PyInstaller hooks should have included these.") print(" If GPU acceleration doesn't work, check your conda environment.") - if dist_dir.exists(): - output_locations = [str(dist_dir)] - else: - output_locations = [str(p) for p in verify_dirs] - print(f"\nBuild complete! Output in: {', '.join(output_locations)}") + print(f"\nBuild complete! Output in: {dist_dir}") else: - print("Error: dist/engine or dist/client directory not found after build") + print("Error: dist/faster_whisper_transwithai_chickenrice directory not found after build") return 1 else: print("\nBuild failed!") @@ -309,3 +296,4 @@ def build(): if __name__ == "__main__": sys.exit(build()) + diff --git a/environment-cuda118.yml b/environment-cuda118.yml index 8d3c832..7d66261 100644 --- a/environment-cuda118.yml +++ b/environment-cuda118.yml @@ -37,8 +37,7 @@ dependencies: - backports.functools-lru-cache # Fix for PyInstaller ModuleNotFoundError # Build tools - # PyInstaller 6.17.0 has a conda hook regression (KeyError: 'depends') in some conda environments - - pyinstaller==6.16.0 + - pyinstaller>=6.0.0 - setuptools>=65.0.0 - wheel>=0.38.0 - build>=0.10.0 diff --git a/environment-cuda128.yml b/environment-cuda128.yml index 655dd6a..b9ff41a 100644 --- a/environment-cuda128.yml +++ b/environment-cuda128.yml @@ -34,8 +34,7 @@ dependencies: - backports.functools-lru-cache # Fix for PyInstaller ModuleNotFoundError # Build tools - # PyInstaller 6.17.0 has a conda hook regression (KeyError: 'depends') in some conda environments - - pyinstaller==6.16.0 + - pyinstaller>=6.0.0 - setuptools>=65.0.0 - wheel>=0.38.0 - build>=0.10.0 From e90a3305326e2e3ec08ce8ebf9d906a63b2e9acf Mon Sep 17 00:00:00 2001 From: neo Date: Wed, 14 Jan 2026 10:01:06 +0800 Subject: [PATCH 23/25] remove HEAD<< artifacts from rebase process --- modal_infer.py | 371 +------------------------------------------------ 1 file changed, 4 insertions(+), 367 deletions(-) diff --git a/modal_infer.py b/modal_infer.py index f6ccc72..8cf646c 100644 --- a/modal_infer.py +++ b/modal_infer.py @@ -8,15 +8,7 @@ from datetime import datetime from pathlib import Path, PurePosixPath import subprocess -<<<<<<< HEAD -<<<<<<< HEAD from typing import Dict, List, Optional, Sequence, Tuple -======= -from typing import Dict, Iterable, List, Optional, Sequence, Tuple ->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) -======= -from typing import Dict, List, Optional, Sequence, Tuple ->>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑,逐文件上传避免连接断开) from uuid import uuid4 def ensure_utf8_stdio() -> None: @@ -56,21 +48,13 @@ def ensure_utf8_stdio() -> None: APP_NAME = "Faster-Whisper-TransWithAI-ChickenRice" REPO_URL = "https://github.com/TransWithAI/Faster-Whisper-TransWithAI-ChickenRice" -<<<<<<< HEAD REPO_REF = "v1.4" VOLUME_NAME = "Faster_Whisper" VOLUME_ROOT = "/Faster_Whisper" REMOTE_MOUNT = VOLUME_ROOT -APP_ROOT_REL = Path(APP_NAME) -SESSION_SUBDIR = Path("sessions") -======= -VOLUME_NAME = "agent_volume" -VOLUME_ROOT = Path("/agent_volume") -REMOTE_MOUNT = VOLUME_ROOT -APP_ROOT_REL = Path(APP_NAME) -SESSION_SUBDIR = APP_ROOT_REL / "sessions" ->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) -REPO_VOLUME_DIR = VOLUME_ROOT / "repo" +APP_ROOT_REL = APP_NAME +SESSION_SUBDIR = "sessions" +REPO_VOLUME_DIR = f"{VOLUME_ROOT}/repo" SUB_FORMATS = "srt,vtt,lrc" SUB_SUFFIXES = {".srt", ".vtt", ".lrc"} AUDIO_SUFFIXES = { @@ -81,13 +65,6 @@ def ensure_utf8_stdio() -> None: ".aac", ".ogg", ".wma", -<<<<<<< HEAD -<<<<<<< HEAD -======= - ".mp4", ->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) -======= ->>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑,逐文件上传避免连接断开) ".mkv", ".avi", ".mov", @@ -95,17 +72,10 @@ def ensure_utf8_stdio() -> None: ".flv", ".wmv", } -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑,逐文件上传避免连接断开) VIDEO_NEED_CONVERT = {".mp4"} # 需要用户手动转换的格式 DEFAULT_GPU_CHOICES = [ "T4", "L4", -======= -DEFAULT_GPU_CHOICES = [ ->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) "L40S", "A10G", "A100-40GB", @@ -113,11 +83,6 @@ def ensure_utf8_stdio() -> None: "H100", "H200", "B200", -<<<<<<< HEAD -======= - "L4", - "T4", ->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) ] def resolve_resource_path(filename: str) -> Path: @@ -157,7 +122,6 @@ class UploadManifest: remote_output_rel: Path local_output_dir: Path remote_logs_rel: Path -<<<<<<< HEAD original_filename: Optional[str] = None # 原始文件名(用于恢复空格) @@ -169,20 +133,6 @@ class ScanResult: class NoAudioFilesError(Exception): pass -======= - - -@dataclass -class ScanResult: - audio_files: List[Path] - mp4_files: List[Path] - - -@dataclass -class RemoteResult: - created_files: Dict[str, List[str]] - log_file: Optional[str] ->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) def rel_to_volume_path(path: Path) -> str: @@ -267,19 +217,6 @@ def ensure_questionary(): def ask_selection() -> UserSelection: ensure_questionary() -<<<<<<< HEAD -======= - run_mode = questionary.select( - "选择运行模式:", - choices=[ - Choice(title="一次性运行(modal run)", value="once"), - Choice(title="持久化 App(modal deploy)", value="persistent"), - ], - ).ask() - if not run_mode: - raise KeyboardInterrupt - ->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) gpu_choice = questionary.select( "选择 GPU", choices=DEFAULT_GPU_CHOICES, @@ -340,11 +277,7 @@ def ask_selection() -> UserSelection: ) return UserSelection( -<<<<<<< HEAD run_mode="once", -======= - run_mode=run_mode, ->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) gpu_choice=gpu_choice, input_path=input_path, model_profile=model_profile, @@ -357,15 +290,10 @@ def ask_selection() -> UserSelection: ) -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑,逐文件上传避免连接断开) def scan_audio_files(path: Path) -> ScanResult: """扫描目录,返回音频文件和需要转换的 mp4 文件""" audio_files: List[Path] = [] mp4_files: List[Path] = [] -<<<<<<< HEAD for file in path.rglob("*"): if file.is_file(): suffix = file.suffix.lower() @@ -401,56 +329,10 @@ def validate_audio_path(path: Path) -> ScanResult: if not scan_result.audio_files: raise NoAudioFilesError(f"输入的文件夹内没有音频文件:{path}") return scan_result -======= -def iter_audio_files(path: Path) -> List[Path]: - files: List[Path] = [] -======= ->>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑,逐文件上传避免连接断开) - for file in path.rglob("*"): - if file.is_file(): - suffix = file.suffix.lower() - if suffix in AUDIO_SUFFIXES: - audio_files.append(file) - elif suffix in VIDEO_NEED_CONVERT: - mp4_files.append(file) - return ScanResult(audio_files=audio_files, mp4_files=mp4_files) - - -def validate_audio_path(path: Path) -> ScanResult: - """验证音频路径,返回扫描结果。如果发现 mp4 文件会打印警告。""" - if path.is_file(): - suffix = path.suffix.lower() - if suffix in VIDEO_NEED_CONVERT: - raise ValueError( - f"文件 {path} 是 mp4 格式,请先使用 ffmpeg 转换为 mp3:\n" - f" ffmpeg -i \"{path}\" -vn -acodec libmp3lame \"{path.with_suffix('.mp3')}\"" - ) - if suffix not in AUDIO_SUFFIXES: - raise ValueError(f"文件 {path} 不属于支持的音/视频格式。") - return ScanResult(audio_files=[path], mp4_files=[]) - elif path.is_dir(): - scan_result = scan_audio_files(path) - if scan_result.mp4_files: - logging.warning("=" * 60) - logging.warning("发现 %d 个 mp4 文件,这些文件将被跳过:", len(scan_result.mp4_files)) - for mp4_file in scan_result.mp4_files: - logging.warning(" - %s", mp4_file) - logging.warning("请使用 ffmpeg 转换为 mp3 后再处理,例如:") - logging.warning(" ffmpeg -i \"input.mp4\" -vn -acodec libmp3lame \"output.mp3\"") - logging.warning("=" * 60) - if not scan_result.audio_files: - raise ValueError(f"文件夹 {path} 中没有支持的音/视频文件。") -<<<<<<< HEAD ->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) -======= - return scan_result ->>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑,逐文件上传避免连接断开) else: raise ValueError(f"路径 {path} 既不是文件也不是文件夹。") -<<<<<<< HEAD -<<<<<<< HEAD def upload_single_file( volume: modal.Volume, selection: UserSelection, @@ -490,45 +372,6 @@ def upload_single_file( local_output_dir=local_output_dir, remote_logs_rel=remote_logs_rel, original_filename=original_filename, # 始终记录原始文件名 -======= -def prepare_upload( -======= -def upload_single_file( ->>>>>>> 3f6ae4b (refactor: 优化文件夹处理逻辑,逐文件上传避免连接断开) - volume: modal.Volume, - selection: UserSelection, - audio_file: Path, - base_dir: Optional[Path] = None, -) -> UploadManifest: - """上传单个音频文件到 Modal Volume。 - - Args: - volume: Modal Volume 实例 - selection: 用户选择配置 - audio_file: 要上传的音频文件路径 - base_dir: 基础目录(用于文件夹模式,输出到此目录) - """ - session_id = f"{datetime.now().strftime('%Y%m%d-%H%M%S')}-{uuid4().hex[:6]}" - remote_session_rel = SESSION_SUBDIR / session_id - remote_logs_rel = remote_session_rel / "logs" - - with volume.batch_upload(force=True) as batch: - remote_rel = remote_session_rel / audio_file.name - logging.info("上传文件 -> %s", rel_to_volume_path(remote_rel)) - batch.put_file(str(audio_file), rel_to_volume_path(remote_rel)) - - # 如果指定了 base_dir(文件夹模式),输出到 base_dir;否则输出到文件所在目录 - local_output_dir = base_dir if base_dir else audio_file.parent - - return UploadManifest( - session_id=session_id, - source_type="file", - local_source=audio_file, - remote_inputs_rel=[remote_rel], - remote_output_rel=remote_session_rel, - local_output_dir=local_output_dir, - remote_logs_rel=remote_logs_rel, ->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) ) @@ -585,20 +428,12 @@ def run_remote_pipeline( selection: UserSelection, manifest: UploadManifest, payload: Dict, -<<<<<<< HEAD ) -> Dict: logging.info("=== 开始构建 Modal 镜像 ===") image = build_modal_image() logging.info("✓ 镜像构建完成") logging.info("使用 GPU:%s", selection.gpu_choice) logging.info("超时时间:%d 分钟", selection.timeout_minutes) -======= -) -> RemoteResult: - logging.info("=== 开始构建 Modal 镜像 ===") - image = build_modal_image() - logging.info("✓ 镜像构建完成") - logging.info("使用 GPU:%s", selection.gpu_choice) ->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) app = modal.App(APP_NAME) @app.function( @@ -611,7 +446,6 @@ def run_remote_pipeline( def modal_pipeline(job_payload: Dict) -> Dict: return _remote_pipeline(job_payload) -<<<<<<< HEAD logging.info("=== 开始远程执行 ===") logging.info("正在启动 GPU 容器并执行推理任务...") logging.info("(以下为远程容器输出)") @@ -623,84 +457,6 @@ def modal_pipeline(job_payload: Dict) -> Dict: return result # 直接返回 Dict,包含 created_files 和 log_content -def process_directory_files( - volume: modal.Volume, - selection: UserSelection, - audio_files: List[Path], -) -> Tuple[int, int]: - """处理文件夹中的所有音频文件,容器复用。 - - Args: - volume: Modal Volume 实例 - selection: 用户选择配置 - audio_files: 要处理的音频文件列表 - - Returns: - (成功数, 失败数) 元组 - """ - logging.info("=== 开始构建 Modal 镜像 ===") - image = build_modal_image() - logging.info("✓ 镜像构建完成") - logging.info("使用 GPU:%s", selection.gpu_choice) - logging.info("超时时间:%d 分钟", selection.timeout_minutes) - logging.info("待处理文件数:%d", len(audio_files)) - - app = modal.App(APP_NAME) - - @app.function( - image=image, - gpu=selection.gpu_choice, - timeout=selection.timeout_minutes * 60, - volumes={str(REMOTE_MOUNT): volume}, - serialized=True, - min_containers=1, # 保持容器预热,复用容器 - ) - def modal_pipeline(job_payload: Dict) -> Dict: - return _remote_pipeline(job_payload) - - success_count = 0 - fail_count = 0 - base_dir = selection.input_path # 文件夹模式下,输出到源文件夹 - - with app.run(): - for i, audio_file in enumerate(audio_files, 1): - logging.info("=" * 60) - logging.info("处理文件 [%d/%d]: %s", i, len(audio_files), audio_file.name) - logging.info("=" * 60) - try: - # 1. 上传单个文件 - manifest = upload_single_file(volume, selection, audio_file, base_dir) - - # 2. 构建 payload - payload = build_job_payload(selection, manifest) - - # 3. 执行推理(复用容器) - logging.info("正在执行推理...") - result = modal_pipeline.remote(payload) - - # 4. 写入结果文件到本地 - download_outputs(manifest, result) - - logging.info("✓ 文件 %s 处理完成", audio_file.name) - success_count += 1 - except Exception as e: - logging.error("✗ 文件 %s 处理失败: %s", audio_file.name, e) - fail_count += 1 - continue # 继续处理下一个文件 - - return success_count, fail_count -======= - with app.run(): - result = modal_pipeline.remote(payload) - logging.info("-" * 60) - logging.info("✓ 远程执行完成") - created = { - remote_dir: files for remote_dir, files in result.get("created", {}).items() - } - return RemoteResult(created_files=created, log_file=result.get("log_file")) ->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) - - def process_directory_files( volume: modal.Volume, selection: UserSelection, @@ -771,7 +527,6 @@ def modal_pipeline(job_payload: Dict) -> Dict: def download_outputs( manifest: UploadManifest, -<<<<<<< HEAD result: Dict, ) -> None: """从远程结果中提取文件内容并写入本地""" @@ -807,60 +562,15 @@ def download_outputs( def summarize(manifest: UploadManifest, result: Dict) -> None: -======= - result: RemoteResult, -) -> None: - """从远程结果中提取文件内容并写入本地""" - import base64 - - created_files = result.get("created_files", {}) - log_content = result.get("log_content") - - # 获取原始文件名的 stem(不含扩展名) - original_stem = Path(manifest.original_filename).stem if manifest.original_filename else "todo" - - for filename, content_b64 in created_files.items(): - content = base64.b64decode(content_b64) - # 将 todo.xxx 替换为原始文件名 - if filename.startswith("todo."): - suffix = Path(filename).suffix - new_filename = original_stem + suffix - else: - new_filename = filename - - local_path = manifest.local_output_dir / new_filename - local_path.parent.mkdir(parents=True, exist_ok=True) - local_path.write_bytes(content) - logging.info("写入文件: %s (%d bytes)", local_path, len(content)) - - # 写入 log 文件 - if log_content: - log_dir = Path("logs") - log_dir.mkdir(exist_ok=True) - log_path = log_dir / f"modal_run_{manifest.session_id}.log" - log_path.write_bytes(base64.b64decode(log_content)) - logging.info("写入日志: %s", log_path) - - -def summarize(manifest: UploadManifest, result: RemoteResult) -> None: ->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) logging.info("=== 运行完成 ===") logging.info("Session: %s", manifest.session_id) logging.info("源路径: %s", manifest.local_source) logging.info("输出路径: %s", manifest.local_output_dir if manifest.source_type == "directory" else manifest.local_source.parent) -<<<<<<< HEAD created_files = result.get("created_files", {}) if created_files: logging.info("新生成文件:") for filename in created_files.keys(): logging.info(" %s", filename) -======= - if result.created_files: - logging.info("新生成文件:") - for remote_dir, files in result.created_files.items(): - for file in files: - logging.info(" %s", file) ->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) def parse_args() -> argparse.Namespace: @@ -873,7 +583,6 @@ def parse_args() -> argparse.Namespace: return parser.parse_args() -<<<<<<< HEAD def prompt_exit(enabled: bool) -> None: if not enabled: return @@ -928,80 +637,27 @@ def main() -> int: prompt_exit(not args.non_interactive) return exit_code -======= -def main() -> None: - parse_args() - log_path = setup_logger() - exit_code = 0 - try: - selection = ask_selection() - volume = modal.Volume.from_name(VOLUME_NAME, create_if_missing=True) - - # 验证路径并获取扫描结果 - scan_result = validate_audio_path(selection.input_path) - - if selection.input_path.is_dir(): - # 文件夹模式:逐个处理文件,容器复用 - logging.info("检测到文件夹输入,将逐个处理 %d 个音频文件", len(scan_result.audio_files)) - success_count, fail_count = process_directory_files( - volume, selection, scan_result.audio_files - ) - logging.info("=" * 60) - logging.info("=== 批量处理完成 ===") - logging.info("成功: %d, 失败: %d", success_count, fail_count) - logging.info("输出路径: %s", selection.input_path) - logging.info("✅ 请在上方输出路径查看字幕结果。") - else: - # 单文件模式:保持原有逻辑 - manifest = upload_single_file(volume, selection, selection.input_path) - payload = build_job_payload(selection, manifest) - result = run_remote_pipeline(volume, selection, manifest, payload) - download_outputs(manifest, result) - summarize(manifest, result) - logging.info("✅ 请在上方输出路径查看字幕结果。") - except KeyboardInterrupt: - logging.warning("用户中断,未执行任何远程操作。") - exit_code = 1 - except Exception as exc: - if isinstance(exc, NoAudioFilesError): - logging.error("%s", exc) - else: - logging.exception("运行失败:%s", exc) - logging.error("日志见:%s", log_path) - exit_code = 1 - - ->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) def _remote_pipeline(job: Dict) -> Dict: import subprocess from pathlib import Path import os -<<<<<<< HEAD # 强制重新加载 Volume,确保看到最新上传的文件 from modal import Volume volume = Volume.from_name("Faster_Whisper") volume.reload() -======= ->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) def run(cmd: Sequence[str], cwd: Optional[str] = None, env: Optional[dict] = None) -> None: print(" ".join(cmd), flush=True) subprocess.run(cmd, check=True, cwd=cwd, env=env) mount_root = Path(job["mount_root"]) - repo_dir = REPO_VOLUME_DIR -<<<<<<< HEAD + repo_dir = Path(REPO_VOLUME_DIR) # log 文件放在 session 目录下,而不是 logs 子目录 session_dir = Path(job["remote_output_dir"]) session_dir.mkdir(parents=True, exist_ok=True) log_file = session_dir / "modal_run.log" -======= - logs_dir = Path(job["remote_logs_dir"]) - logs_dir.mkdir(parents=True, exist_ok=True) - log_file = logs_dir / "modal_run.log" ->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) def log(msg: str) -> None: line = f"[modal_run] {msg}" @@ -1076,7 +732,6 @@ def snapshot(path: str) -> set: cmd.extend(job["remote_inputs"]) -<<<<<<< HEAD # 在执行推理前,等待文件同步完成 import time log("等待文件同步...") @@ -1134,21 +789,13 @@ def snapshot(path: str) -> set: log(f"=== 调试信息结束 ===") raise -======= - log(f"执行推理命令:{' '.join(cmd)}") - run(cmd, cwd=str(repo_dir)) ->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) def to_volume_path(path_str: str) -> str: return container_to_volume_path(path_str) -<<<<<<< HEAD # 收集生成的文件内容(直接返回,避免 volume 同步问题) import base64 created_files = {} # {filename: base64_content} -======= - created = {} ->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) for target in job["output_targets"]: remote_dir = target["remote_dir"] after = snapshot(remote_dir) @@ -1158,7 +805,6 @@ def to_volume_path(path_str: str) -> str: for file in after - prev if Path(file).suffix.lower() in SUB_SUFFIXES ) -<<<<<<< HEAD for file_path in new_files: file_path = Path(file_path) if file_path.exists(): @@ -1181,12 +827,3 @@ def to_volume_path(path_str: str) -> str: if __name__ == "__main__": # pragma: no cover sys.exit(main()) -======= - created[to_volume_path(remote_dir)] = [to_volume_path(path) for path in new_files] - - return {"created": created, "log_file": to_volume_path(str(log_file))} - - -if __name__ == "__main__": # pragma: no cover - main() ->>>>>>> fe20a3c (feat: Add Modal cloud GPU inference support) From be2b53b0891502736c3e577c39c08d70df1f5cd3 Mon Sep 17 00:00:00 2001 From: neo Date: Thu, 15 Jan 2026 09:39:13 +0800 Subject: [PATCH 24/25] revert pyinstall version in cuda122.yaml as request --- environment-cuda122.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/environment-cuda122.yml b/environment-cuda122.yml index 6665cd3..aeb91bf 100644 --- a/environment-cuda122.yml +++ b/environment-cuda122.yml @@ -34,8 +34,7 @@ dependencies: - backports.functools-lru-cache # Fix for PyInstaller ModuleNotFoundError # Build tools - # PyInstaller 6.17.0 has a conda hook regression (KeyError: 'depends') in some conda environments - - pyinstaller==6.16.0 + - pyinstaller>=6.0.0 - setuptools>=65.0.0 - wheel>=0.38.0 - build>=0.10.0 From a71ccc20b0534d8a29de00856dd23024586aef7b Mon Sep 17 00:00:00 2001 From: Randomless <2418660459@qq.com> Date: Thu, 15 Jan 2026 22:36:01 +0800 Subject: [PATCH 25/25] =?UTF-8?q?=E7=A7=BB=E9=99=A4=20REPO=5FREF=20?= =?UTF-8?q?=E7=89=88=E6=9C=AC=E9=94=81=E5=AE=9A=EF=BC=8C=E6=94=B9=E4=B8=BA?= =?UTF-8?q?=E4=BD=BF=E7=94=A8=20main=20=E5=88=86=E6=94=AF=E6=9C=80?= =?UTF-8?q?=E6=96=B0=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 删除 REPO_REF = "v1.4" 常量 - 克隆时不再指定 --branch,使用默认分支 - 更新时 reset 到 origin/main 而非固定 tag 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- modal_infer.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/modal_infer.py b/modal_infer.py index 8cf646c..4cb753b 100644 --- a/modal_infer.py +++ b/modal_infer.py @@ -48,7 +48,6 @@ def ensure_utf8_stdio() -> None: APP_NAME = "Faster-Whisper-TransWithAI-ChickenRice" REPO_URL = "https://github.com/TransWithAI/Faster-Whisper-TransWithAI-ChickenRice" -REPO_REF = "v1.4" VOLUME_NAME = "Faster_Whisper" VOLUME_ROOT = "/Faster_Whisper" REMOTE_MOUNT = VOLUME_ROOT @@ -667,12 +666,11 @@ def log(msg: str) -> None: if not (repo_dir / ".git").exists(): log("开始克隆仓库...") - run(["git", "clone", "--branch", REPO_REF, "--depth", "1", REPO_URL, str(repo_dir)]) + run(["git", "clone", "--depth", "1", REPO_URL, str(repo_dir)]) else: log("更新仓库...") - run(["git", "-C", str(repo_dir), "fetch", "--tags", "origin"]) - run(["git", "-C", str(repo_dir), "checkout", "-f", REPO_REF]) - run(["git", "-C", str(repo_dir), "reset", "--hard", REPO_REF]) + run(["git", "-C", str(repo_dir), "fetch", "origin"]) + run(["git", "-C", str(repo_dir), "reset", "--hard", "origin/main"]) model_profile = job["model_profile"] model_path = repo_dir / "models"