From 9f80a8901b9479ac81d946c26aecdc0079121ee9 Mon Sep 17 00:00:00 2001 From: "git remote add origin git@github.com:luogaiyu/TickApp.git" Date: Sun, 4 May 2025 13:10:07 +0800 Subject: [PATCH 01/15] =?UTF-8?q?=E6=96=B0=E5=A2=9E=20=E7=BA=BF=E4=B8=8A?= =?UTF-8?q?=E8=87=AA=E5=8A=A8=E8=BF=90=E8=A1=8C=E7=8E=AF=E5=A2=83?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.yaml | 14 +++++++------- core/prompts_storage.py | 12 +++++++++++- core/step1_ytdlp.py | 12 +++++++++--- total.sh | 18 ++++++++++++++++++ 4 files changed, 45 insertions(+), 11 deletions(-) create mode 100644 total.sh diff --git a/config.yaml b/config.yaml index 3622e7c8..eea03e11 100644 --- a/config.yaml +++ b/config.yaml @@ -6,7 +6,7 @@ display_language: "zh-CN" # API settings api: - key: 'your_api_key' + key: 'sk-s7pvNTkGHVFkpGRW7SMWsYFCXO01CpnJr7C8jZD8kIpAck2k' base_url: 'https://api.302.ai' model: 'gemini-2.0-flash' @@ -25,7 +25,7 @@ whisper: # Whisper running mode ["local", "cloud", "elevenlabs"]. Specifies where to run, cloud uses 302.ai API runtime: 'local' # 302.ai API key - whisperX_302_api_key: 'your_302_api_key' + whisperX_302_api_key: 'sk-s7pvNTkGHVFkpGRW7SMWsYFCXO01CpnJr7C8jZD8kIpAck2k' # ElevenLabs API key elevenlabs_api_key: 'your_elevenlabs_api_key' @@ -67,7 +67,7 @@ tts_method: 'f5tts' # SiliconFlow FishTTS sf_fish_tts: # SiliconFlow API key - api_key: 'YOUR_API_KEY' + api_key: 'sk-s7pvNTkGHVFkpGRW7SMWsYFCXO01CpnJr7C8jZD8kIpAck2k' # only for mode "preset" voice: 'anna' # *only for mode "custom", dont set manually @@ -78,17 +78,17 @@ sf_fish_tts: # OpenAI TTS-1 API configuration, 302.ai API only openai_tts: - api_key: 'YOUR_302_API_KEY' + api_key: 'sk-s7pvNTkGHVFkpGRW7SMWsYFCXO01CpnJr7C8jZD8kIpAck2k' voice: 'alloy' # Azure configuration, 302.ai API only azure_tts: - api_key: 'YOUR_302_API_KEY' + api_key: 'sk-s7pvNTkGHVFkpGRW7SMWsYFCXO01CpnJr7C8jZD8kIpAck2k' voice: 'zh-CN-YunfengNeural' # FishTTS configuration, 302.ai API only fish_tts: - api_key: 'YOUR_302_API_KEY' + api_key: 'sk-s7pvNTkGHVFkpGRW7SMWsYFCXO01CpnJr7C8jZD8kIpAck2k' character: 'AD学姐' character_id_dict: 'AD学姐': '7f92f8afb8ec43bf81429cc1c9199cb1' @@ -108,7 +108,7 @@ gpt_sovits: refer_mode: 3 f5tts: - 302_api: 'YOUR_302_API_KEY' + 302_api: 'sk-s7pvNTkGHVFkpGRW7SMWsYFCXO01CpnJr7C8jZD8kIpAck2k' # *Audio speed range speed_factor: diff --git a/core/prompts_storage.py b/core/prompts_storage.py index ea10e140..74324708 100644 --- a/core/prompts_storage.py +++ b/core/prompts_storage.py @@ -155,6 +155,13 @@ def get_prompt_faithfulness(lines, shared_prompt): 3. Understand the context: Fully comprehend and reflect the background and contextual relationships of the text. +### Response Format Requirements +- IMPORTANT: The response MUST be a single JSON object/dictionary, NOT a list +- Each line should be a key-value pair in the dictionary +- Keys should be the line numbers as strings +- DO NOT wrap the response in an array/list +- DO NOT include any explanation text outside the JSON structure + ## INPUT {lines} @@ -163,7 +170,10 @@ def get_prompt_faithfulness(lines, shared_prompt): ## Output in only JSON format {json.dumps(json_format, ensure_ascii=False, indent=4)} -Note: << >> represents placeholders that should not appear in your answer +Note: +1. << >> represents placeholders that should not appear in your answer +2. The output must be a SINGLE dictionary/object +3. The format should exactly match the example above ''' return prompt_faithfulness.strip() diff --git a/core/step1_ytdlp.py b/core/step1_ytdlp.py index f79d88e5..01f39105 100644 --- a/core/step1_ytdlp.py +++ b/core/step1_ytdlp.py @@ -15,15 +15,21 @@ def sanitize_filename(filename): def download_video_ytdlp(url, save_path='output', resolution='1080', cutoff_time=None): allowed_resolutions = ['360', '1080', 'best'] - if resolution not in allowed_resolutions: - resolution = '360' + # if resolution not in allowed_resolutions: + # resolution = '360' os.makedirs(save_path, exist_ok=True) + # 配置 yt-dlp 选项 ydl_opts = { + # 选择最佳视频和音频格式 'format': 'bestvideo+bestaudio/best' if resolution == 'best' else f'bestvideo[height<={resolution}]+bestaudio/best[height<={resolution}]', + # 输出模板 'outtmpl': f'{save_path}/%(title)s.%(ext)s', + # 不下载播放列表 'noplaylist': True, + # 写入缩略图 'writethumbnail': True, + # 缩略图处理 'postprocessors': [{ 'key': 'FFmpegThumbnailsConvertor', 'format': 'jpg', @@ -55,7 +61,7 @@ def download_video_ytdlp(url, save_path='output', resolution='1080', cutoff_time if new_filename != filename: os.rename(os.path.join(save_path, file), os.path.join(save_path, new_filename + ext)) - # cut the video to make demo + # 提供裁剪功能 但不是所有的都是用 if cutoff_time: print(f"Cutoff time: {cutoff_time}, Now checking video duration...") video_file = find_video_files(save_path) diff --git a/total.sh b/total.sh new file mode 100644 index 00000000..dc73ea31 --- /dev/null +++ b/total.sh @@ -0,0 +1,18 @@ +# conda activate videolingo +# rm -rf output/ +# python -m core.step1_ytdlp +# 语音识别 +python -m core.step2_whisperX + +# # # 文本分割 +python -m core.step3_1_spacy_split +python -m core.step3_2_splitbymeaning + +# # 文本处理和翻译 +python -m core.step4_1_summarize +python -m core.step4_2_translate_all + +# 字幕处理 +python -m core.step5_splitforsub +python -m core.step6_generate_final_timeline +python -m core.step7_merge_sub_to_vid From c3e1169beaec5ee1bf40bfd82d7661495268b61c Mon Sep 17 00:00:00 2001 From: "1449528975@qq.com" Date: Tue, 27 May 2025 23:27:28 +0800 Subject: [PATCH 02/15] =?UTF-8?q?=E6=96=B0=E5=A2=9E=EF=BC=9Atotal.sh?= =?UTF-8?q?=E8=87=AA=E5=8A=A8=E8=BF=9B=E8=A1=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core/step1_ytdlp.py | 2 +- total.sh | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/core/step1_ytdlp.py b/core/step1_ytdlp.py index 01f39105..2f33bf7a 100644 --- a/core/step1_ytdlp.py +++ b/core/step1_ytdlp.py @@ -20,7 +20,7 @@ def download_video_ytdlp(url, save_path='output', resolution='1080', cutoff_time os.makedirs(save_path, exist_ok=True) # 配置 yt-dlp 选项 - ydl_opts = { + ydl_opts = { # 选择最佳视频和音频格式 'format': 'bestvideo+bestaudio/best' if resolution == 'best' else f'bestvideo[height<={resolution}]+bestaudio/best[height<={resolution}]', # 输出模板 diff --git a/total.sh b/total.sh index dc73ea31..9cb3ada0 100644 --- a/total.sh +++ b/total.sh @@ -1,6 +1,6 @@ -# conda activate videolingo -# rm -rf output/ -# python -m core.step1_ytdlp +conda activate videolingo +rm -rf output/ +python -m core.step1_ytdlp # 语音识别 python -m core.step2_whisperX From 1c6293999aabb8b84a34b35a44d63b52e0fed15d Mon Sep 17 00:00:00 2001 From: "1449528975@qq.com" Date: Sat, 31 May 2025 22:51:10 +0800 Subject: [PATCH 03/15] =?UTF-8?q?=E5=8F=98=E6=9B=B4:=E5=88=87=E6=8D=A2?= =?UTF-8?q?=E6=88=90youtube=E7=9A=84=E4=B8=8B=E8=BD=BD=E9=BB=98=E8=AE=A4?= =?UTF-8?q?=E4=B8=BA1080?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.yaml | 2 +- core/step1_ytdlp.py | 10 +--------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/config.yaml b/config.yaml index eea03e11..da677a84 100644 --- a/config.yaml +++ b/config.yaml @@ -11,7 +11,7 @@ api: model: 'gemini-2.0-flash' # Language settings, written into the prompt, can be described in natural language -target_language: '简体中文' +target_language: 'Chinese' # Whether to use Demucs for vocal separation before transcription demucs: true diff --git a/core/step1_ytdlp.py b/core/step1_ytdlp.py index 2f33bf7a..4f583ca5 100644 --- a/core/step1_ytdlp.py +++ b/core/step1_ytdlp.py @@ -6,30 +6,22 @@ from core.config_utils import load_key def sanitize_filename(filename): - # Remove or replace illegal characters filename = re.sub(r'[<>:"/\\|?*]', '', filename) - # Ensure filename doesn't start or end with a dot or space filename = filename.strip('. ') - # Use default name if filename is empty return filename if filename else 'video' def download_video_ytdlp(url, save_path='output', resolution='1080', cutoff_time=None): allowed_resolutions = ['360', '1080', 'best'] - # if resolution not in allowed_resolutions: - # resolution = '360' - os.makedirs(save_path, exist_ok=True) # 配置 yt-dlp 选项 - ydl_opts = { + ydl_opts = { # 选择最佳视频和音频格式 'format': 'bestvideo+bestaudio/best' if resolution == 'best' else f'bestvideo[height<={resolution}]+bestaudio/best[height<={resolution}]', # 输出模板 'outtmpl': f'{save_path}/%(title)s.%(ext)s', - # 不下载播放列表 'noplaylist': True, # 写入缩略图 'writethumbnail': True, - # 缩略图处理 'postprocessors': [{ 'key': 'FFmpegThumbnailsConvertor', 'format': 'jpg', From ea7a86cc85e363f8a92577adc50a984bd98da522 Mon Sep 17 00:00:00 2001 From: "1449528975@qq.com" Date: Sun, 8 Jun 2025 18:13:32 +0800 Subject: [PATCH 04/15] =?UTF-8?q?=E6=96=B0=E5=A2=9E:=20=E6=89=B9=E9=87=8F?= =?UTF-8?q?=E7=94=9F=E6=88=90=E6=A0=87=E9=A2=98=E5=92=8C=E8=A7=86=E9=A2=91?= =?UTF-8?q?=E7=AE=80=E4=BB=8B|=E4=BF=AE=E5=A4=8D=E9=97=AE=E9=A2=98:prompt?= =?UTF-8?q?=E4=BC=98=E5=8C=96-=E4=BF=AE=E5=A4=8D=E4=B8=80=E4=BA=9B?= =?UTF-8?q?=E5=88=87=E8=AF=8D=E5=AF=BC=E8=87=B4=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../batch_processor_get_title_introduction.py | 146 ++++++++++++++++++ core/prompts_storage.py | 67 ++++++++ 2 files changed, 213 insertions(+) create mode 100644 batch/utils/batch_processor_get_title_introduction.py diff --git a/batch/utils/batch_processor_get_title_introduction.py b/batch/utils/batch_processor_get_title_introduction.py new file mode 100644 index 00000000..773b0d46 --- /dev/null +++ b/batch/utils/batch_processor_get_title_introduction.py @@ -0,0 +1,146 @@ +import os, sys +sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))#补充当前文件的父目录到系统路径 +from core.ask_gpt import ask_gpt +import re +from pathlib import Path +from core.prompts_storage import get_title_introduction_prompt +import pandas as pd +from rich import print as rprint +import json +import shutil + +def clean_srt_date(srt_date): + pass + +#### 为了提高主页的CTR,需要手动优化标题前缀 +prefix_base_dir = '麻省理工分布式系统-' +### + +def read_all_trans_srt(): + ''' + need move to root path + ''' + rprint(f"[yellow]read the srt from the output folder....[/yellow]") + all_trans_str=[] + print(os.path.dirname(__file__)) + base_path = Path(os.path.dirname(__file__)).parent / 'output' + # 获取所有Lecture文件夹并按数字排序 + lecture_folders = [f for f in base_path.iterdir() if f.is_dir() and f.name.startswith("Lecture")] + lecture_folders.sort(key=lambda x: int(re.search(r'Lecture (\d+)', x.name).group(1))) + + # 按顺序读取每个trans.srt文件 + for folder in lecture_folders: + trans_file = folder / "trans.srt" + if trans_file.exists(): + with open(trans_file, 'r', encoding='utf-8') as f: + content = str(trans_file.parent) + '||' + str(trans_file.parent.name) + '||' + f.read() + print(content) + all_trans_str.append(content) + rprint(f"[green]🎉 read the all srt {len(all_trans_str)} from the output folder completed![/green]") + rprint(f"[green]=================================================[/green]") + rprint(f"[green]{all_trans_str}[/green]") + rprint(f"[green]=================================================[/green]") + return all_trans_str + + +def copy_and_rename_videos(responses, result_dir_path): + """ + 根据生成的标题,复制并重命名视频文件 + + Args: + responses: 包含file_path和title的字典列表 + result_dir_path: 目标目录路径 (字符串或Path对象) + + Returns: + tuple: (成功数量, 总数量) + """ + rprint(f"[yellow]start copy and rename videos....[/yellow]") + # 创建result目录 + result_dir = Path(result_dir_path) + result_dir.mkdir(exist_ok=True) + + total_count = len(responses) + rprint(f"[yellow]start copy and rename videos....[/yellow]") + for i, response in enumerate(responses, 1): + file_path = response.get('file_path', '') + title = response.get('title', f'unknown_{i}') + + rprint(f"[yellow]🌐 processing {i+1}/{total_count}[/yellow]") + # 构建源文件路径 + source_video = Path(file_path) / 'output_sub.mp4' + + if source_video.exists(): + # 清理标题作为新文件名 + new_filename = f"{prefix_base_dir} + {title}.mp4" + target_path = result_dir / new_filename + + try: + # 复制文件 + shutil.copy2(source_video, target_path) + except Exception as e: + rprint(f"[red]❌ 复制失败: {e}[/red]") + else: + rprint(f"[red]⚠️ 源文件不存在: {source_video}[/red]") + + +# 根据当前output的目录中的简介调用大模型 +# 批量生成视频的标题和简介 +def get_tasks_setting_info(): + base_path = Path(os.path.dirname(__file__)).parent / 'tasks_setting.xlsx' + df = pd.read_excel(base_path) + return df + +def json_valid(response_data): + try: + json.loads(response_data) + return response_data + except (json.JSONDecodeError, ValueError): + return "{'title':'error', 'introduction':'error'}" + +def get_title_introduction_batch(): + responses = [] + all_trans_srt =read_all_trans_srt() + + trans_srt_len = len(all_trans_srt) + for i in range(trans_srt_len): + trans_srt = all_trans_srt[i] + rprint(f"[yellow]🌐 processing {i}/{trans_srt_len}[/yellow]") + prompt = get_title_introduction_prompt(trans_srt); + try: + response = ask_gpt(prompt, response_json=True, log_title='subtitle_trim') + responses.append(response) + rprint(f"[yellow]{responses[-1]}[/yellow]") + except Exception as e: + print(f"Error: {e}") + flat_responses = [] + # 去除responses数组中的空字符串元素 + for item in responses: + if isinstance(item, list): + # 如果是列表,展开里面的字典 + flat_responses.extend(item) + elif isinstance(item, dict): + # 如果是字典,直接添加 + flat_responses.append(item) + + responses = flat_responses + rprint(f"[green]=================================================[/green]") + rprint(f"[green]🎉 responses:[/green]") + rprint(responses) + rprint("[green]🎉 All processing completed![/green]") + + base_path = Path(os.path.dirname(__file__)).parent + + tasks_setting_info = get_tasks_setting_info() + + copy_and_rename_videos(responses, base_path / 'result') + # 将 responses JSON 数组转换成 DataFrame + responses_df = pd.DataFrame(responses) + result_df = pd.concat([tasks_setting_info, responses_df], axis=1) + result_df.to_excel(base_path / 'result.xlsx' , index=False, engine='openpyxl') + + + + + +if __name__ == "__main__": + get_title_introduction_batch() \ No newline at end of file diff --git a/core/prompts_storage.py b/core/prompts_storage.py index 74324708..73e94b9b 100644 --- a/core/prompts_storage.py +++ b/core/prompts_storage.py @@ -18,6 +18,27 @@ def get_split_prompt(sentence, num_parts = 2, word_limit = 20): 3. Split at natural points like punctuation marks or conjunctions 4. If provided text is repeated words, simply split at the middle of the repeated words. +## Example +Input: 'This is a long sentence that needs splitting for subtitles' +Expected Output JSON: +{{ + "analysis": "Long sentence about subtitle splitting, can be split after 'sentence'", + "split": "This is a long sentence[br]that needs splitting for subtitles" +}} + +Input: 'Machine learning algorithms are becoming increasingly sophisticated and powerful in modern applications' +Expected Output JSON: +{{ + "analysis": "Technical sentence about ML, natural split after 'sophisticated'", + "split": "Machine learning algorithms are becoming increasingly sophisticated[br]and powerful in modern applications" +}} + +## Output Requirements +- MUST use [br] tags to mark split positions +- MUST return valid JSON format +- The "split" field MUST contain the complete sentence with [br] inserted at split points +- Do NOT split the sentence into separate parts, keep it as one string with [br] markers + ## Output in only JSON format {{ "analysis": "Brief analysis of the text structure", @@ -344,3 +365,49 @@ def get_correct_text_prompt(text): "text": "cleaned text here" }} '''.strip() + + +## ================================================================ +## @ batch_processor_get_title_introduction.py +def get_title_introduction_prompt(text): + return f''' +## Role +You are a professional video title and introduction generator for Bilibili platform. + +## Task +1. Extract the file path from the input (before first "||") +2. Extract the original title (between first and second "||") and get the lecture number +3. Analyze the SRT subtitle content (after second "||") to understand the video topic +4. Generate appropriate title and introduction based on the subtitle content + +## Requirements +1. Title must be concise and attractive for Chinese audience +2. Introduction should be engaging but not too verbose +3. Format should follow Bilibili style +4. Title must include the chapter number as prefix + +## Format Requirements +- Title format: 第X章:[核心主题] 关键词1-关键词2-关键词3 (总长度不超过35字) +- Introduction format: 30-50字的简洁介绍,要有吸引力但不冗长 + +## Examples +Good title: 第20章:[Raft算法] 日志复制-选举机制-一致性保证 +Good introduction: 深入浅出讲解Raft算法核心机制,包含日志复制、领导选举等关键概念,助你轻松掌握分布式一致性! + +## INPUT Format +The input contains: file_path||original_title||srt_content +Where: +- file_path: The complete path to the subtitle file +- original_title: The original lecture title (e.g., "Lecture 20: Blockstack") +- srt_content: The subtitle content with timestamps and text + +## INPUT +{text} + +## Output in only JSON format +{{ + "file_path": "提取的完整文件路径", + "title": "第X章:[核心主题] 关键词1-关键词2-关键词3", + "introduction": "基于字幕内容生成的30-50字简洁介绍" +}} +''' \ No newline at end of file From 34184d8546d9d6fe98998af0093ac72d3e38b8a2 Mon Sep 17 00:00:00 2001 From: "1449528975@qq.com" Date: Sun, 29 Jun 2025 14:17:53 +0800 Subject: [PATCH 05/15] Fix: update prompts_storage.py to avoid some error when model return unformat data --- core/prompts_storage.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/core/prompts_storage.py b/core/prompts_storage.py index ea10e140..74324708 100644 --- a/core/prompts_storage.py +++ b/core/prompts_storage.py @@ -155,6 +155,13 @@ def get_prompt_faithfulness(lines, shared_prompt): 3. Understand the context: Fully comprehend and reflect the background and contextual relationships of the text. +### Response Format Requirements +- IMPORTANT: The response MUST be a single JSON object/dictionary, NOT a list +- Each line should be a key-value pair in the dictionary +- Keys should be the line numbers as strings +- DO NOT wrap the response in an array/list +- DO NOT include any explanation text outside the JSON structure + ## INPUT {lines} @@ -163,7 +170,10 @@ def get_prompt_faithfulness(lines, shared_prompt): ## Output in only JSON format {json.dumps(json_format, ensure_ascii=False, indent=4)} -Note: << >> represents placeholders that should not appear in your answer +Note: +1. << >> represents placeholders that should not appear in your answer +2. The output must be a SINGLE dictionary/object +3. The format should exactly match the example above ''' return prompt_faithfulness.strip() From a9643c8b06be4b9ad32bc20b44508de9bf0d4f5d Mon Sep 17 00:00:00 2001 From: "1449528975@qq.com" Date: Sat, 19 Jul 2025 20:30:18 +0800 Subject: [PATCH 06/15] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E5=89=8D=E7=BC=80?= =?UTF-8?q?=EF=BC=8C=E4=BB=A5=E5=8F=8Aprompt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../batch_processor_get_title_introduction.py | 4 +-- core/prompts_storage.py | 34 +++++++++++++++++-- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/batch/utils/batch_processor_get_title_introduction.py b/batch/utils/batch_processor_get_title_introduction.py index 773b0d46..28ca5d72 100644 --- a/batch/utils/batch_processor_get_title_introduction.py +++ b/batch/utils/batch_processor_get_title_introduction.py @@ -13,7 +13,7 @@ def clean_srt_date(srt_date): pass #### 为了提高主页的CTR,需要手动优化标题前缀 -prefix_base_dir = '麻省理工分布式系统-' +prefix_base_dir = '[加州大学伯克利分校-电子工程与计算机科学]-' ### def read_all_trans_srt(): @@ -25,7 +25,7 @@ def read_all_trans_srt(): print(os.path.dirname(__file__)) base_path = Path(os.path.dirname(__file__)).parent / 'output' # 获取所有Lecture文件夹并按数字排序 - lecture_folders = [f for f in base_path.iterdir() if f.is_dir() and f.name.startswith("Lecture")] + lecture_folders = [f for f in base_path.iterdir() if f.is_dir() ] lecture_folders.sort(key=lambda x: int(re.search(r'Lecture (\d+)', x.name).group(1))) # 按顺序读取每个trans.srt文件 diff --git a/core/prompts_storage.py b/core/prompts_storage.py index 73e94b9b..f8f4dc82 100644 --- a/core/prompts_storage.py +++ b/core/prompts_storage.py @@ -388,11 +388,39 @@ def get_title_introduction_prompt(text): ## Format Requirements - Title format: 第X章:[核心主题] 关键词1-关键词2-关键词3 (总长度不超过35字) -- Introduction format: 30-50字的简洁介绍,要有吸引力但不冗长 +- Introduction format: 至少400字的简洁介绍,要求有吸引力,并且段落清晰 ## Examples Good title: 第20章:[Raft算法] 日志复制-选举机制-一致性保证 -Good introduction: 深入浅出讲解Raft算法核心机制,包含日志复制、领导选举等关键概念,助你轻松掌握分布式一致性! +Good introduction: +🎲 精彩内容: + MIT教授用生动的赌博游戏演示概率论 + 深入解析著名的蒙提霍尔问题 + 揭示统计数据背后的真相与陷阱 + 探讨条件概率在现实生活中的应用 +🎯 核心知识点: + 条件概率的基本概念与计算 + 概率树方法的应用 + 容斥原理在概率中的运用 + 统计数据的正确解读方法 +💡 精彩案例: + 蒙提霍尔游戏的完整分析 + 伯克利性别歧视案例研究 + 航空公司准点率的统计陷阱 + 赌场骰子游戏的概率分析 +⏰ 重要时间点: + 00:00 课程介绍 + 05:23 蒙提霍尔问题详解 + 32:15 条件概率基础 + 45:30 统计陷阱案例分析 + 58:20 实际应用讨论 +🎓 适合人群: + 数学专业学生 + 概率论初学者 + 数据分析从业者 + 对统计学感兴趣的观众 +#数学教育 #概率论 #MIT公开课 #统计学 #数据分析 + ## INPUT Format The input contains: file_path||original_title||srt_content @@ -408,6 +436,6 @@ def get_title_introduction_prompt(text): {{ "file_path": "提取的完整文件路径", "title": "第X章:[核心主题] 关键词1-关键词2-关键词3", - "introduction": "基于字幕内容生成的30-50字简洁介绍" + "introduction": "至少400字的简洁介绍,要求有吸引力,并且段落清晰,参考上面的例子" }} ''' \ No newline at end of file From 1cb2dee3af730cd277e2fbd81187bf8a16b6106a Mon Sep 17 00:00:00 2001 From: darkchunk Date: Sat, 27 Dec 2025 20:43:15 +0800 Subject: [PATCH 07/15] =?UTF-8?q?=E6=96=B0=E5=A2=9Eutils:=20=E5=88=87?= =?UTF-8?q?=E5=88=86=E8=A7=86=E9=A2=91=E7=9A=84=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core/split_video_utils/split_video.py | 957 ++++++++++++++++++++++++ core/split_video_utils/video_analyse.py | 272 +++++++ core/step2_whisperX.py | 2 +- core/step7_merge_sub_to_vid.py | 9 +- 4 files changed, 1235 insertions(+), 5 deletions(-) create mode 100644 core/split_video_utils/split_video.py create mode 100644 core/split_video_utils/video_analyse.py diff --git a/core/split_video_utils/split_video.py b/core/split_video_utils/split_video.py new file mode 100644 index 00000000..9bd09f68 --- /dev/null +++ b/core/split_video_utils/split_video.py @@ -0,0 +1,957 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import os +import sys +import subprocess +import argparse +import json +from pathlib import Path +from rich.console import Console +from rich.panel import Panel +from rich import print as rprint + +# 创建控制台对象 +console = Console() + +def format_time(seconds): + """格式化时间显示""" + if seconds < 0: + return "0:00.000" + + hours = int(seconds // 3600) + minutes = int((seconds % 3600) // 60) + secs = seconds % 60 + + if hours > 0: + return f"{hours}:{minutes:02d}:{secs:06.3f}" + else: + return f"{minutes}:{secs:06.3f}" + +def get_video_duration(video_path): + """获取视频时长""" + try: + cmd = [ + 'ffprobe', + '-v', 'quiet', + '-print_format', 'json', + '-show_format', + video_path + ] + + result = subprocess.run(cmd, capture_output=True, text=True, timeout=30) + + if result.returncode == 0: + info = json.loads(result.stdout) + duration = float(info['format']['duration']) + return duration + else: + rprint(f"[red]❌ 获取视频时长失败[/red]") + return None + + except Exception as e: + rprint(f"[red]❌ 获取视频时长错误: {e}[/red]") + return None + +def check_demucs_installation(): + """检查Demucs是否安装""" + try: + result = subprocess.run(['python', '-c', 'import demucs'], + capture_output=True, text=True, timeout=10) + return result.returncode == 0 + except: + return False + +def extract_video_segment(input_path, start_time, duration, output_path): + """提取视频片段""" + rprint(f"[cyan]✂️ 提取视频片段: {format_time(start_time)} - {format_time(start_time + duration)}[/cyan]") + + cmd = [ + 'ffmpeg', + '-i', input_path, + '-ss', str(start_time), + '-t', str(duration), + '-c', 'copy', + output_path, + '-y' + ] + + try: + subprocess.run(cmd, check=True, capture_output=True, timeout=60) + rprint(f"[green]✓ 视频片段提取完成: {os.path.basename(output_path)}[/green]") + return True + except Exception as e: + rprint(f"[red]❌ 视频片段提取失败: {e}[/red]") + return False + +def extract_audio_from_video(video_path, output_audio_path): + """从视频中提取音频""" + rprint(f"[cyan]🎵 提取音频: {os.path.basename(video_path)} -> {os.path.basename(output_audio_path)}[/cyan]") + + cmd = [ + 'ffmpeg', + '-i', video_path, + '-vn', + '-acodec', 'libmp3lame', + '-ab', '192k', + '-ar', '44100', + output_audio_path, + '-y' + ] + + try: + subprocess.run(cmd, check=True, capture_output=True, timeout=30) + rprint(f"[green]✓ 音频提取完成: {os.path.basename(output_audio_path)}[/green]") + return True + except Exception as e: + rprint(f"[red]❌ 音频提取失败: {e}[/red]") + return False + +def separate_vocals_with_demucs(audio_path, output_dir): + """使用Demucs分离人声""" + rprint(f"[cyan]🎤 使用Demucs分离人声: {os.path.basename(audio_path)}[/cyan]") + + try: + # 创建临时目录 + temp_dir = os.path.join(output_dir, "demucs_temp") + os.makedirs(temp_dir, exist_ok=True) + + # 运行Demucs + cmd = [ + 'python', '-m', 'demucs.separate', + '--two-stems=vocals', + '-o', temp_dir, + audio_path + ] + + with console.status("[yellow]🎤 分离人声中...", spinner="dots"): + result = subprocess.run(cmd, capture_output=True, text=True, timeout=300) + + if result.returncode == 0: + # 查找输出文件 + audio_name = os.path.splitext(os.path.basename(audio_path))[0] + vocals_path = None + no_vocals_path = None + + # 搜索输出文件 + for root, dirs, files in os.walk(temp_dir): + for file in files: + if 'vocals' in file and audio_name in file: + vocals_path = os.path.join(root, file) + elif 'no_vocals' in file and audio_name in file: + no_vocals_path = os.path.join(root, file) + + if vocals_path: + # 移动到输出目录 + final_vocals_path = os.path.join(output_dir, f"{audio_name}_vocals.mp3") + final_no_vocals_path = os.path.join(output_dir, f"{audio_name}_no_vocals.mp3") + + # 转换为mp3格式 + if vocals_path.endswith('.wav'): + subprocess.run([ + 'ffmpeg', '-i', vocals_path, + '-acodec', 'libmp3lame', final_vocals_path, '-y' + ], capture_output=True) + else: + subprocess.run(['cp', vocals_path, final_vocals_path]) + + if no_vocals_path and no_vocals_path.endswith('.wav'): + subprocess.run([ + 'ffmpeg', '-i', no_vocals_path, + '-acodec', 'libmp3lame', final_no_vocals_path, '-y' + ], capture_output=True) + elif no_vocals_path: + subprocess.run(['cp', no_vocals_path, final_no_vocals_path]) + + rprint(f"[green]✓ 人声分离完成:[/green]") + rprint(f" [cyan]🎤 人声: {os.path.basename(final_vocals_path)}[/cyan]") + rprint(f" [cyan]🎵 伴奏: {os.path.basename(final_no_vocals_path)}[/cyan]") + + return final_vocals_path, final_no_vocals_path + else: + rprint(f"[red]❌ 未找到人声分离输出文件[/red]") + return None, None + else: + rprint(f"[red]❌ Demucs分离失败: {result.stderr}[/red]") + return None, None + + except Exception as e: + rprint(f"[red]❌ 人声分离错误: {e}[/red]") + return None, None + +def generate_cut_segments(cut_points, total_duration): + """根据切分点生成段落信息""" + segments = [] + + # 第一个段落:从开始到第一个切分点 + if cut_points: + segments.append({ + 'index': 1, + 'start': 0, + 'end': cut_points[0]['actual'], + 'duration': cut_points[0]['actual'], + 'cut_type': 'start' + }) + + # 中间段落 + for i in range(len(cut_points) - 1): + segments.append({ + 'index': i + 2, + 'start': cut_points[i]['actual'], + 'end': cut_points[i + 1]['actual'], + 'duration': cut_points[i + 1]['actual'] - cut_points[i]['actual'], + 'cut_type': 'middle' + }) + + # 最后一个段落:从最后一个切分点到结束 + segments.append({ + 'index': len(cut_points) + 1, + 'start': cut_points[-1]['actual'], + 'end': total_duration, + 'duration': total_duration - cut_points[-1]['actual'], + 'cut_type': 'end' + }) + else: + # 没有切分点,整个视频作为一个段落 + segments.append({ + 'index': 1, + 'start': 0, + 'end': total_duration, + 'duration': total_duration, + 'cut_type': 'whole' + }) + + return segments + +def detect_silence_fixed(audio_path, noise_db=-25, min_duration=0.1): + """修复的静音检测函数""" + rprint(f"[cyan]🔍 检测静音段 ({noise_db}dB, ≥{min_duration}s)...[/cyan]") + + cmd = [ + 'ffmpeg', + '-i', audio_path, + '-af', f'silencedetect=noise={noise_db}dB:duration={min_duration}', + '-f', 'null', + '-', + '-v', 'info' + ] + + try: + result = subprocess.run(cmd, capture_output=True, text=True, timeout=30) + + silence_periods = [] + current_silence_start = None + + for line in result.stderr.split('\n'): + line = line.strip() + + # 解析 silence_start + if 'silence_start:' in line: + try: + start_part = line.split('silence_start:')[1].strip() + silence_start = float(start_part.split()[0]) + current_silence_start = silence_start + except Exception: + continue + + # 解析 silence_end + elif 'silence_end:' in line and current_silence_start is not None: + try: + parts = line.split('silence_end:')[1] + + if '|' in parts: + end_part = parts.split('|')[0].strip() + duration_part = parts.split('silence_duration:')[1].strip() + silence_end = float(end_part) + silence_duration = float(duration_part) + else: + silence_end = float(parts.strip()) + silence_duration = silence_end - current_silence_start + + if silence_duration >= min_duration: + silence_periods.append({ + 'start': current_silence_start, + 'end': silence_end, + 'duration': silence_duration, + 'center': (current_silence_start + silence_end) / 2 + }) + + current_silence_start = None + + except Exception: + continue + + if silence_periods: + rprint(f"[green]✓ 找到 {len(silence_periods)} 个静音段 ({noise_db}dB, ≥{min_duration}s)[/green]") + + # 按时长分类 + short_silences = [s for s in silence_periods if 0.1 <= s['duration'] < 0.5] + medium_silences = [s for s in silence_periods if 0.5 <= s['duration'] < 1.0] + long_silences = [s for s in silence_periods if s['duration'] >= 1.0] + + rprint(f" [dim]短静音(0.1-0.5s): {len(short_silences)} | " + f"中静音(0.5-1.0s): {len(medium_silences)} | " + f"长静音(1.0s+): {len(long_silences)}[/dim]") + + # 显示详细信息 + for i, period in enumerate(silence_periods[:10]): + silence_type = "🔸" if period['duration'] < 0.5 else "🔹" if period['duration'] < 1.0 else "🔶" + rprint(f" {silence_type} {i+1:2d}. {format_time(period['start'])} - {format_time(period['end'])} " + f"({period['duration']:.3f}s) 中点: {format_time(period['center'])}") + + if len(silence_periods) > 10: + rprint(f" ... 还有 {len(silence_periods) - 10} 个静音段") + else: + rprint(f"[yellow]⚠️ 未找到符合条件的静音段 ({noise_db}dB, ≥{min_duration}s)[/yellow]") + + return silence_periods + + except Exception as e: + rprint(f"[red]❌ 静音检测失败: {e}[/red]") + return [] + +def detect_speech_pauses_fixed(audio_path, audio_type="音频"): + """修复的人声停顿检测""" + rprint(f"[cyan]🎤 检测{audio_type}中的人声停顿...[/cyan]") + + # 精细参数配置 + speech_configs = [ + (-15, 0.05, "词间停顿(-15dB, 50ms)", "词间"), + (-18, 0.05, "短句停顿(-18dB, 50ms)", "短句"), + (-20, 0.05, "句间停顿(-20dB, 50ms)", "句间"), + (-25, 0.05, "段落停顿(-25dB, 50ms)", "段落"), + (-15, 0.1, "词间停顿(-15dB, 100ms)", "词间"), + (-18, 0.1, "短句停顿(-18dB, 100ms)", "短句"), + (-20, 0.1, "句间停顿(-20dB, 100ms)", "句间"), + (-25, 0.1, "段落停顿(-25dB, 100ms)", "段落"), + (-15, 0.15, "长词间(-15dB, 150ms)", "长词间"), + (-18, 0.15, "长句间(-18dB, 150ms)", "长句间"), + (-20, 0.15, "自然停顿(-20dB, 150ms)", "自然"), + ] + + all_results = [] + + for noise_db, min_duration, desc, pause_type in speech_configs: + cmd = [ + 'ffmpeg', + '-i', audio_path, + '-af', f'silencedetect=noise={noise_db}dB:duration={min_duration}', + '-f', 'null', + '-', + '-v', 'info' + ] + + try: + result = subprocess.run(cmd, capture_output=True, text=True, timeout=15) + + silence_periods = [] + current_silence_start = None + + for line in result.stderr.split('\n'): + line = line.strip() + + if 'silence_start:' in line: + try: + start_part = line.split('silence_start:')[1].strip() + silence_start = float(start_part.split()[0]) + current_silence_start = silence_start + except: + continue + + elif 'silence_end:' in line and current_silence_start is not None: + try: + parts = line.split('silence_end:')[1] + + if '|' in parts: + end_part = parts.split('|')[0].strip() + duration_part = parts.split('silence_duration:')[1].strip() + silence_end = float(end_part) + silence_duration = float(duration_part) + else: + silence_end = float(parts.strip()) + silence_duration = silence_end - current_silence_start + + if silence_duration >= min_duration: + silence_periods.append({ + 'start': current_silence_start, + 'end': silence_end, + 'duration': silence_duration, + 'center': (current_silence_start + silence_end) / 2, + 'type': pause_type + }) + current_silence_start = None + except: + continue + + # 按停顿时长分类 + micro_pauses = [s for s in silence_periods if 0.05 <= s['duration'] < 0.1] + short_pauses = [s for s in silence_periods if 0.1 <= s['duration'] < 0.2] + medium_pauses = [s for s in silence_periods if 0.2 <= s['duration'] < 0.5] + long_pauses = [s for s in silence_periods if s['duration'] >= 0.5] + + result_info = { + 'config': (noise_db, min_duration, desc, pause_type), + 'silences': silence_periods, + 'count': len(silence_periods), + 'micro': len(micro_pauses), + 'short': len(short_pauses), + 'medium': len(medium_pauses), + 'long': len(long_pauses) + } + all_results.append(result_info) + + if silence_periods: + rprint(f" [green]✓ {desc}: {len(silence_periods):3d} 个停顿[/green] " + f"[dim](微:{len(micro_pauses)} 短:{len(short_pauses)} 中:{len(medium_pauses)} 长:{len(long_pauses)})[/dim]") + + # 显示前3个停顿 + for i, period in enumerate(silence_periods[:3]): + if period['duration'] < 0.1: + icon = "🔸" + elif period['duration'] < 0.2: + icon = "🔹" + elif period['duration'] < 0.5: + icon = "🔷" + else: + icon = "🔶" + + rprint(f" {icon} {i+1}. {format_time(period['start'])} - {format_time(period['end'])} " + f"({period['duration']*1000:5.0f}ms) [{period['type']}]") + + if len(silence_periods) > 3: + rprint(f" ... 还有 {len(silence_periods) - 3} 个停顿") + else: + rprint(f" [red]✗ {desc}: 0 个停顿[/red]") + + except Exception as e: + rprint(f" [red]✗ {desc}: 检测失败 - {e}[/red]") + + return all_results + +def find_optimal_speech_cuts_fixed(all_results, target_interval_minutes=30, total_duration=None): + """从人声停顿中找到最佳切分点""" + rprint(f"\n[cyan]🎯 从人声停顿中寻找{target_interval_minutes}分钟间隔的最佳切分点...[/cyan]") + + if not total_duration: + rprint("[red]❌ 需要提供总时长[/red]") + return [] + + # 选择最佳的检测结果 + best_result = None + + for result in all_results: + count = result['count'] + config = result['config'] + + if count >= 3: # 至少要有3个停顿 + score = 0 + + # 基础分数 + if 5 <= count <= 30: + score += 10 + elif count >= 3: + score += 5 + + # 停顿类型加分 + score += result['short'] * 2 + score += result['medium'] * 1.5 + score += result['micro'] * 1 + + # 噪音阈值加分 + if config[0] >= -20: + score += 3 + elif config[0] >= -25: + score += 2 + + # 时长加分 + if 0.05 <= config[1] <= 0.15: + score += 3 + elif 0.05 <= config[1] <= 0.2: + score += 2 + + result['score'] = score + + if best_result is None or score > best_result['score']: + best_result = result + + if not best_result: + rprint("[red]❌ 未找到合适的停顿检测结果[/red]") + return [] + + config = best_result['config'] + silences = best_result['silences'] + + rprint(f"[green]🏆 选择最佳配置: {config[2]} (评分: {best_result['score']:.1f})[/green]") + rprint(f"[yellow]📊 停顿统计: 总计{len(silences)}个 | " + f"微停顿{best_result['micro']}个 | 短停顿{best_result['short']}个 | " + f"中停顿{best_result['medium']}个 | 长停顿{best_result['long']}个[/yellow]") + + # 计算目标切分点 + target_seconds = target_interval_minutes * 60 + target_points = [] + + current = target_seconds + while current < total_duration - 60: + target_points.append(current) + current += target_seconds + + if not target_points: + rprint(f"[yellow]⚠️ 音频时长不足以按{target_interval_minutes}分钟切分[/yellow]") + return [] + + rprint(f"[yellow]🎯 目标切分点: {len(target_points)} 个[/yellow]") + + cut_points = [] + + for i, target_point in enumerate(target_points): + rprint(f"[yellow]🔍 切分点 {i+1} (目标: {format_time(target_point)}):[/yellow]") + + # 在目标点前后寻找最佳停顿 + search_ranges = [15, 30, 60, 120, 300] + + found_cut = False + + for search_range in search_ranges: + if found_cut: + break + + search_start = max(0, target_point - search_range) + search_end = min(total_duration, target_point + search_range) + + # 找到范围内的停顿 + candidates = [] + for silence in silences: + if search_start <= silence['center'] <= search_end: + distance = abs(silence['center'] - target_point) + + # 评分系统 + duration_score = 1.0 + if 0.1 <= silence['duration'] <= 0.3: + duration_score = 2.0 + elif 0.05 <= silence['duration'] <= 0.5: + duration_score = 1.5 + + distance_score = 1.0 / (distance + 1) + + type_score = 1.0 + if silence['type'] in ['句间', '自然', '段落']: + type_score = 1.5 + elif silence['type'] in ['短句', '长句间']: + type_score = 1.3 + + total_score = duration_score * distance_score * type_score + + candidates.append({ + 'silence': silence, + 'distance': distance, + 'score': total_score + }) + + if candidates: + candidates.sort(key=lambda x: (-x['score'], x['distance'])) + best = candidates[0] + + cut_points.append({ + 'target': target_point, + 'actual': best['silence']['center'], + 'deviation': best['silence']['center'] - target_point, + 'silence_start': best['silence']['start'], + 'silence_end': best['silence']['end'], + 'silence_duration': best['silence']['duration'], + 'silence_type': best['silence']['type'], + 'search_range': search_range, + 'score': best['score'] + }) + + rprint(f" [green]✓ 找到停顿: {format_time(best['silence']['center'])} " + f"(偏差 {best['silence']['center'] - target_point:+.1f}s, " + f"停顿 {best['silence']['duration']*1000:.0f}ms, " + f"类型: {best['silence']['type']}, " + f"搜索范围 ±{search_range}s)[/green]") + + found_cut = True + else: + rprint(f" [yellow]⚠️ ±{search_range}s范围内无合适停顿[/yellow]") + + if not found_cut: + fallback_time = min(target_point + 30, total_duration - 30) + cut_points.append({ + 'target': target_point, + 'actual': fallback_time, + 'deviation': fallback_time - target_point, + 'silence_start': fallback_time, + 'silence_end': fallback_time, + 'silence_duration': 0, + 'silence_type': 'fallback', + 'search_range': 0, + 'score': 0 + }) + rprint(f" [red]✗ 无合适停顿,使用备选点 {format_time(fallback_time)}[/red]") + + return cut_points + +def extract_audio_from_video_large(video_path, output_audio_path, timeout_minutes=10): + """从大视频中提取音频,增加超时时间""" + rprint(f"[cyan]🎵 提取音频 (大文件模式): {os.path.basename(video_path)} -> {os.path.basename(output_audio_path)}[/cyan]") + + cmd = [ + 'ffmpeg', + '-i', video_path, + '-vn', + '-acodec', 'libmp3lame', + '-ab', '128k', + '-ar', '22050', + '-ac', '1', + output_audio_path, + '-y' + ] + + try: + timeout_seconds = timeout_minutes * 60 + with console.status(f"[yellow]🎵 提取音频中... (最多等待{timeout_minutes}分钟)", spinner="dots"): + subprocess.run(cmd, check=True, capture_output=True, timeout=timeout_seconds) + rprint(f"[green]✓ 音频提取完成: {os.path.basename(output_audio_path)}[/green]") + return True + except subprocess.TimeoutExpired: + rprint(f"[red]❌ 音频提取超时({timeout_minutes}分钟)[/red]") + return False + except Exception as e: + rprint(f"[red]❌ 音频提取失败: {e}[/red]") + return False + +def find_cut_points_from_silences(silences, target_interval_minutes=30, total_duration=None): + """从静音段中找到最佳切分点""" + rprint(f"[cyan]🎯 从静音段中寻找{target_interval_minutes}分钟间隔的切分点...[/cyan]") + + if not total_duration: + rprint("[red]❌ 需要提供总时长[/red]") + return [] + + # 计算目标切分点 + target_seconds = target_interval_minutes * 60 + target_points = [] + + current = target_seconds + while current < total_duration - 60: + target_points.append(current) + current += target_seconds + + if not target_points: + rprint(f"[yellow]⚠️ 音频时长不足以按{target_interval_minutes}分钟切分[/yellow]") + return [] + + rprint(f"[yellow]🎯 目标切分点: {len(target_points)} 个[/yellow]") + rprint(f"[yellow]📊 可用静音段: {len(silences)} 个[/yellow]") + + cut_points = [] + + for i, target_point in enumerate(target_points): + rprint(f"[yellow]🔍 切分点 {i+1} (目标: {format_time(target_point)}):[/yellow]") + + # 在目标点前后寻找最佳静音段 + search_ranges = [30, 60, 120, 300, 600] + + found_cut = False + + for search_range in search_ranges: + if found_cut: + break + + search_start = max(0, target_point - search_range) + search_end = min(total_duration, target_point + search_range) + + # 找到范围内的静音段 + candidates = [] + for silence in silences: + if search_start <= silence['center'] <= search_end: + distance = abs(silence['center'] - target_point) + # 评分:静音时长越长越好,距离目标点越近越好 + score = silence['duration'] / (distance + 1) + candidates.append({ + 'silence': silence, + 'distance': distance, + 'score': score + }) + + if candidates: + # 按评分排序 + candidates.sort(key=lambda x: (-x['score'], x['distance'])) + best = candidates[0] + + cut_points.append({ + 'target': target_point, + 'actual': best['silence']['center'], + 'deviation': best['silence']['center'] - target_point, + 'silence_start': best['silence']['start'], + 'silence_end': best['silence']['end'], + 'silence_duration': best['silence']['duration'], + 'search_range': search_range + }) + + rprint(f" [green]✓ 切分点: {format_time(best['silence']['center'])} " + f"(偏差 {best['silence']['center'] - target_point:+.1f}s, " + f"静音 {best['silence']['duration']:.3f}s, " + f"搜索范围 ±{search_range}s)[/green]") + + found_cut = True + else: + rprint(f" [yellow]⚠️ ±{search_range}s范围内无静音段[/yellow]") + + if not found_cut: + fallback_time = min(target_point + 60, total_duration - 60) + cut_points.append({ + 'target': target_point, + 'actual': fallback_time, + 'deviation': fallback_time - target_point, + 'silence_start': fallback_time, + 'silence_end': fallback_time, + 'silence_duration': 0, + 'search_range': 0, + 'type': 'fallback' + }) + rprint(f" [red]✗ 无合适静音段,使用备选点 {format_time(fallback_time)}[/red]") + + return cut_points + +def process_video_segments_25db(input_path, output_dir, segment_duration=30, target_interval=30): + """处理视频片段并基于人声停顿检测切分点""" + + rprint(Panel.fit("[bold magenta]🚀 基于人声停顿的智能切分工具 (修复版)[/bold magenta]", border_style="magenta")) + + # 检查文件 + if not os.path.exists(input_path): + rprint(f"[bold red]❌ 文件不存在: {input_path}[/bold red]") + return + + rprint(f"[green]✓ 输入文件[/green]: [cyan]{os.path.basename(input_path)}[/cyan]") + + # 获取视频信息 + total_duration = get_video_duration(input_path) + if total_duration is None: + return + + rprint(f"[green]✓ 视频总时长[/green]: [yellow]{format_time(total_duration)}[/yellow]") + + # 检查Demucs + if not check_demucs_installation(): + rprint("[red]❌ Demucs未安装,请运行: pip install demucs[/red]") + return + + # 创建输出目录 + os.makedirs(output_dir, exist_ok=True) + + # 提取测试片段进行分析 + test_segments = [] + + # 开头片段 + if total_duration > segment_duration: + test_segments.append({ + 'name': 'start', + 'start': 0, + 'duration': segment_duration, + 'desc': f'开头{segment_duration}秒' + }) + + # 中间片段 + if total_duration > segment_duration * 4: + middle_start = (total_duration - segment_duration) / 2 + test_segments.append({ + 'name': 'middle', + 'start': middle_start, + 'duration': segment_duration, + 'desc': f'中间{segment_duration}秒' + }) + + if not test_segments: + rprint(f"[red]❌ 视频太短,无法提取测试片段[/red]") + return + + rprint(f"[cyan]📋 将分析 {len(test_segments)} 个测试片段[/cyan]") + + best_vocals_path = None + + # 处理测试片段 + for segment in test_segments: + rprint(f"\n[yellow]🎬 处理{segment['desc']}片段...[/yellow]") + + # 提取视频片段 + video_segment_path = os.path.join(output_dir, f"segment_{segment['name']}.mp4") + if not extract_video_segment(input_path, segment['start'], segment['duration'], video_segment_path): + continue + + # 提取音频 + audio_path = os.path.join(output_dir, f"segment_{segment['name']}_audio.mp3") + if not extract_audio_from_video(video_segment_path, audio_path): + continue + + # 分析原始音频的静音段 + rprint(f"[cyan]📊 分析{segment['desc']}原始音频的静音段:[/cyan]") + original_silences = detect_silence_fixed(audio_path, noise_db=-25, min_duration=0.1) + + # 分离人声 + vocals_path, no_vocals_path = separate_vocals_with_demucs(audio_path, output_dir) + + if vocals_path: + best_vocals_path = vocals_path + + # 分析人声的静音段 + rprint(f"[cyan]📊 分析{segment['desc']}纯人声的静音段:[/cyan]") + vocal_silences = detect_silence_fixed(vocals_path, noise_db=-25, min_duration=0.1) + + # 分析人声的精细停顿 + rprint(f"[cyan]🎤 分析{segment['desc']}纯人声的精细停顿:[/cyan]") + speech_pauses = detect_speech_pauses_fixed(vocals_path, f"{segment['desc']}纯人声") + + # 对比分析 + rprint(f"[yellow]📈 对比分析:[/yellow]") + rprint(f" 原始音频静音段: {len(original_silences)} 个") + rprint(f" 纯人声静音段: {len(vocal_silences)} 个") + + # 统计精细停顿 + total_speech_pauses = sum(result['count'] for result in speech_pauses) + rprint(f" 纯人声精细停顿: {total_speech_pauses} 个") + + if len(vocal_silences) > len(original_silences): + rprint(f" [green]✓ 人声分离后检测到更多静音段 (+{len(vocal_silences) - len(original_silences)})[/green]") + elif len(vocal_silences) == len(original_silences): + rprint(f" [yellow]= 静音段数量相同[/yellow]") + else: + rprint(f" [red]- 人声分离后静音段减少 ({len(vocal_silences) - len(original_silences)})[/red]") + + if total_speech_pauses > 0: + rprint(f" [green]✓ 成功检测到人声精细停顿![/green]") + else: + rprint(f" [yellow]⚠️ 未检测到精细停顿[/yellow]") + + rprint(f"[green]✅ {segment['desc']}片段分析完成[/green]") + + # 如果有人声文件,进行完整视频的切分点分析 + if best_vocals_path: + rprint(f"\n[cyan]🎯 基于人声进行完整视频的{target_interval}分钟间隔切分分析...[/cyan]") + + # 提取完整音频进行分析 + full_audio_path = os.path.join(output_dir, "full_audio.mp3") + if extract_audio_from_video_large(input_path, full_audio_path, timeout_minutes=15): + # 分离完整音频的人声 + full_vocals_path, _ = separate_vocals_with_demucs(full_audio_path, output_dir) + + if full_vocals_path: + # 尝试精细停顿切分 + rprint(f"[cyan]🎤 尝试基于人声精细停顿进行切分...[/cyan]") + speech_results = detect_speech_pauses_fixed(full_vocals_path, "完整人声") + speech_cut_points = find_optimal_speech_cuts_fixed(speech_results, target_interval, total_duration) + + # 备选的静音段切分 + rprint(f"[cyan]🔍 尝试基于静音段进行切分...[/cyan]") + silence_cut_points = [] + full_silences = detect_silence_fixed(full_vocals_path, noise_db=-25, min_duration=0.3) + + if full_silences: + # 使用静音段进行切分 + silence_cut_points = find_cut_points_from_silences(full_silences, target_interval, total_duration) + + # 选择最佳切分方案 + final_cut_points = [] + cut_method = "" + + if speech_cut_points and len(speech_cut_points) > 0: + final_cut_points = speech_cut_points + cut_method = "人声精细停顿" + rprint(f"[green]🏆 选择人声精细停顿切分方案[/green]") + elif silence_cut_points and len(silence_cut_points) > 0: + final_cut_points = silence_cut_points + cut_method = "静音段" + rprint(f"[yellow]⚠️ 使用静音段切分方案[/yellow]") + else: + rprint(f"[red]❌ 两种切分方案都未找到合适的切分点[/red]") + + if final_cut_points: + # 生成段落信息 + segments = generate_cut_segments(final_cut_points, total_duration) + + rprint(f"\n[green]🎉 使用{cut_method}找到 {len(final_cut_points)} 个切分点,生成 {len(segments)} 个段落:[/green]") + + total_segments_duration = 0 + for segment in segments: + cut_type_desc = "精细停顿" if 'silence_type' in final_cut_points[0] and final_cut_points[0]['silence_type'] != 'fallback' else "静音切分" if segment['cut_type'] == 'silence_cut' else "备选切分" if segment['cut_type'] == 'fallback' else "最终段" + rprint(f" 📹 段落 {segment['index']:2d}: {format_time(segment['start'])} - {format_time(segment['end'])} " + f"({format_time(segment['duration'])}) [{cut_type_desc}]") + total_segments_duration += segment['duration'] + + rprint(f"\n[cyan]📊 切分统计:[/cyan]") + rprint(f" 总时长: {format_time(total_duration)}") + rprint(f" 段落总时长: {format_time(total_segments_duration)}") + rprint(f" 平均段落时长: {format_time(total_segments_duration / len(segments))}") + rprint(f" 切分方法: {cut_method}") + + # 保存切分点信息 + cut_points_file = os.path.join(output_dir, "cut_points_speech_fixed.txt") + with open(cut_points_file, 'w', encoding='utf-8') as f: + f.write(f"基于{cut_method}的切分点信息\n") + f.write("=" * 50 + "\n\n") + + f.write("切分点详情:\n") + for i, cp in enumerate(final_cut_points): + f.write(f"切分点 {i+1}: {format_time(cp['actual'])}\n") + f.write(f" 目标时间: {format_time(cp['target'])}\n") + f.write(f" 偏差: {cp['deviation']:+.1f}s\n") + f.write(f" 静音段: {format_time(cp['silence_start'])} - {format_time(cp['silence_end'])}\n") + f.write(f" 静音时长: {cp['silence_duration']:.3f}s\n") + if 'silence_type' in cp: + f.write(f" 停顿类型: {cp['silence_type']}\n") + f.write(f" 搜索范围: ±{cp['search_range']}s\n\n") + + f.write("生成的段落:\n") + for segment in segments: + f.write(f"段落 {segment['index']}: {format_time(segment['start'])} - {format_time(segment['end'])} ({format_time(segment['duration'])})\n") + + rprint(f"[green]✓ 切分点信息已保存到: {cut_points_file}[/green]") + else: + rprint("[red]❌ 未找到合适的切分点[/red]") + + # 显示结果总结 + rprint(Panel( + f"[bold green]🎉 基于人声停顿的智能切分分析完成![/bold green]\n\n" + f"• 分析片段: [blue]{len(test_segments)}[/blue] 个\n" + f"• 目标间隔: [yellow]{target_interval}[/yellow] 分钟\n" + f"• 输出目录: [cyan]{output_dir}[/cyan]\n\n" + f"[dim]💡 优先使用人声精细停顿(50ms起),备选静音段切分\n" + f"🔸 微停顿(50-100ms) 🔹 短停顿(100-200ms) 🔷 中停顿(200-500ms) 🔶 长停顿(500ms+)\n" + f"📋 切分点信息已保存到 cut_points_speech_fixed.txt[/dim]", + title="✨ 完成", + border_style="green" + )) + +def main(): + """命令行入口""" + parser = argparse.ArgumentParser(description="基于人声停顿的智能切分工具 (修复版)") + parser.add_argument("--input", "-i", required=True, help="输入视频文件") + parser.add_argument("--output", "-o", required=True, help="输出目录") + parser.add_argument("--duration", "-d", type=int, default=30, help="测试片段长度(秒)") + parser.add_argument("--interval", "-t", type=int, default=30, help="目标切分间隔(分钟)") + + args = parser.parse_args() + + process_video_segments_25db( + input_path=args.input, + output_dir=args.output, + segment_duration=args.duration, + target_interval=args.interval + ) + +if __name__ == "__main__": + import sys + + if len(sys.argv) > 1: + main() + else: + # 直接调用示例 + input_video = "/home/darkchunk/code/VideoLingo/output/Learn Solidity Smart Contract Development | Full 2024 Cyfrin Updraft Course.webm" + output_directory = "/home/darkchunk/code/VideoLingo/output/test_speech_cuts_fixed" + + process_video_segments_25db( + input_video, + output_directory, + segment_duration=30, + target_interval=30 # 30分钟间隔 + ) \ No newline at end of file diff --git a/core/split_video_utils/video_analyse.py b/core/split_video_utils/video_analyse.py new file mode 100644 index 00000000..284c1534 --- /dev/null +++ b/core/split_video_utils/video_analyse.py @@ -0,0 +1,272 @@ +import numpy as np +import librosa +import warnings + +warnings.filterwarnings('ignore') + +def create_terminal_30s_timeline(audio_path): + """在终端内显示30秒音频时间线""" + + print("🎵 Loading 30 seconds of audio...") + + # 加载音频 + y, sr = librosa.load(audio_path, sr=22050, duration=30.0) + hop_length = int(0.01 * sr) + frame_length = hop_length * 4 + + rms_energy = librosa.feature.rms(y=y, frame_length=frame_length, hop_length=hop_length)[0] + rms_db = librosa.amplitude_to_db(rms_energy, ref=np.max) + time_frames = librosa.frames_to_time(np.arange(len(rms_energy)), sr=sr, hop_length=hop_length) + + print(f"✓ Loaded: {len(y)/sr:.2f}s, Generated {len(rms_db)} data points") + + # 创建终端ASCII图表 + print("\n" + "="*100) + print(" 30-SECOND AUDIO dB TIMELINE") + print("="*100) + + # 图表参数 + width = 90 # 90个字符宽度,每个字符代表约0.33秒 + height = 25 # 25行高度 + + min_db = np.min(rms_db) + max_db = np.max(rms_db) + + # 绘制主图表 + for row in range(height): + line = "" + db_level = max_db - (row / height) * (max_db - min_db) + + # 添加dB标签 + db_label = f"{db_level:6.1f}dB |" + + for col in range(width): + time_idx = int((col / width) * len(rms_db)) + if time_idx < len(rms_db): + current_db = rms_db[time_idx] + + if current_db >= db_level: + line += "█" # 实心块 + elif current_db >= db_level - 1: + line += "▓" # 深灰 + elif current_db >= db_level - 2: + line += "▒" # 中灰 + elif current_db >= db_level - 3: + line += "░" # 浅灰 + else: + line += " " # 空白 + else: + line += " " + + # 添加阈值标记 + threshold_mark = "" + if abs(db_level - (-20)) < 1: + threshold_mark = " ← -20dB (Strict)" + elif abs(db_level - (-25)) < 1: + threshold_mark = " ← -25dB (Normal)" + elif abs(db_level - (-30)) < 1: + threshold_mark = " ← -30dB (Sensitive)" + elif abs(db_level - (-35)) < 1: + threshold_mark = " ← -35dB (Ultra)" + + print(db_label + line + "|" + threshold_mark) + + # 时间轴 + time_axis = " |" + for i in range(0, width, 15): # 每15个字符一个时间标记 + time_val = (i / width) * 30 + time_axis += f"{time_val:4.0f}s" + " " * 11 + print(time_axis) + + # 底部标尺 + scale_line = " |" + for i in range(0, width, 5): + if i % 15 == 0: + scale_line += "|" + else: + scale_line += "." + print(scale_line) + + print("="*100) + + # 静音检测可视化 + print("\n" + "="*100) + print(" SILENCE DETECTION TIMELINE") + print("="*100) + + # 创建静音检测图 + silence_levels = [ + (-20, "🔴", "STRICT"), + (-25, "🟠", "NORMAL"), + (-30, "🟢", "SENSITIVE"), + (-35, "🟣", "ULTRA") + ] + + for threshold, emoji, name in silence_levels: + line = f"{name:>10} {threshold:3d}dB |" + + for col in range(width): + time_idx = int((col / width) * len(rms_db)) + if time_idx < len(rms_db): + if rms_db[time_idx] < threshold: + line += "█" # 静音 + else: + line += "░" # 活跃 + else: + line += " " + + # 计算静音百分比 + silent_frames = np.sum(rms_db < threshold) + silent_percent = (silent_frames / len(rms_db)) * 100 + + line += f"| {silent_percent:5.1f}% silent" + print(line) + + # 时间轴(重复) + time_axis = " |" + for i in range(0, width, 15): + time_val = (i / width) * 30 + time_axis += f"{time_val:4.0f}s" + " " * 11 + print(time_axis) + + print("="*100) + + return time_frames, rms_db + +def show_30s_detailed_analysis(time_frames, rms_db): + """显示详细的30秒分析""" + + print("\n" + "🔍 DETAILED 30-SECOND ANALYSIS") + print("="*80) + + # 基本统计 + max_db = np.max(rms_db) + min_db = np.min(rms_db) + mean_db = np.mean(rms_db) + std_db = np.std(rms_db) + + print(f"📊 BASIC STATISTICS:") + print(f" Max dB: {max_db:7.2f} dB") + print(f" Min dB: {min_db:7.2f} dB") + print(f" Mean dB: {mean_db:7.2f} dB") + print(f" Std Dev: {std_db:7.2f} dB") + print(f" Range: {max_db - min_db:7.2f} dB") + + # 每秒分析 + print(f"\n⏱️ SECOND-BY-SECOND ANALYSIS:") + print("-" * 60) + print(f"{'Second':<8} {'Avg dB':<8} {'Min dB':<8} {'Max dB':<8} {'Status':<12}") + print("-" * 60) + + for sec in range(30): + start_idx = np.argmin(np.abs(time_frames - sec)) + end_idx = np.argmin(np.abs(time_frames - (sec + 1))) + + if end_idx > start_idx: + sec_data = rms_db[start_idx:end_idx] + avg_db = np.mean(sec_data) + min_sec_db = np.min(sec_data) + max_sec_db = np.max(sec_data) + + # 状态判断 + if avg_db < -35: + status = "VERY QUIET" + elif avg_db < -30: + status = "QUIET" + elif avg_db < -25: + status = "MEDIUM" + elif avg_db < -15: + status = "LOUD" + else: + status = "VERY LOUD" + + print(f"{sec:2d}s {avg_db:6.1f} {min_sec_db:6.1f} {max_sec_db:6.1f} {status}") + + # 活跃时段检测 + print(f"\n🎵 ACTIVE PERIODS (> -30dB):") + print("-" * 40) + + active_mask = rms_db > -30 + in_active = False + active_start = 0 + active_periods = [] + + for i, is_active in enumerate(active_mask): + if is_active and not in_active: + active_start = i + in_active = True + elif not is_active and in_active: + active_end = i - 1 + duration = time_frames[active_end] - time_frames[active_start] + if duration > 0.1: # 只显示超过0.1秒的活跃段 + active_periods.append({ + 'start': time_frames[active_start], + 'end': time_frames[active_end], + 'duration': duration, + 'peak_db': np.max(rms_db[active_start:active_end]) + }) + in_active = False + + # 处理最后一段 + if in_active: + duration = time_frames[-1] - time_frames[active_start] + if duration > 0.1: + active_periods.append({ + 'start': time_frames[active_start], + 'end': time_frames[-1], + 'duration': duration, + 'peak_db': np.max(rms_db[active_start:]) + }) + + if active_periods: + for i, period in enumerate(active_periods[:10]): # 显示前10个 + print(f"{i+1:2d}. {period['start']:6.2f}s - {period['end']:6.2f}s " + f"({period['duration']:5.2f}s) Peak: {period['peak_db']:6.1f}dB") + else: + print(" No significant active periods found") + + print("="*80) + +def terminal_30s_complete_analysis(audio_path): + """完整的终端30秒分析""" + + print("🎵 COMPLETE 30-SECOND AUDIO ANALYSIS") + print(f"📁 File: {audio_path}") + print("="*100) + + # 1. 创建终端时间线 + time_frames, rms_db = create_terminal_30s_timeline(audio_path) + + # 2. 详细分析 + show_30s_detailed_analysis(time_frames, rms_db) + + # 3. 推荐设置 + mean_db = np.mean(rms_db) + print(f"\n💡 RECOMMENDATIONS:") + print("-" * 30) + + if mean_db < -40: + print(" 🔇 Audio is very quiet - use -35dB threshold") + print(" 📝 Consider audio enhancement") + elif mean_db < -30: + print(" 🔉 Audio is quiet - use -30dB threshold") + elif mean_db < -20: + print(" 🔊 Audio is normal - use -25dB threshold (recommended)") + else: + print(" 📢 Audio is loud - use -20dB threshold") + + print("="*100) + + return time_frames, rms_db + +# 使用方法 +if __name__ == "__main__": + audio_file = "/home/darkchunk/code/VideoLingo/output/test_segments/segment_start_audio_vocals.mp3" + terminal_30s_complete_analysis(audio_file) + + +# 切分点 1: 2.75s (在1.5s-4.0s静音段的中点) +# 切分点 2: 7.25s (在6.5s-8.0s静音段的中点) +# 切分点 3: 13.25s (在12.5s-14.0s静音段的中点) +# 切分点 4: 18.25s (在17.5s-19.0s静音段的中点) +# 切分点 5: 23.0s (在22.0s-24.0s静音段的中点) \ No newline at end of file diff --git a/core/step2_whisperX.py b/core/step2_whisperX.py index 1fc5c8a1..df439d80 100644 --- a/core/step2_whisperX.py +++ b/core/step2_whisperX.py @@ -54,4 +54,4 @@ def transcribe(): save_results(df) if __name__ == "__main__": - transcribe() \ No newline at end of file + transcribe() diff --git a/core/step7_merge_sub_to_vid.py b/core/step7_merge_sub_to_vid.py index 2fa218f8..085d6105 100644 --- a/core/step7_merge_sub_to_vid.py +++ b/core/step7_merge_sub_to_vid.py @@ -36,10 +36,11 @@ TRANS_SRT = f"{OUTPUT_DIR}/trans.srt" def check_gpu_available(): - try: - result = subprocess.run(['ffmpeg', '-encoders'], capture_output=True, text=True) - return 'h264_nvenc' in result.stdout - except: + # 当前gpu + # try: + # result = subprocess.run(['ffmpeg', '-encoders'], capture_output=True, text=True) + # return 'h264_nvenc' in result.stdout + # except: return False def merge_subtitles_to_video(): From c0cf3ae8cc9b97cdc7b87663b037bfe7deedee5e Mon Sep 17 00:00:00 2001 From: "1449528975@qq.com" Date: Sun, 28 Dec 2025 02:38:17 +0800 Subject: [PATCH 08/15] =?UTF-8?q?=E6=96=B0=E5=A2=9E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- batch/tasks_setting.xlsx | Bin 9127 -> 10066 bytes core/split_video_utils/split_video.py | 1303 ++++++++++++------------- 2 files changed, 616 insertions(+), 687 deletions(-) diff --git a/batch/tasks_setting.xlsx b/batch/tasks_setting.xlsx index 2ebd2df98b7993c8cfeb1cdf086f7848c443f94f..488cae2378b759a4908f50417bdc9b2f758c84c9 100644 GIT binary patch literal 10066 zcmeHtbyQVb*Y}~jB%~XOLw89d-Ju-1I}g%zKtK=y=}tkqyF=*?QMwUnX%P7idVTeJ z?>FA@{{1|g!QNx8vwm~$J!8$e=G>|ZaPW8lL;x}X0H6Yx?5A59zyJWz@BjcV02x+S z($T@y+`-jQ)6>Zutk3FUZ%3H}56h4RfQ8Qg@9`fjfs(jE2ukylItw;^HpNV zJbQt?_n6c~I^M?i6q)K~SXWK!UiF%{U zRiZ*0n4y7d+Q<}kkv$~36?0)^39-42k6ekHxT-MxqzKd#Z3}Go(ev?YXNY`pqI%WD zi*IGfqMIQGp$eS%08q7kQA1G5=`ia1Run&1aqzHg5{X$;MpU$!G9k4+ z^KxEqhLnl`A5);;o4)=A{tlHrQ&Jy?{Bfjpc1!jtHn-p0<|?osMn!E-O|v-+z!mfdUyV>JN^gz z;BT*95~HNj&5j;&Ap146?|gb88e2laU0k-AO4BbuZXT=lWiBnrLOVSvwkB~9oQ!{~ z-`9b;1>u*U`>4)Vxl3N$!xN&Zb1w}^ymEeu#Pq-=;gNI6%3B<_>C@@6WEpu+2DjEI zrs9TzY=!2^?X^MV@Ff{ zXA<(aF+`sWDDF-qxOCrMBbV_^>Fa z^{G>H-@D)(mFcC+I`s$Da32k(d%opE=`QX&><@fFQLqS&Q2%rih)7)GF;pT=P$wY< zAj5drvHkE9cSjdn6GuneyXf`PXJDWq49ev{`zTRYQh3Xb-HP@tl-)hm4IkⅅxE$ z)(#eOe+}c@11c`RlO+}?FjwJlPdkAH;rHOZaP1C+5FCe( zhvAS-_DabEWe{-h?S9@V-b2Go0pm7`#Pl&|;PG!>FwhZ&=H^pNoit8_aNCTqKjpLm zkC6>PZDz1$J>cq@#%^g~O?^FO3ZuowZKLGe8DeIjC&l$l%ahx6#k4c;EkN*X03x2SBdKZ#`d`{%!4Nf z7VG`J(@RiI|I=icJNWU!q2}@x0RX^)&VZWC@5b_4Q(t9{9nUwj=GN%)AwR3V6^%MK zago++z74vERM{FYL0I}nO#9%AgJ$IviET7E;L(gypMTVi>0PI6l(>!gIe0r)! z63*#zwep9pLVRGQ6*`ztpwf;A5)jLL&T zie$I(^zg!m5zTo{{(fcA)jRpjLfG6u9rt)S`YQO$PWAG4ogszAgCGK1HpUt445i75?$^>h94B`%iDk@_h+aG6t)YzqUg^O8 zmk77AYCHI`b15C~@Yc00ByI!HiuwZuGGZUgoAp-RVzF@UONw{gB$x>Klb{;etSUQ# zEwP6zx3QjI@_0W(7t5$#KuEvD&RhgKn0VcC%d;WHR-<5CbypjUm0txJ@rPe^zUIrD zT(nNfPu%9>H_b`1)o%7l)Wb?&7BTzMcM@wah=#ESvS8!R=+Z~~dXc|lH@awS*yE7* zWbQuE1>NktT>SRaweh-C8OcM(1;Tx^%`fnoXGr6I2hGvNq#q`5PE#qLjxu#3*-a!2 zl(&9Ul z!XPwtz{){dC+?Z!ZbXTVL*`8!z2{6ROdKC>P1Jt`ToaOg?$wUATgbovf)um;(n17t z`{F4LLX&C!CWwx|;gG|GX$Il(9DIB8aErjB+PtDH31Gr9T=CJ)$uyg@JyzhtE@3q? zrDpnadhK*fwBQnl+zy&P2LdpUHwKiH!cqwAwo+?WQjZ=xx0F&Za|*49yF3q&wOKp> zvdvWGBsexKpN*Lq(;jc6mQI8?z^mE^G7hWZ4klcg)9reGx-iSSBDqLS3(~-2adb+nj8_Fa^*6HZ|oGAs8OJjXMtmUePu{zhwDlPt3F48O;Bl&Gi&clDJ#EdZ+;XK>@`QIa5JK zV8*9Ub$*sD^&#Ye?8Nqz_F@HTya*@DS!plklP8{3Oi0XzIJ)oKK1FCJv1dt}*c>flI zo2Dm*mf3vgkHLv0DlKHl3LpxzimRAd8_?T*(tpOd zm}ZAUt|WL>$|UAJCO4vwibIx($Uf(pIN%V z)dxxy`k0zahg3vtm}kVIFJd&xRtC!8luN-Xf|&-e$J#EFCtWT@u@KQqZRlwRW_er$%=Dg<>0sUC(4OkPd=w+t zp+BONidGKeU}ZpucHXPAYsuPUJ}=S}DzZqHCQ!B)2`)&P%yP9`l6y|GN#32@#nJy< z;);v{XLWp^JTFq;-I}3cT{3uIZK`L=Hb#VuAHuy@RCP=DXzxgVnF zvRz^Gf%j%W&4P?mrc#giK8I-2SqVPuiU$@)706@WOOuS!=beD*wT87cuXEejOVTRUR2beY z(&ouYR^KY%)gba|C?IAYDM8Uk4qQYinGe9xAH=Liv7nuBs|O!Xv80>^H_1NWrw)#) zS$rWT|xMTis4tqs|bmm2B^S${aj_wT6)zwt75M3&k1JIP9Dxw%=t zn^z+gQIMpiH!N$T;$DCiWao(ohB(4`|7JU7=vDi$fI|0c9z|c9%pO@){wPgq z3|?z9UFGFwZ9lwLRN(<4l?!|r^_-dwugyBLxfznN z$4nHFy@eb{g-3s+pg)1kZe6SD;(UE?GzWxN^X=y)m%T5mOO7s=l2W);}w=yYN0GD82!hsvjolc!05`_9mxfn> zefE25Rmcuq3{s{FHRz$(wow$Jw1_U)FGf6bWr}0%u$Q$@VSOl+vza%y&AbcETG#xc z>b<@Fe)36Kalq%pduE8RfzUO(c$$a$%1eO)p6c~C8(`q4opZwMg=B%P5m8qQq_!Q8 zc0zhKM}Oa&tDDKKgG@uiwwvRnW=D$5x;O5Qw|?KQS-zdjeLK1OVvgZ&IGOqJJW9op zLB z*x83cz{Ja0ul!ntW8nuVXzjz|%)N*sY3kcq-}n(tH3rQDM$w+*XD_p>N#um`KH$M{qndw`MV!Yc1}eus3xR>5+>l+gh`Z; zG*aCpP3um`<{J}=tVf^BIn+ALT+HVv`6^h0h%D!=n&QQ`%6Ps$Z4-Crjo;raQ7{G( z?JQutLt|$c%iq+`>n*PXJ8~}!Wck7%vdn6>ryjEfTbq|aNQf^2b=GW03m}Y2GW;d^ zTxmQ`l5lY7HZ_Hi$J;B3VO{X181rKtfFNS~Z97}6`ZHJCpR~=gmRAesNXIxgNkcXS z29alWs-yUXu!o301rreio&D2?GDL@w4!rz_KUwVk55yIy0R@>h6DLia*fa+@a+4|A zFNYBC*%-;=MsPX(AR~W}La;KE9VfFLwm$peWr+Ej#tc&!q}f4*81v(m`WTAVfzFo8 zR)R^h9b`PS(;aaS2!D~2WI?TF74_)vJ6d@Ui4DCUL8n9IwG5Tlcevzt_j=HGaV;Y;WUca@rocccW|V?bNgS(*UbjrA8j4IOCd7$%RFtE z2{uN`8{`>XHITnXjHWu$nC@h=kXXojnB|Sg!~OgiPJ`rb8`;1exJE09x>SpTt3_R_5f^RY%8xF2$)I{WC82w~u~mh0C)$?SN55-A z$XfHepnp8wZ$A{dp(}mbeWhgUi^Pjv6#tRYip)Lm%ROb|;h-$9J&j*Dh z-^q>pQ1iP|KWBuEK__JVeg6NIOof^|{7;qTm^yxc*5=^n-*u*zTYrLP-O4JI1@lF2 zS~qWFt=GK%dSCLihQ=KN&2fiF3Er!;_TW@xmW=MBwAKyTtwDF5|J;`<+C&f~Kx^3Q zr~m-oj|2d0W$a>Zrs3*h?O^%6807Q5)nSeuAidHmwj{o6pzzflPL2pZ*OT^?x~ZNRM4ZoC`HY4VNpQ%^dvlP6x8VNa?y|H$*@(qcj@j&Evj8_Uh;5#iQn2jSj2R$txrGRF;L zgG9-aeM#c?>@9;)#zZEU?EZ$C8FiLz%{9-eHg|aPcQ%%`z51P#Oqkk<8&g>~yNfDp zN)U_ReXj5-@h&ke96$tuLIXPED1S`S?IiyfpBI z_T7L$jf>UB*g$r?BrsGPvy}A5!1zFJybLf(o4iyQ!(t7;ce- z>d_dQ=HWp57G{p7sxFRBU^Y`n7xTY5?*DC9Kn*-1M&CAw9h%7^U1P;~zXZLsbcv}Bi+_6;wyA?#TNBxR zR(`d+*uH}5B$hkim^)s`a$qYZmfj+OJ|LEQUtROi;O zE>6sxCWmwl%az!=w9YxAeyYcW#1VF{s?1gjv3bB;u`!A-ZL{P+*ENL_dT?)e%ghRx zY!8K#u6eu?LOuOxox49NFk_{pXA_cIvb4|Cpy#jUrf$^9b%4@&UsfaXFQ^;p;KBum zR|DDKgR=yiKM=agN18J^vyRQn4C1stZk0=lbRpOtmsFkK3&CL-(mfGb|r*m%Cr zS;6M2TG9vzrEW&(!YsjNkwVMBHKHd;X>-GHo*Ip6_Y=LjI6fBduc5H;6-bD*z1C6j z7H2vOPVek^MR5I0VGwg$mnXFE7A7MS;vE9x-k~$~TsTc&bH_1F*=y})`bwzDAz_7- z&@g9?qDUG8XE5%Jb#_5aKYuD`M{&gF5XfxEeaqTJ-%j+cHiKO0i;AaL_qbyJ3`NPW z4KWNX3pDoq``1f9wfFP-FW)k$D*T>IsX55 zqW|RcXG8KArvqrO=a2T}pWr{+LchTIXy3uVnnZs>|EzKUg5p8@qR{*PS@Hgp!JpNL zUkr-z|L4K~QJeUa%bzLDFD@lyKi=Ta1m{lub2q@j%-AJc&hm_JG-QAtiA*H00pmZEsx*Lw9ba%&h(5K{m-uL?E z$KK~Ud)>2U-!rpjX5Cv+1_~Mx;-Q4dFAF`~|11dLA4WEYiuN|P4or$*Gj#AD@ITGg zNB0b*AR!>s01yzU|1{ILwPkd*vdoB+lW%864LXJyRKFb%hF8AKNo@BdUCbZ-iviaeUi0}tx96nl`|-tquar6FtohK9l|iiA zk@Jj9FR2os(eqJfal1D#0U4{;9-d33Q11`{VVIOgjsY9_kuY_B(t+f?l(c!Li7l=t z7AAelQrtC1O@IJPRze~+GgMi~Z{1#aqw{uJKYZkn=(L+`YrfaRryr}VSH=N;BwSX- z&BJPw)|ru;*_+T|B__hQb@M4VSmpEx$5FmaMTj2;hq%phbQsip;n?8G zE|;lQ{Y28=7j$p|XZr$r58M4a;kpTwuh&{kSsFLx?U9M#J;^=bciP{^ybBWm)2X=Hywv~-VpguOQ9mF@?XZSBd8)j+u+Q;_3EG-A@Lly*n*MrHF z+2h&s{ely=5t(2h_CBZ7-4`93R`CO{o&DoQ*LExIxv6NGYa`EgH0utQ{ht9Vrqh+|s^$m#-sz%Dp`hM55m~(ge{AxIv=nYWn-!GkNPd`Ju zEntESgMBoh3Q+_Wg(OH2@%(-0Y$`&!1`si5q}Ev|SnB3t5|rBD-S&fDpSX+!`@eX1 zM`hQl+`ATmaEPaPQpeORdIpHvvgM^H;%2XFo(SD2@}=R5L*??6>n?tm{M#R(nyVgN;*brAyP zqwV#*H8zA2Lt0a1I>{coSN6$XH!Dr-lwGT`NFL*p_@Exu)36AZ}G=$#mIpVc(8> z;y%F^Q@hK`|AsGXqZpcNcB?H&N24;H)2S}IICdypA@pvnBs}#;VUzGO?!ayk`rZTx zhzL0E9AlT89_L)y$~GAO;40N>B|o|Owhy07bD8SE(ky8O&@de>z^s_NBJf&p5WbUt zxyHBi>7bwCH4(8NZ?~PA%Y(tP;gGuu^n8gn3O3JP#G!RP5ywf-xk$=tFlPzb9LdA>#|s~s0JXPsVp0|(|9X<0u!v@3J~odu7q zTBW0aW!Ww~r$9YScHn)FH~TWD0u3vc#^FJybYelldkF0&=wFrdLPi&8tJ z6zU4&Oj7zQcAv~BEZ*cqh#0Al=l#MDGN;qofY6Vkq~xo1pkm$6?x7t5L}T1$9xlPR zGI)e>h+T&LuClZ9>5}`ilh^yY-_mHw(S*wdlG18-gQO|a62njQvHSY?NLc#iYD3HY zHPBQ$0Ys2}QIRWW*jGtRDr0Mde08y`ksWjh)h4kXd1Vlyd19u)LX* z>>`84$q0_2njk0vLvMC+l!OrMBt**D*Aq95y^|A&^RiehRR&i2x(hd>PMiLRjspki zt<HW0XUG?fNE?3n+V6BFhb82(p3j%z592MI9QMeXF4fgM4NPQM;l{KVB}Om*UYUkXh`m5nLhVhmntV)m+?PBhN6k? z9B(#84%>+s7)+NKqXU-Pk{7(s)~ASa1T(#lj}Ea`T{S!IuiB}aZ*JUpP+84LMk1K9 zZq9akBFY4AZyUNO4y^oMieJP?h-GW9z!Mz6JF*4aQBaZDHDR=)s+ht&^>aqp%tRzu zEafpVu6=43<)R^UNDcKmYtF@pAkZM+3}GEle+A;4+mwJ&8rfF#nD&(`cSL~wnt^%; zfu859&V~rXt}37=K#dJay6bE%hTC9|4WAA_CWSh8{URae+3udw*0j^RD}NaQr42%H z0$CY3)?6~G7|aM~b7=3>U|u$;K*#@s{j>m?=O`=tYko~@-EzR0A(PmIN1 z4#hzBeqLNI+uf>jA+Do9d*CR9{SwF$`_*#53i1jw-5W|2Nb7V>nD&xUnfe7oj@xsG z*J67I=3E_0%E1J~-1G*5F)Hv%k!N+Z0sD4bq9?o;kX>|hUdhk8<_fUg0r-$M{iQgRHHhDZP_P-CAsW-`ds?V0tLv_v+2O>-@x= zi_?_H_V!+pj(|m#F8ZrazW%If-&dfvlX04ieVVV^lKJ^qpSV30(0$FfO;u0R& zjh<23mIQTCLzc5I$D6O zXP|^7FnceqWYLdWcjRoMHCkht8^ z`}F1!m;J2szT&M8mW4}QA}Qx#ciyw9op=MLV73i+A&IQ*VI*F>0TE)+t|$h@#awwo zMgLIYakMwLHg$N|7&*lV%XEMbYy1B9BrDb`;$q}dDAEe^JMIc&Gn~WZ9BeaMK0r;e zKt{rab62)Tsx$?`is$5xEY#lG6Y0X`cDDMeH^Wt2@6t(9nNxUR6+NL=qMDlGLOY~6 zKP*2VIUs^^V(yg$ikBB9JBRux(Ek*YB?Cjr~G$G}g-=#ZG>Htd3Dz z-Kn<`srD#|QLjsacgEAlm*y*P&l2lf_i|9ra^;K{H{XSS&Ao&Dof`EoDo4FwTIRt! zP?W#-jRrP07C*O(D!Mjl%;;WOmG^p=8Q5{du;`ye5VZ30i$>|^GAU7H%imfW@sdqc z-V5M~2#%XCZ(b2*d3ZKAHolm6azm=D)JnD?rvnc^BrbeO;{V1awZd1(ydS9_nhMR6 z67lu7EI|fsk}Pk;L+D5d4YZO1oKWn@D}r)}BsL?mrLk)z6!P znaB-Y?61wHDDy?}R&x1t&ZGvX2dKO6&1YiRQ!E$vx${^9w7Yd}eQV_kxUKlu-T$3_irJ0b zbKv69hV7ePevv%*q~W3f0fGF-jBs?bH2ygiCUoxKfg~~R3BvAM?IGt#oS*yj)a;sg z$T>4)IDUxNA}0@5pXr#XvRz)}p4cPF6)B^~r+r4N$eqz)SD@_u0SP~}IND-A%Y1b1 z-k*!DdR>k++Kb{%qi*3Bg64C9ec(`k+F@jc*KX}PZH<-BhU;y8krk%ex4jx*^q62t z@dD7?N>$AmyYea{Y5R1y-rWFY!h-kBZSNT~x7{w@%W4AZ57}V5m%_}`wCKURWDQfe z*6vUZC<~7K%~ObrHJ@s2jH6@k%tG-m0pg-mJPIiiX9rD&sPP z8T9+{q7`8xj}~|~nHJ(TWJ6F3aXMWl{(i|;!fD9-dVY8tHtj>vqEmxBC{y@TB2tqV zh-9R&kQa2TG_(aHnztKuC9KQwLk97BZQ!ZSxLOd;+{p_F!}}_m8?zM{jpj~)rb3;| z$wazuw^L<Cci;#goawF<~qYD%+kt>(-Rse%tR>qvMiD^ma+SJYQ1p6shgqh zYX!gO%ep!Hk9KCzk{}2EHweBzVzk}!7Pe+~q7P@vUh{bf?Z0??DQbo*JIvg}G0WV> z>+y6rL5H3hZ|5G$K3UJou3jA2cVfW)1Eyk)h;Ur3kG4&)kF=@ag+@PSHF|&=^Q*if zOk+jNzG2?A1ybO1NhON+K`S_^mgbr?8~qBezmfW+%9_Y#F1|LO*+ZB#+epHZZJo8; z|FSqfpr5CYgX!3fWa%LFIcUHan?0%PIeOoxIo+}yOsv^%6~4-iX7ON0`MZ)txAQzi z?M_O8E*5OS+Xy)jzF`%ms((a&fIo{4{xU=SQYm| zVm~#BpXP^ew!I>_o(^vHPL19{gtt|<4Xb`Xpk*aq?TB=zZn*6pc2nA^UeRdEajtHN zl%k}WQ3{l>n&Eq*{bKd>*(PC{5+Fo2FCr~bt5kiWcj9f83GVFHZgPwFz^BRH7cJC` zX+;%xczR9Bv}!V1a$Hr$E|4E@G3qpxr_xM}d>G6n&Zgo_UTkQ3TLixpu&^5FzSA0w z+$p0NNTir*EFbfQv$2q4``VnuPIhe}#pVTHc{BM$;cn{vU*I(iWJtYOqtahB8QtH@ zvSk>xRi3pR^Ok4l2NsKIq2(v+%nnvtxIm>yUJWjxT|Q}qOjUOmhOkiN1rZcDZJcT| z;$xDAZNy8qDdU&aPfRSrs}E$uwU#O^2Fako*sFZvKYbnUCn5>N(37B6**6|+%w}4O zZ1@;2s63#||2|Rj&_bPfpg}XNM8F^FtL#gbof!ht@rdU*LkO>~5U57mFTyF8Ic?(1 zlQmf)Fe-ABHTx^-u`Wr5St2pt&~(2SA&+GUk|Z`njO`%xTv4d2xC$lzdKn7M3cfd$ zyd$uIrdwwrYL_Jqirb@5GnpJxMGGq^WYzSNGiS2rO@fICbsT}LU)3;A9!(;xJa^0r z0q1x$n_v)gi*zg607TWQ*1}ZP9`m~R2ay@Z~O` zR%(-KZGff@f&{c=jj3>|&mwQ8B#YP$GDv^aT1TWFA>Wsmhkzs+uIEY6QGf9|!2);zJHdnzJe~0L} z+@C-eN>@ps{?R#Wp#aHLUqRtoc_sgv4r_7*;cWby%W5s%o1mAYp^AX4MR)Rnd<{z$ zLdLNkb+UAI@%0U!5=#0iBDbdFr^MnByJnXJ)l9>?T1LAW5)hse*C-uxaCU*{M-tAu z1cF}fcNe;W$9gNZi%kVG z$ToEAkS1GJtMyf)tbo{_1w%5U{ubj6(DN}?!@%9^Z$x5~k_78E^RQzoADsI&V9&k+710et9GNa#oMG9CJV}P9tNY;y5h=oz zCw!P>yU*Q)Ap}?5@vBM+b&8m}xCEm9bU@8lOi`k2;(*bTAd!}QMadO-SqchBgrLPq zMakR{$^mU^=$c>=Qo(6nQMQjhcY2;oCU9=jQEdrN|?;ua3{Uj`L(dCo5S6@x1p@ z{UbOS&tmQ0-fU=kHcwk@C`(5Q@<;4r&3urVv9((_Wy%x?6BlFLP-cu2yx+MYa`F4| zg9?R7;A-(EIm>pojjl{2E-&g8leg}S;#v_}9Uuo?3Uk1bgVaY4n?y-PUW3dYajQ`* zsw1OTcZ>APqLX2mf5)eew;Os<&x3D2cRJ?CL?UXCoxKcBa0ydddbh2u zFXX9+#G2QiCU#m84m+#;GK=?5x=4o7Trp z@6@N17Vqmg9G7aAZiZ)2Z;-u9KW9IO&HBIuVR8+&8R z$GL0|Zs*2(@UsPQJ{##T8^#CA(zqdM@F}fx-+v!(Ie<7Q0=O(2CPuEv-{*ooY2`D#J z+cxNits{f$z<@Qrgjq)NSY1!$Vb%~{nt1b(O;tsqEM5-r0v5!6A69!~)ouCwz@b9ZS&35#uT{`ka;S?VRE``R%*4tWRN3<*ZBwqwlWN+V- zvvzf?B)JK2ypgm&Vkma`?_B?-Bw!Ht_{8rWdu0=*zcWLc>0_X%dJxh|gduZLzb ziAgS-cSD6r6g<^z&0=IF>+U-|FYe^c7;bXt1?ei&_f6?>TC~}Elkt@rB-slTTRfR~ z|73xLpqu0i$2Y2Ow#E*+ z4@cqBgg$uy3wrQ5Wm9{uHk*pAex$zMl5rvVGSv843;Rh@rvJfNCXF6I*e^0o>c#G5 z^ZgC*i3FWRIEC2w{6xu{(^ru4?s;=1CPeYcN0}HYfhujTa2(oDUq=qE-ZE>#wfGki zOqNK|@YzOxg`_J#g2W$?^{ONtVIFivwP!SGvGD-%G6H25l~c8lIq~^gRq17EGc`^; zevtbMtMgf7R+qdFLyb%#ja6xK-11enM!-;8ku5@x|< z0V2w9)*jj8dkM)07d5LzPoInxCJEsp$)KqYSCmTaOJnZ@}pZ zNC5iJEX^;G{c+EL2QUOA1Tpw}L;%)+|FfTdRq*dB+C#+oRUY(uZLnJL=Y;v)>Ej6W zfcfRr=P%698S}rKJxs}8<-yrQ`Nwwzzx#XqCBcL7FMm}3g#Gbd!DEUZ3*vvHGr>dp zFQ~Fp8ag!e>`u?t98>}sZoBU7O{#T {os.path.basename(output_audio_path)}[/cyan]") - cmd = [ 'ffmpeg', '-i', video_path, @@ -101,20 +98,25 @@ def extract_audio_from_video(video_path, output_audio_path): try: subprocess.run(cmd, check=True, capture_output=True, timeout=30) - rprint(f"[green]✓ 音频提取完成: {os.path.basename(output_audio_path)}[/green]") return True except Exception as e: rprint(f"[red]❌ 音频提取失败: {e}[/red]") return False - def separate_vocals_with_demucs(audio_path, output_dir): """使用Demucs分离人声""" - rprint(f"[cyan]🎤 使用Demucs分离人声: {os.path.basename(audio_path)}[/cyan]") - try: + # 检查输入文件 + if not os.path.exists(audio_path): + rprint(f"[red]❌ 音频文件不存在: {audio_path}[/red]") + return None + + file_size = os.path.getsize(audio_path) + rprint(f"[cyan] 📁 音频文件: {os.path.basename(audio_path)} ({file_size/1024:.1f}KB)[/cyan]") + # 创建临时目录 temp_dir = os.path.join(output_dir, "demucs_temp") os.makedirs(temp_dir, exist_ok=True) + rprint(f"[cyan] 📂 临时目录: {temp_dir}[/cyan]") # 运行Demucs cmd = [ @@ -124,197 +126,122 @@ def separate_vocals_with_demucs(audio_path, output_dir): audio_path ] + rprint(f"[cyan] 🎤 开始分离人声...[/cyan]") + rprint(f"[dim] 命令: {' '.join(cmd)}[/dim]") + with console.status("[yellow]🎤 分离人声中...", spinner="dots"): result = subprocess.run(cmd, capture_output=True, text=True, timeout=300) - if result.returncode == 0: - # 查找输出文件 - audio_name = os.path.splitext(os.path.basename(audio_path))[0] - vocals_path = None - no_vocals_path = None - - # 搜索输出文件 - for root, dirs, files in os.walk(temp_dir): - for file in files: - if 'vocals' in file and audio_name in file: - vocals_path = os.path.join(root, file) - elif 'no_vocals' in file and audio_name in file: - no_vocals_path = os.path.join(root, file) - - if vocals_path: - # 移动到输出目录 - final_vocals_path = os.path.join(output_dir, f"{audio_name}_vocals.mp3") - final_no_vocals_path = os.path.join(output_dir, f"{audio_name}_no_vocals.mp3") - - # 转换为mp3格式 - if vocals_path.endswith('.wav'): - subprocess.run([ - 'ffmpeg', '-i', vocals_path, - '-acodec', 'libmp3lame', final_vocals_path, '-y' - ], capture_output=True) - else: - subprocess.run(['cp', vocals_path, final_vocals_path]) - - if no_vocals_path and no_vocals_path.endswith('.wav'): - subprocess.run([ - 'ffmpeg', '-i', no_vocals_path, - '-acodec', 'libmp3lame', final_no_vocals_path, '-y' - ], capture_output=True) - elif no_vocals_path: - subprocess.run(['cp', no_vocals_path, final_no_vocals_path]) - - rprint(f"[green]✓ 人声分离完成:[/green]") - rprint(f" [cyan]🎤 人声: {os.path.basename(final_vocals_path)}[/cyan]") - rprint(f" [cyan]🎵 伴奏: {os.path.basename(final_no_vocals_path)}[/cyan]") + rprint(f"[cyan] 📊 Demucs返回码: {result.returncode}[/cyan]") + + if result.returncode != 0: + rprint(f"[red]❌ Demucs执行失败[/red]") + rprint(f"[red]stderr: {result.stderr}[/red]") + rprint(f"[red]stdout: {result.stdout}[/red]") + return None + + # 查找输出文件 + audio_name = os.path.splitext(os.path.basename(audio_path))[0] + rprint(f"[cyan] 🔍 查找输出文件,音频名: {audio_name}[/cyan]") + + vocals_path = None + all_files = [] + + # 搜索输出文件 + for root, dirs, files in os.walk(temp_dir): + rprint(f"[dim] 搜索目录: {root}[/dim]") + for file in files: + full_path = os.path.join(root, file) + all_files.append(full_path) + rprint(f"[dim] 文件: {file}[/dim]") - return final_vocals_path, final_no_vocals_path - else: - rprint(f"[red]❌ 未找到人声分离输出文件[/red]") - return None, None - else: - rprint(f"[red]❌ Demucs分离失败: {result.stderr}[/red]") - return None, None - - except Exception as e: - rprint(f"[red]❌ 人声分离错误: {e}[/red]") - return None, None - -def generate_cut_segments(cut_points, total_duration): - """根据切分点生成段落信息""" - segments = [] - - # 第一个段落:从开始到第一个切分点 - if cut_points: - segments.append({ - 'index': 1, - 'start': 0, - 'end': cut_points[0]['actual'], - 'duration': cut_points[0]['actual'], - 'cut_type': 'start' - }) + if 'vocals' in file.lower() and audio_name in file: + vocals_path = full_path + rprint(f"[green] ✓ 找到人声文件: {file}[/green]") + break - # 中间段落 - for i in range(len(cut_points) - 1): - segments.append({ - 'index': i + 2, - 'start': cut_points[i]['actual'], - 'end': cut_points[i + 1]['actual'], - 'duration': cut_points[i + 1]['actual'] - cut_points[i]['actual'], - 'cut_type': 'middle' - }) + if not vocals_path: + rprint(f"[yellow]⚠️ 未找到匹配的人声文件[/yellow]") + rprint(f"[yellow]期望包含: 'vocals' 和 '{audio_name}'[/yellow]") + rprint(f"[yellow]所有文件:[/yellow]") + for f in all_files: + rprint(f"[dim] - {f}[/dim]") + + # 尝试查找任何包含vocals的文件 + for f in all_files: + if 'vocals' in os.path.basename(f).lower(): + vocals_path = f + rprint(f"[yellow] 🔄 使用备选文件: {os.path.basename(f)}[/yellow]") + break - # 最后一个段落:从最后一个切分点到结束 - segments.append({ - 'index': len(cut_points) + 1, - 'start': cut_points[-1]['actual'], - 'end': total_duration, - 'duration': total_duration - cut_points[-1]['actual'], - 'cut_type': 'end' - }) - else: - # 没有切分点,整个视频作为一个段落 - segments.append({ - 'index': 1, - 'start': 0, - 'end': total_duration, - 'duration': total_duration, - 'cut_type': 'whole' - }) - - return segments - -def detect_silence_fixed(audio_path, noise_db=-25, min_duration=0.1): - """修复的静音检测函数""" - rprint(f"[cyan]🔍 检测静音段 ({noise_db}dB, ≥{min_duration}s)...[/cyan]") - - cmd = [ - 'ffmpeg', - '-i', audio_path, - '-af', f'silencedetect=noise={noise_db}dB:duration={min_duration}', - '-f', 'null', - '-', - '-v', 'info' - ] - - try: - result = subprocess.run(cmd, capture_output=True, text=True, timeout=30) + if not vocals_path: + rprint(f"[red]❌ 完全找不到人声文件[/red]") + return None - silence_periods = [] - current_silence_start = None + # 检查找到的文件 + if not os.path.exists(vocals_path): + rprint(f"[red]❌ 人声文件不存在: {vocals_path}[/red]") + return None - for line in result.stderr.split('\n'): - line = line.strip() - - # 解析 silence_start - if 'silence_start:' in line: - try: - start_part = line.split('silence_start:')[1].strip() - silence_start = float(start_part.split()[0]) - current_silence_start = silence_start - except Exception: - continue - - # 解析 silence_end - elif 'silence_end:' in line and current_silence_start is not None: - try: - parts = line.split('silence_end:')[1] - - if '|' in parts: - end_part = parts.split('|')[0].strip() - duration_part = parts.split('silence_duration:')[1].strip() - silence_end = float(end_part) - silence_duration = float(duration_part) - else: - silence_end = float(parts.strip()) - silence_duration = silence_end - current_silence_start - - if silence_duration >= min_duration: - silence_periods.append({ - 'start': current_silence_start, - 'end': silence_end, - 'duration': silence_duration, - 'center': (current_silence_start + silence_end) / 2 - }) - - current_silence_start = None - - except Exception: - continue + vocals_size = os.path.getsize(vocals_path) + rprint(f"[green] ✓ 人声文件大小: {vocals_size/1024:.1f}KB[/green]") - if silence_periods: - rprint(f"[green]✓ 找到 {len(silence_periods)} 个静音段 ({noise_db}dB, ≥{min_duration}s)[/green]") - - # 按时长分类 - short_silences = [s for s in silence_periods if 0.1 <= s['duration'] < 0.5] - medium_silences = [s for s in silence_periods if 0.5 <= s['duration'] < 1.0] - long_silences = [s for s in silence_periods if s['duration'] >= 1.0] - - rprint(f" [dim]短静音(0.1-0.5s): {len(short_silences)} | " - f"中静音(0.5-1.0s): {len(medium_silences)} | " - f"长静音(1.0s+): {len(long_silences)}[/dim]") - - # 显示详细信息 - for i, period in enumerate(silence_periods[:10]): - silence_type = "🔸" if period['duration'] < 0.5 else "🔹" if period['duration'] < 1.0 else "🔶" - rprint(f" {silence_type} {i+1:2d}. {format_time(period['start'])} - {format_time(period['end'])} " - f"({period['duration']:.3f}s) 中点: {format_time(period['center'])}") - - if len(silence_periods) > 10: - rprint(f" ... 还有 {len(silence_periods) - 10} 个静音段") - else: - rprint(f"[yellow]⚠️ 未找到符合条件的静音段 ({noise_db}dB, ≥{min_duration}s)[/yellow]") + if vocals_size < 1024: # 小于1KB可能是空文件 + rprint(f"[yellow]⚠️ 人声文件太小,可能分离失败[/yellow]") - return silence_periods + # 移动到输出目录 + final_vocals_path = os.path.join(output_dir, f"{audio_name}_vocals.mp3") + rprint(f"[cyan] 📁 目标路径: {final_vocals_path}[/cyan]") + # 转换为mp3格式 + if vocals_path.endswith('.wav'): + rprint(f"[cyan] 🔄 转换WAV到MP3[/cyan]") + convert_cmd = [ + 'ffmpeg', '-i', vocals_path, + '-acodec', 'libmp3lame', + '-ab', '192k', + final_vocals_path, '-y' + ] + convert_result = subprocess.run(convert_cmd, capture_output=True, text=True, timeout=60) + + if convert_result.returncode != 0: + rprint(f"[red]❌ 格式转换失败[/red]") + rprint(f"[red]stderr: {convert_result.stderr}[/red]") + return None + else: + rprint(f"[cyan] 📋 复制文件[/cyan]") + import shutil + shutil.copy2(vocals_path, final_vocals_path) + + # 验证最终文件 + if os.path.exists(final_vocals_path): + final_size = os.path.getsize(final_vocals_path) + rprint(f"[green] ✅ 人声分离完成: {os.path.basename(final_vocals_path)} ({final_size/1024:.1f}KB)[/green]") + + # 清理临时目录 + try: + import shutil + shutil.rmtree(temp_dir) + rprint(f"[dim] 🧹 清理临时目录[/dim]") + except: + pass + + return final_vocals_path + else: + rprint(f"[red]❌ 最终文件创建失败[/red]") + return None + + except subprocess.TimeoutExpired: + rprint(f"[red]❌ Demucs执行超时 (>300秒)[/red]") + return None except Exception as e: - rprint(f"[red]❌ 静音检测失败: {e}[/red]") - return [] + rprint(f"[red]❌ 人声分离错误: {e}[/red]") + import traceback + rprint(f"[red]详细错误: {traceback.format_exc()}[/red]") + return None -def detect_speech_pauses_fixed(audio_path, audio_type="音频"): - """修复的人声停顿检测""" - rprint(f"[cyan]🎤 检测{audio_type}中的人声停顿...[/cyan]") - - # 精细参数配置 +def detect_speech_pauses_in_segment(vocals_path): + """检测音频片段中的人声停顿""" speech_configs = [ (-15, 0.05, "词间停顿(-15dB, 50ms)", "词间"), (-18, 0.05, "短句停顿(-18dB, 50ms)", "短句"), @@ -334,7 +261,7 @@ def detect_speech_pauses_fixed(audio_path, audio_type="音频"): for noise_db, min_duration, desc, pause_type in speech_configs: cmd = [ 'ffmpeg', - '-i', audio_path, + '-i', vocals_path, '-af', f'silencedetect=noise={noise_db}dB:duration={min_duration}', '-f', 'null', '-', @@ -399,559 +326,561 @@ def detect_speech_pauses_fixed(audio_path, audio_type="音频"): 'long': len(long_pauses) } all_results.append(result_info) - - if silence_periods: - rprint(f" [green]✓ {desc}: {len(silence_periods):3d} 个停顿[/green] " - f"[dim](微:{len(micro_pauses)} 短:{len(short_pauses)} 中:{len(medium_pauses)} 长:{len(long_pauses)})[/dim]") - - # 显示前3个停顿 - for i, period in enumerate(silence_periods[:3]): - if period['duration'] < 0.1: - icon = "🔸" - elif period['duration'] < 0.2: - icon = "🔹" - elif period['duration'] < 0.5: - icon = "🔷" - else: - icon = "🔶" - - rprint(f" {icon} {i+1}. {format_time(period['start'])} - {format_time(period['end'])} " - f"({period['duration']*1000:5.0f}ms) [{period['type']}]") - - if len(silence_periods) > 3: - rprint(f" ... 还有 {len(silence_periods) - 3} 个停顿") - else: - rprint(f" [red]✗ {desc}: 0 个停顿[/red]") except Exception as e: - rprint(f" [red]✗ {desc}: 检测失败 - {e}[/red]") + continue return all_results -def find_optimal_speech_cuts_fixed(all_results, target_interval_minutes=30, total_duration=None): - """从人声停顿中找到最佳切分点""" - rprint(f"\n[cyan]🎯 从人声停顿中寻找{target_interval_minutes}分钟间隔的最佳切分点...[/cyan]") - - if not total_duration: - rprint("[red]❌ 需要提供总时长[/red]") - return [] - - # 选择最佳的检测结果 - best_result = None - - for result in all_results: - count = result['count'] - config = result['config'] - - if count >= 3: # 至少要有3个停顿 - score = 0 - - # 基础分数 - if 5 <= count <= 30: - score += 10 - elif count >= 3: - score += 5 - - # 停顿类型加分 - score += result['short'] * 2 - score += result['medium'] * 1.5 - score += result['micro'] * 1 - - # 噪音阈值加分 - if config[0] >= -20: - score += 3 - elif config[0] >= -25: - score += 2 - - # 时长加分 - if 0.05 <= config[1] <= 0.15: - score += 3 - elif 0.05 <= config[1] <= 0.2: - score += 2 - - result['score'] = score - - if best_result is None or score > best_result['score']: - best_result = result - - if not best_result: - rprint("[red]❌ 未找到合适的停顿检测结果[/red]") - return [] - - config = best_result['config'] - silences = best_result['silences'] - - rprint(f"[green]🏆 选择最佳配置: {config[2]} (评分: {best_result['score']:.1f})[/green]") - rprint(f"[yellow]📊 停顿统计: 总计{len(silences)}个 | " - f"微停顿{best_result['micro']}个 | 短停顿{best_result['short']}个 | " - f"中停顿{best_result['medium']}个 | 长停顿{best_result['long']}个[/yellow]") +# ==================== 主要功能函数 ==================== +def generate_cut_plan(input_video_path, output_dir, target_interval=30): + """ + 函数1: 生成切分计划 + 输入长视频,每隔30分钟进行切分检测,输出执行计划 + """ + rprint(Panel.fit("[bold magenta]🎯 生成智能切分计划[/bold magenta]", border_style="magenta")) + + # 检查文件和环境 + if not os.path.exists(input_video_path): + rprint(f"[bold red]❌ 文件不存在: {input_video_path}[/bold red]") + return None - # 计算目标切分点 - target_seconds = target_interval_minutes * 60 - target_points = [] + if not check_demucs_installation(): + rprint("[red]❌ Demucs未安装,请运行: pip install demucs[/red]") + return None - current = target_seconds - while current < total_duration - 60: - target_points.append(current) - current += target_seconds + # 获取视频信息 + total_duration = get_video_duration(input_video_path) + if total_duration is None: + return None - if not target_points: - rprint(f"[yellow]⚠️ 音频时长不足以按{target_interval_minutes}分钟切分[/yellow]") - return [] + rprint(f"[green]✓ 视频文件[/green]: [cyan]{os.path.basename(input_video_path)}[/cyan]") + rprint(f"[green]✓ 视频时长[/green]: [yellow]{format_time(total_duration)}[/yellow]") - rprint(f"[yellow]🎯 目标切分点: {len(target_points)} 个[/yellow]") + # 创建输出目录 + os.makedirs(output_dir, exist_ok=True) + # 💾 定义保存文件路径 + progress_file = os.path.join(output_dir, "cut_progress.json") + plan_file = os.path.join(output_dir, "cut_plan.json") + + # 计算检测点 + interval_seconds = target_interval * 60 + detection_points = [] + + current_time = interval_seconds + while current_time < total_duration: + detection_points.append(current_time) + current_time += interval_seconds + + if not detection_points: + rprint(f"[yellow]⚠️ 视频时长不足{target_interval}分钟,无需切分[/yellow]") + # 返回单段计划 + plan = { + 'input_video': input_video_path, + 'total_duration': total_duration, + 'target_interval': target_interval, + 'cut_points': [], + 'segments': [{ + 'index': 1, + 'start': 0, + 'end': total_duration, + 'duration': total_duration, + 'cut_type': 'whole' + }] + } + return plan + + # 🔄 检查是否有已保存的进度 cut_points = [] + start_index = 0 - for i, target_point in enumerate(target_points): - rprint(f"[yellow]🔍 切分点 {i+1} (目标: {format_time(target_point)}):[/yellow]") - - # 在目标点前后寻找最佳停顿 - search_ranges = [15, 30, 60, 120, 300] - - found_cut = False - - for search_range in search_ranges: - if found_cut: - break - - search_start = max(0, target_point - search_range) - search_end = min(total_duration, target_point + search_range) - - # 找到范围内的停顿 - candidates = [] - for silence in silences: - if search_start <= silence['center'] <= search_end: - distance = abs(silence['center'] - target_point) - - # 评分系统 - duration_score = 1.0 - if 0.1 <= silence['duration'] <= 0.3: - duration_score = 2.0 - elif 0.05 <= silence['duration'] <= 0.5: - duration_score = 1.5 - - distance_score = 1.0 / (distance + 1) - - type_score = 1.0 - if silence['type'] in ['句间', '自然', '段落']: - type_score = 1.5 - elif silence['type'] in ['短句', '长句间']: - type_score = 1.3 - - total_score = duration_score * distance_score * type_score - - candidates.append({ - 'silence': silence, - 'distance': distance, - 'score': total_score - }) + if os.path.exists(progress_file): + try: + with open(progress_file, 'r', encoding='utf-8') as f: + progress_data = json.load(f) - if candidates: - candidates.sort(key=lambda x: (-x['score'], x['distance'])) - best = candidates[0] + # 验证进度文件是否匹配当前任务 + if (progress_data.get('input_video') == input_video_path and + abs(progress_data.get('total_duration', 0) - total_duration) < 1): - cut_points.append({ - 'target': target_point, - 'actual': best['silence']['center'], - 'deviation': best['silence']['center'] - target_point, - 'silence_start': best['silence']['start'], - 'silence_end': best['silence']['end'], - 'silence_duration': best['silence']['duration'], - 'silence_type': best['silence']['type'], - 'search_range': search_range, - 'score': best['score'] - }) + cut_points = progress_data.get('completed_cut_points', []) + start_index = len(cut_points) - rprint(f" [green]✓ 找到停顿: {format_time(best['silence']['center'])} " - f"(偏差 {best['silence']['center'] - target_point:+.1f}s, " - f"停顿 {best['silence']['duration']*1000:.0f}ms, " - f"类型: {best['silence']['type']}, " - f"搜索范围 ±{search_range}s)[/green]") + if start_index > 0: + rprint(f"[green]🔄 发现已有进度: 已完成 {start_index}/{len(detection_points)} 个切分点[/green]") + for point in cut_points: + rprint(f"[dim] ✓ {format_time(point['target'])} -> {format_time(point['actual'])}[/dim]") + except: + rprint(f"[yellow]⚠️ 无法加载进度文件,重新开始[/yellow]") + + rprint(f"[cyan]📍 计划检测 {len(detection_points)} 个切分点[/cyan]") + + # 对每个检测点进行分析 + try: + for i, target_time in enumerate(detection_points): + # 跳过已完成的点 + if i < start_index: + continue - found_cut = True + rprint(f"\n[yellow]🎯 分析切分点 {i+1}/{len(detection_points)} (目标: {format_time(target_time)})[/yellow]") + + cut_point = detect_optimal_cut_point( + input_video_path, + target_time, + total_duration, + output_dir, + i+1 + ) + + if cut_point: + cut_points.append(cut_point) + rprint(f"[green]✅ 找到切分点: {format_time(cut_point['actual'])} (偏差: {cut_point['deviation']:+.1f}s)[/green]") else: - rprint(f" [yellow]⚠️ ±{search_range}s范围内无合适停顿[/yellow]") - - if not found_cut: - fallback_time = min(target_point + 30, total_duration - 30) - cut_points.append({ - 'target': target_point, - 'actual': fallback_time, - 'deviation': fallback_time - target_point, - 'silence_start': fallback_time, - 'silence_end': fallback_time, - 'silence_duration': 0, - 'silence_type': 'fallback', - 'search_range': 0, - 'score': 0 - }) - rprint(f" [red]✗ 无合适停顿,使用备选点 {format_time(fallback_time)}[/red]") - - return cut_points - -def extract_audio_from_video_large(video_path, output_audio_path, timeout_minutes=10): - """从大视频中提取音频,增加超时时间""" - rprint(f"[cyan]🎵 提取音频 (大文件模式): {os.path.basename(video_path)} -> {os.path.basename(output_audio_path)}[/cyan]") + # 使用备选点 + fallback_point = { + 'target': target_time, + 'actual': target_time, + 'deviation': 0, + 'silence_duration': 0, + 'silence_type': 'fallback', + 'confidence': 'low' + } + cut_points.append(fallback_point) + rprint(f"[yellow]⚠️ 使用备选点: {format_time(target_time)}[/yellow]") + + # 💾 每完成一个点就保存进度 + try: + progress_data = { + 'input_video': input_video_path, + 'total_duration': total_duration, + 'target_interval': target_interval, + 'completed_cut_points': cut_points, + 'progress': f"{len(cut_points)}/{len(detection_points)}" + } + with open(progress_file, 'w', encoding='utf-8') as f: + json.dump(progress_data, f, ensure_ascii=False, indent=2) + rprint(f"[dim]💾 进度已保存 ({len(cut_points)}/{len(detection_points)})[/dim]") + except: + pass + + except KeyboardInterrupt: + rprint(f"\n[yellow]⚠️ 用户中断,进度已保存,可重新运行继续[/yellow]") + return None - cmd = [ - 'ffmpeg', - '-i', video_path, - '-vn', - '-acodec', 'libmp3lame', - '-ab', '128k', - '-ar', '22050', - '-ac', '1', - output_audio_path, - '-y' - ] + # 生成段落信息 + segments = [] - try: - timeout_seconds = timeout_minutes * 60 - with console.status(f"[yellow]🎵 提取音频中... (最多等待{timeout_minutes}分钟)", spinner="dots"): - subprocess.run(cmd, check=True, capture_output=True, timeout=timeout_seconds) - rprint(f"[green]✓ 音频提取完成: {os.path.basename(output_audio_path)}[/green]") - return True - except subprocess.TimeoutExpired: - rprint(f"[red]❌ 音频提取超时({timeout_minutes}分钟)[/red]") - return False - except Exception as e: - rprint(f"[red]❌ 音频提取失败: {e}[/red]") - return False - -def find_cut_points_from_silences(silences, target_interval_minutes=30, total_duration=None): - """从静音段中找到最佳切分点""" - rprint(f"[cyan]🎯 从静音段中寻找{target_interval_minutes}分钟间隔的切分点...[/cyan]") + # 第一段:从开始到第一个切分点 + if cut_points: + segments.append({ + 'index': 1, + 'start': 0, + 'end': cut_points[0]['actual'], + 'duration': cut_points[0]['actual'], + 'cut_type': 'start' + }) + + # 中间段落 + for i in range(len(cut_points) - 1): + segments.append({ + 'index': i + 2, + 'start': cut_points[i]['actual'], + 'end': cut_points[i + 1]['actual'], + 'duration': cut_points[i + 1]['actual'] - cut_points[i]['actual'], + 'cut_type': 'middle' + }) + + # 最后一段:从最后一个切分点到结束 + segments.append({ + 'index': len(cut_points) + 1, + 'start': cut_points[-1]['actual'], + 'end': total_duration, + 'duration': total_duration - cut_points[-1]['actual'], + 'cut_type': 'end' + }) - if not total_duration: - rprint("[red]❌ 需要提供总时长[/red]") - return [] + # 创建切分计划 + plan = { + 'input_video': input_video_path, + 'total_duration': total_duration, + 'target_interval': target_interval, + 'cut_points': cut_points, + 'segments': segments + } - # 计算目标切分点 - target_seconds = target_interval_minutes * 60 - target_points = [] + # 保存计划到文件 + with open(plan_file, 'w', encoding='utf-8') as f: + json.dump(plan, f, ensure_ascii=False, indent=2) - current = target_seconds - while current < total_duration - 60: - target_points.append(current) - current += target_seconds + rprint(f"[green]✓ 切分计划已保存: {plan_file}[/green]") - if not target_points: - rprint(f"[yellow]⚠️ 音频时长不足以按{target_interval_minutes}分钟切分[/yellow]") - return [] + # 🧹 完成后清理进度文件 + try: + if os.path.exists(progress_file): + os.remove(progress_file) + except: + pass - rprint(f"[yellow]🎯 目标切分点: {len(target_points)} 个[/yellow]") - rprint(f"[yellow]📊 可用静音段: {len(silences)} 个[/yellow]") + return plan + +def detect_optimal_cut_point(input_video_path, target_time, total_duration, output_dir, point_index): + """ + 函数2: 切分检测函数 (简化版) + 在指定时间点附近检测最佳切分位置 + - 使用30秒分析窗口 + - 只检测-25dB以下的静音 + - 选择窗口内最后一个静音点作为切分点 + """ + # 定义分析窗口:目标时间前后各30秒 + window_size = 30 # 30秒 + start_time = max(0, target_time - window_size) + end_time = min(total_duration, target_time + window_size) + analysis_duration = end_time - start_time + + rprint(f"[cyan] 📊 分析窗口: {format_time(start_time)} - {format_time(end_time)} (±{window_size}s)[/cyan]") + + # 提取分析片段 + segment_path = os.path.join(output_dir, f"temp_segment_{point_index}.mp4") + if not extract_video_segment(input_video_path, start_time, analysis_duration, segment_path): + rprint(f"[yellow] ⚠️ 提取片段失败,使用目标时间[/yellow]") + return { + 'target': target_time, + 'actual': target_time, + 'deviation': 0, + 'silence_duration': 0, + 'silence_type': 'fallback', + 'confidence': 'low', + 'reason': 'extract_failed' + } + + # 提取音频 + audio_path = os.path.join(output_dir, f"temp_audio_{point_index}.mp3") + if not extract_audio_from_video(segment_path, audio_path): + rprint(f"[yellow] ⚠️ 提取音频失败,使用目标时间[/yellow]") + if os.path.exists(segment_path): + os.remove(segment_path) + return { + 'target': target_time, + 'actual': target_time, + 'deviation': 0, + 'silence_duration': 0, + 'silence_type': 'fallback', + 'confidence': 'low', + 'reason': 'audio_failed' + } + + # 分离人声 + vocals_path = separate_vocals_with_demucs(audio_path, output_dir) + if not vocals_path: + rprint(f"[yellow] ⚠️ 人声分离失败,使用目标时间[/yellow]") + for temp_file in [segment_path, audio_path]: + if os.path.exists(temp_file): + os.remove(temp_file) + return { + 'target': target_time, + 'actual': target_time, + 'deviation': 0, + 'silence_duration': 0, + 'silence_type': 'fallback', + 'confidence': 'low', + 'reason': 'vocals_failed' + } + + # 检测30秒窗口内的所有静音段:-25dB,最小时长50ms + rprint(f"[cyan] 🔍 检测30秒窗口内的静音段 (-25dB, ≥50ms)[/cyan]") - cut_points = [] + cmd = [ + 'ffmpeg', + '-i', vocals_path, + '-af', 'silencedetect=noise=-25dB:duration=0.05', + '-f', 'null', + '-', + '-v', 'info' + ] - for i, target_point in enumerate(target_points): - rprint(f"[yellow]🔍 切分点 {i+1} (目标: {format_time(target_point)}):[/yellow]") - - # 在目标点前后寻找最佳静音段 - search_ranges = [30, 60, 120, 300, 600] + silences = [] + try: + result = subprocess.run(cmd, capture_output=True, text=True, timeout=30) - found_cut = False + current_silence_start = None - for search_range in search_ranges: - if found_cut: - break - - search_start = max(0, target_point - search_range) - search_end = min(total_duration, target_point + search_range) + for line in result.stderr.split('\n'): + line = line.strip() - # 找到范围内的静音段 - candidates = [] - for silence in silences: - if search_start <= silence['center'] <= search_end: - distance = abs(silence['center'] - target_point) - # 评分:静音时长越长越好,距离目标点越近越好 - score = silence['duration'] / (distance + 1) - candidates.append({ - 'silence': silence, - 'distance': distance, - 'score': score - }) + # 解析 silence_start + if 'silence_start:' in line: + try: + start_part = line.split('silence_start:')[1].strip() + silence_start = float(start_part.split()[0]) + current_silence_start = silence_start + except Exception: + continue - if candidates: - # 按评分排序 - candidates.sort(key=lambda x: (-x['score'], x['distance'])) - best = candidates[0] - - cut_points.append({ - 'target': target_point, - 'actual': best['silence']['center'], - 'deviation': best['silence']['center'] - target_point, - 'silence_start': best['silence']['start'], - 'silence_end': best['silence']['end'], - 'silence_duration': best['silence']['duration'], - 'search_range': search_range - }) - - rprint(f" [green]✓ 切分点: {format_time(best['silence']['center'])} " - f"(偏差 {best['silence']['center'] - target_point:+.1f}s, " - f"静音 {best['silence']['duration']:.3f}s, " - f"搜索范围 ±{search_range}s)[/green]") - - found_cut = True - else: - rprint(f" [yellow]⚠️ ±{search_range}s范围内无静音段[/yellow]") - - if not found_cut: - fallback_time = min(target_point + 60, total_duration - 60) - cut_points.append({ - 'target': target_point, - 'actual': fallback_time, - 'deviation': fallback_time - target_point, - 'silence_start': fallback_time, - 'silence_end': fallback_time, - 'silence_duration': 0, - 'search_range': 0, - 'type': 'fallback' - }) - rprint(f" [red]✗ 无合适静音段,使用备选点 {format_time(fallback_time)}[/red]") - - return cut_points + # 解析 silence_end + elif 'silence_end:' in line and current_silence_start is not None: + try: + parts = line.split('silence_end:')[1] + + if '|' in parts: + end_part = parts.split('|')[0].strip() + duration_part = parts.split('silence_duration:')[1].strip() + silence_end = float(end_part) + silence_duration = float(duration_part) + else: + silence_end = float(parts.strip()) + silence_duration = silence_end - current_silence_start + + if silence_duration >= 0.05: # 至少50ms + silences.append({ + 'start': current_silence_start, + 'end': silence_end, + 'duration': silence_duration, + 'center': (current_silence_start + silence_end) / 2, + 'absolute_center': start_time + (current_silence_start + silence_end) / 2, + 'type': 'detected' + }) + + current_silence_start = None + + except Exception: + continue + + except Exception as e: + rprint(f"[red] ❌ 静音检测失败: {e}[/red]") + silences = [] + + if not silences: + rprint(f"[yellow] ⚠️ 未检测到静音段,使用目标时间[/yellow]") + # 清理临时文件 + for temp_file in [segment_path, audio_path, vocals_path]: + if os.path.exists(temp_file): + os.remove(temp_file) + return { + 'target': target_time, + 'actual': target_time, + 'deviation': 0, + 'silence_duration': 0, + 'silence_type': 'fallback', + 'confidence': 'low', + 'reason': 'no_silences' + } + + rprint(f"[green] ✓ 检测到 {len(silences)} 个静音段[/green]") + + # 显示所有静音段的信息 + for i, silence in enumerate(silences): + rprint(f" {i+1}. {format_time(silence['absolute_center'])} (时长: {silence['duration']*1000:.0f}ms)") + + # 选择最后一个静音段作为切分点 + last_silence = silences[-1] + absolute_time = last_silence['absolute_center'] + + best_candidate = { + 'target': target_time, + 'actual': absolute_time, + 'deviation': absolute_time - target_time, + 'silence_duration': last_silence['duration'], + 'silence_type': last_silence['type'], + 'confidence': 'high', + 'strategy': 'last_silence', + 'total_silences': len(silences) + } + + # 清理临时文件 + for temp_file in [segment_path, audio_path, vocals_path]: + if os.path.exists(temp_file): + os.remove(temp_file) + + # 输出结果 + rprint(f"[green] ✅ 选择最后一个静音段: {format_time(absolute_time)} | " + f"偏差: {best_candidate['deviation']:+.1f}s | " + f"静音: {best_candidate['silence_duration']*1000:.0f}ms | " + f"总静音段: {len(silences)}个[/green]") + + return best_candidate -def process_video_segments_25db(input_path, output_dir, segment_duration=30, target_interval=30): - """处理视频片段并基于人声停顿检测切分点""" - - rprint(Panel.fit("[bold magenta]🚀 基于人声停顿的智能切分工具 (修复版)[/bold magenta]", border_style="magenta")) - - # 检查文件 - if not os.path.exists(input_path): - rprint(f"[bold red]❌ 文件不存在: {input_path}[/bold red]") - return - - rprint(f"[green]✓ 输入文件[/green]: [cyan]{os.path.basename(input_path)}[/cyan]") - - # 获取视频信息 - total_duration = get_video_duration(input_path) - if total_duration is None: - return - - rprint(f"[green]✓ 视频总时长[/green]: [yellow]{format_time(total_duration)}[/yellow]") - - # 检查Demucs - if not check_demucs_installation(): - rprint("[red]❌ Demucs未安装,请运行: pip install demucs[/red]") - return +def execute_cut_plan(plan, output_dir): + """ + 函数3: 执行切分计划 + 根据切分计划实际切分视频 + """ + rprint(Panel.fit("[bold green]🚀 执行视频切分[/bold green]", border_style="green")) + + input_video = plan['input_video'] + segments = plan['segments'] + + if not os.path.exists(input_video): + rprint(f"[red]❌ 源视频文件不存在: {input_video}[/red]") + return False # 创建输出目录 - os.makedirs(output_dir, exist_ok=True) - - # 提取测试片段进行分析 - test_segments = [] - - # 开头片段 - if total_duration > segment_duration: - test_segments.append({ - 'name': 'start', - 'start': 0, - 'duration': segment_duration, - 'desc': f'开头{segment_duration}秒' - }) - - # 中间片段 - if total_duration > segment_duration * 4: - middle_start = (total_duration - segment_duration) / 2 - test_segments.append({ - 'name': 'middle', - 'start': middle_start, - 'duration': segment_duration, - 'desc': f'中间{segment_duration}秒' - }) - - if not test_segments: - rprint(f"[red]❌ 视频太短,无法提取测试片段[/red]") - return + segments_dir = os.path.join(output_dir, "segments") + os.makedirs(segments_dir, exist_ok=True) - rprint(f"[cyan]📋 将分析 {len(test_segments)} 个测试片段[/cyan]") + rprint(f"[cyan]📁 输出目录: {segments_dir}[/cyan]") + rprint(f"[cyan]🎬 开始切分 {len(segments)} 个片段...[/cyan]") - best_vocals_path = None + success_count = 0 - # 处理测试片段 - for segment in test_segments: - rprint(f"\n[yellow]🎬 处理{segment['desc']}片段...[/yellow]") - - # 提取视频片段 - video_segment_path = os.path.join(output_dir, f"segment_{segment['name']}.mp4") - if not extract_video_segment(input_path, segment['start'], segment['duration'], video_segment_path): - continue + for segment in segments: + segment_name = f"segment_{segment['index']:02d}.mp4" + output_path = os.path.join(segments_dir, segment_name) - # 提取音频 - audio_path = os.path.join(output_dir, f"segment_{segment['name']}_audio.mp3") - if not extract_audio_from_video(video_segment_path, audio_path): - continue - - # 分析原始音频的静音段 - rprint(f"[cyan]📊 分析{segment['desc']}原始音频的静音段:[/cyan]") - original_silences = detect_silence_fixed(audio_path, noise_db=-25, min_duration=0.1) + rprint(f"\n[yellow]✂️ 切分片段 {segment['index']}: {format_time(segment['start'])} - {format_time(segment['end'])}[/yellow]") - # 分离人声 - vocals_path, no_vocals_path = separate_vocals_with_demucs(audio_path, output_dir) + cmd = [ + 'ffmpeg', + '-i', input_video, + '-ss', str(segment['start']), + '-t', str(segment['duration']), + '-c', 'copy', + output_path, + '-y' + ] - if vocals_path: - best_vocals_path = vocals_path - - # 分析人声的静音段 - rprint(f"[cyan]📊 分析{segment['desc']}纯人声的静音段:[/cyan]") - vocal_silences = detect_silence_fixed(vocals_path, noise_db=-25, min_duration=0.1) - - # 分析人声的精细停顿 - rprint(f"[cyan]🎤 分析{segment['desc']}纯人声的精细停顿:[/cyan]") - speech_pauses = detect_speech_pauses_fixed(vocals_path, f"{segment['desc']}纯人声") - - # 对比分析 - rprint(f"[yellow]📈 对比分析:[/yellow]") - rprint(f" 原始音频静音段: {len(original_silences)} 个") - rprint(f" 纯人声静音段: {len(vocal_silences)} 个") - - # 统计精细停顿 - total_speech_pauses = sum(result['count'] for result in speech_pauses) - rprint(f" 纯人声精细停顿: {total_speech_pauses} 个") - - if len(vocal_silences) > len(original_silences): - rprint(f" [green]✓ 人声分离后检测到更多静音段 (+{len(vocal_silences) - len(original_silences)})[/green]") - elif len(vocal_silences) == len(original_silences): - rprint(f" [yellow]= 静音段数量相同[/yellow]") - else: - rprint(f" [red]- 人声分离后静音段减少 ({len(vocal_silences) - len(original_silences)})[/red]") + try: + with console.status(f"[yellow]处理片段 {segment['index']}...", spinner="dots"): + result = subprocess.run(cmd, capture_output=True, text=True, timeout=300) - if total_speech_pauses > 0: - rprint(f" [green]✓ 成功检测到人声精细停顿![/green]") + if result.returncode == 0: + file_size = os.path.getsize(output_path) / 1024 / 1024 # MB + rprint(f"[green]✅ 片段 {segment['index']} 完成: {segment_name} ({file_size:.1f}MB)[/green]") + success_count += 1 else: - rprint(f" [yellow]⚠️ 未检测到精细停顿[/yellow]") + rprint(f"[red]❌ 片段 {segment['index']} 失败: {result.stderr}[/red]") + + except subprocess.TimeoutExpired: + rprint(f"[red]❌ 片段 {segment['index']} 超时[/red]") + except Exception as e: + rprint(f"[red]❌ 片段 {segment['index']} 错误: {e}[/red]") + + # 生成切分报告 + report_file = os.path.join(output_dir, "cut_report.txt") + with open(report_file, 'w', encoding='utf-8') as f: + f.write("视频切分报告\n") + f.write("=" * 50 + "\n\n") + f.write(f"源视频: {os.path.basename(input_video)}\n") + f.write(f"总时长: {format_time(plan['total_duration'])}\n") + f.write(f"目标间隔: {plan['target_interval']} 分钟\n") + f.write(f"切分点数: {len(plan['cut_points'])}\n") + f.write(f"生成片段: {len(segments)}\n") + f.write(f"成功片段: {success_count}\n") + f.write(f"成功率: {success_count/len(segments)*100:.1f}%\n\n") - rprint(f"[green]✅ {segment['desc']}片段分析完成[/green]") + f.write("片段详情:\n") + f.write("-" * 30 + "\n") + for segment in segments: + f.write(f"片段 {segment['index']:2d}: {format_time(segment['start'])} - {format_time(segment['end'])} ({format_time(segment['duration'])})\n") - # 如果有人声文件,进行完整视频的切分点分析 - if best_vocals_path: - rprint(f"\n[cyan]🎯 基于人声进行完整视频的{target_interval}分钟间隔切分分析...[/cyan]") - - # 提取完整音频进行分析 - full_audio_path = os.path.join(output_dir, "full_audio.mp3") - if extract_audio_from_video_large(input_path, full_audio_path, timeout_minutes=15): - # 分离完整音频的人声 - full_vocals_path, _ = separate_vocals_with_demucs(full_audio_path, output_dir) - - if full_vocals_path: - # 尝试精细停顿切分 - rprint(f"[cyan]🎤 尝试基于人声精细停顿进行切分...[/cyan]") - speech_results = detect_speech_pauses_fixed(full_vocals_path, "完整人声") - speech_cut_points = find_optimal_speech_cuts_fixed(speech_results, target_interval, total_duration) - - # 备选的静音段切分 - rprint(f"[cyan]🔍 尝试基于静音段进行切分...[/cyan]") - silence_cut_points = [] - full_silences = detect_silence_fixed(full_vocals_path, noise_db=-25, min_duration=0.3) - - if full_silences: - # 使用静音段进行切分 - silence_cut_points = find_cut_points_from_silences(full_silences, target_interval, total_duration) - - # 选择最佳切分方案 - final_cut_points = [] - cut_method = "" - - if speech_cut_points and len(speech_cut_points) > 0: - final_cut_points = speech_cut_points - cut_method = "人声精细停顿" - rprint(f"[green]🏆 选择人声精细停顿切分方案[/green]") - elif silence_cut_points and len(silence_cut_points) > 0: - final_cut_points = silence_cut_points - cut_method = "静音段" - rprint(f"[yellow]⚠️ 使用静音段切分方案[/yellow]") - else: - rprint(f"[red]❌ 两种切分方案都未找到合适的切分点[/red]") - - if final_cut_points: - # 生成段落信息 - segments = generate_cut_segments(final_cut_points, total_duration) - - rprint(f"\n[green]🎉 使用{cut_method}找到 {len(final_cut_points)} 个切分点,生成 {len(segments)} 个段落:[/green]") - - total_segments_duration = 0 - for segment in segments: - cut_type_desc = "精细停顿" if 'silence_type' in final_cut_points[0] and final_cut_points[0]['silence_type'] != 'fallback' else "静音切分" if segment['cut_type'] == 'silence_cut' else "备选切分" if segment['cut_type'] == 'fallback' else "最终段" - rprint(f" 📹 段落 {segment['index']:2d}: {format_time(segment['start'])} - {format_time(segment['end'])} " - f"({format_time(segment['duration'])}) [{cut_type_desc}]") - total_segments_duration += segment['duration'] - - rprint(f"\n[cyan]📊 切分统计:[/cyan]") - rprint(f" 总时长: {format_time(total_duration)}") - rprint(f" 段落总时长: {format_time(total_segments_duration)}") - rprint(f" 平均段落时长: {format_time(total_segments_duration / len(segments))}") - rprint(f" 切分方法: {cut_method}") - - # 保存切分点信息 - cut_points_file = os.path.join(output_dir, "cut_points_speech_fixed.txt") - with open(cut_points_file, 'w', encoding='utf-8') as f: - f.write(f"基于{cut_method}的切分点信息\n") - f.write("=" * 50 + "\n\n") - - f.write("切分点详情:\n") - for i, cp in enumerate(final_cut_points): - f.write(f"切分点 {i+1}: {format_time(cp['actual'])}\n") - f.write(f" 目标时间: {format_time(cp['target'])}\n") - f.write(f" 偏差: {cp['deviation']:+.1f}s\n") - f.write(f" 静音段: {format_time(cp['silence_start'])} - {format_time(cp['silence_end'])}\n") - f.write(f" 静音时长: {cp['silence_duration']:.3f}s\n") - if 'silence_type' in cp: - f.write(f" 停顿类型: {cp['silence_type']}\n") - f.write(f" 搜索范围: ±{cp['search_range']}s\n\n") - - f.write("生成的段落:\n") - for segment in segments: - f.write(f"段落 {segment['index']}: {format_time(segment['start'])} - {format_time(segment['end'])} ({format_time(segment['duration'])})\n") - - rprint(f"[green]✓ 切分点信息已保存到: {cut_points_file}[/green]") - else: - rprint("[red]❌ 未找到合适的切分点[/red]") - - # 显示结果总结 - rprint(Panel( - f"[bold green]🎉 基于人声停顿的智能切分分析完成![/bold green]\n\n" - f"• 分析片段: [blue]{len(test_segments)}[/blue] 个\n" - f"• 目标间隔: [yellow]{target_interval}[/yellow] 分钟\n" - f"• 输出目录: [cyan]{output_dir}[/cyan]\n\n" - f"[dim]💡 优先使用人声精细停顿(50ms起),备选静音段切分\n" - f"🔸 微停顿(50-100ms) 🔹 短停顿(100-200ms) 🔷 中停顿(200-500ms) 🔶 长停顿(500ms+)\n" - f"📋 切分点信息已保存到 cut_points_speech_fixed.txt[/dim]", - title="✨ 完成", - border_style="green" - )) + rprint(f"\n[green]🎉 切分完成! 成功: {success_count}/{len(segments)}[/green]") + rprint(f"[cyan]📋 报告已保存: {report_file}[/cyan]") + + return success_count == len(segments) + +def display_cut_plan(plan): + """显示切分计划的详细信息""" + rprint(Panel.fit("[bold blue]📋 切分计划预览[/bold blue]", border_style="blue")) + + # 基本信息 + rprint(f"[green]📁 源视频[/green]: {os.path.basename(plan['input_video'])}") + rprint(f"[green]⏱️ 总时长[/green]: {format_time(plan['total_duration'])}") + rprint(f"[green]🎯 目标间隔[/green]: {plan['target_interval']} 分钟") + rprint(f"[green]✂️ 切分点[/green]: {len(plan['cut_points'])} 个") + rprint(f"[green]📹 生成片段[/green]: {len(plan['segments'])} 个") + + # 切分点详情 + if plan['cut_points']: + rprint(f"\n[cyan]🎯 切分点详情:[/cyan]") + for i, cp in enumerate(plan['cut_points']): + confidence_color = "green" if cp.get('confidence') == 'high' else "yellow" if cp.get('confidence') == 'medium' else "red" + rprint(f" {i+1}. {format_time(cp['actual'])} (偏差: {cp['deviation']:+.1f}s, 类型: {cp['silence_type']}, 置信度: [{confidence_color}]{cp.get('confidence', 'unknown')}[/{confidence_color}])") + + # 段落预览表格 + rprint(f"\n[cyan]📹 段落预览:[/cyan]") + table = Table(show_header=True, header_style="bold magenta") + table.add_column("片段", style="dim", width=6) + table.add_column("开始时间", style="cyan") + table.add_column("结束时间", style="cyan") + table.add_column("时长", style="yellow") + table.add_column("类型", style="green") + + for segment in plan['segments']: + table.add_row( + f"{segment['index']:02d}", + format_time(segment['start']), + format_time(segment['end']), + format_time(segment['duration']), + segment['cut_type'] + ) + + console.print(table) def main(): - """命令行入口""" - parser = argparse.ArgumentParser(description="基于人声停顿的智能切分工具 (修复版)") + """主函数:组装调用逻辑""" + parser = argparse.ArgumentParser(description="智能视频切分工具") parser.add_argument("--input", "-i", required=True, help="输入视频文件") parser.add_argument("--output", "-o", required=True, help="输出目录") - parser.add_argument("--duration", "-d", type=int, default=30, help="测试片段长度(秒)") parser.add_argument("--interval", "-t", type=int, default=30, help="目标切分间隔(分钟)") + parser.add_argument("--auto", "-a", action="store_true", help="自动执行,不询问确认") args = parser.parse_args() - process_video_segments_25db( - input_path=args.input, - output_dir=args.output, - segment_duration=args.duration, - target_interval=args.interval - ) + # 步骤1: 生成切分计划 + rprint("[bold cyan]步骤 1/3: 生成切分计划[/bold cyan]") + plan = generate_cut_plan(args.input, args.output, args.interval) + + if not plan: + rprint("[red]❌ 生成切分计划失败[/red]") + return + + # 步骤2: 显示计划并确认 + rprint(f"\n[bold cyan]步骤 2/3: 预览切分计划[/bold cyan]") + display_cut_plan(plan) + + # 询问用户确认 + if not args.auto: + if not Confirm.ask("\n[bold yellow]是否确认执行切分计划?[/bold yellow]"): + rprint("[yellow]❌ 用户取消操作[/yellow]") + return + + # 步骤3: 执行切分 + rprint(f"\n[bold cyan]步骤 3/3: 执行视频切分[/bold cyan]") + success = execute_cut_plan(plan, args.output) + + if success: + rprint(Panel( + "[bold green]🎉 视频切分完成![/bold green]\n\n" + f"• 源视频: {os.path.basename(plan['input_video'])}\n" + f"• 生成片段: {len(plan['segments'])} 个\n" + f"• 输出目录: {args.output}/segments\n" + f"• 切分报告: {args.output}/cut_report.txt", + title="✨ 完成", + border_style="green" + )) + else: + rprint("[red]❌ 视频切分过程中出现错误[/red]") if __name__ == "__main__": - import sys - if len(sys.argv) > 1: main() else: # 直接调用示例 - input_video = "/home/darkchunk/code/VideoLingo/output/Learn Solidity Smart Contract Development | Full 2024 Cyfrin Updraft Course.webm" - output_directory = "/home/darkchunk/code/VideoLingo/output/test_speech_cuts_fixed" - - process_video_segments_25db( - input_video, - output_directory, - segment_duration=30, - target_interval=30 # 30分钟间隔 - ) \ No newline at end of file + input_video = "/Users/luogaiyu/code/VideoLingo/videos/Learn Solidity Smart Contract Development | Full 2024 Cyfrin Updraft Course.webm" + output_directory = "/Users/luogaiyu/code/VideoLingo/output/smart_cut_test" + + # 步骤1: 生成切分计划 + rprint("[bold cyan]步骤 1/3: 生成切分计划[/bold cyan]") + plan = generate_cut_plan(input_video, output_directory, target_interval=30) + + if plan: + # 步骤2: 显示计划 + rprint(f"\n[bold cyan]步骤 2/3: 预览切分计划[/bold cyan]") + display_cut_plan(plan) + + # 步骤3: 询问确认并执行 + if Confirm.ask("\n[bold yellow]是否确认执行切分计划?[/bold yellow]"): + rprint(f"\n[bold cyan]步骤 3/3: 执行视频切分[/bold cyan]") + execute_cut_plan(plan, output_directory) + else: + rprint("[yellow]用户取消操作[/yellow]") \ No newline at end of file From b251c10599ca436f6d2b73fa60cd6d93f04436ee Mon Sep 17 00:00:00 2001 From: darkchunk Date: Sun, 28 Dec 2025 16:40:07 +0800 Subject: [PATCH 09/15] =?UTF-8?q?=E6=96=B0=E5=A2=9E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- batch/tasks_setting.xlsx | Bin 10066 -> 6811 bytes core/all_whisper_methods/whisperX_302.py | 124 +++++++++++++++++------ core/step7_merge_sub_to_vid.py | 122 +++++++++++++++++++--- 3 files changed, 198 insertions(+), 48 deletions(-) diff --git a/batch/tasks_setting.xlsx b/batch/tasks_setting.xlsx index 488cae2378b759a4908f50417bdc9b2f758c84c9..477cc99c20520e547bb2a8286b607bbf7d1e3dae 100644 GIT binary patch literal 6811 zcmcgx2UJsAvkpzAi8N8FNLM;YLKl$UuLww&j#TNr2?V5frAk*?KziuC38D7@QiM>X z2k9@U_rF)(|G)Kmvd+oMto_a2GxN=yeP*l3V_=d10Dv0+9gP%isZK?ic+|f>)O8DW z8QU7E*xTAUaJ;ayV|TT-QiLhuws7KIn{WKXx$4~;>g=0Ogx{pq!PuPwD_N`qj<+^3 zMQz;Nx+pT)L+&$HzLnfzU$W-m?!6 z$b|>`yiiqw7-5$(bAly^yL=K*Wo%v0EYF|#5bGlJX)o#1 zw68t3r^Z-~*P&!8znMT*iTQ9Hwe(&z0D$oCCK%b;n_SwE6Rl|3%83_rA-5x}x)Ae+ z!kWCaG8LZs#6;i3ON!e*2A1?^hw`8J;1f22T3cDtM8!_GrldXJh!|vF8_ujMk{1W# z3ld|YMeW}$k~?}_O)GS4PKpn6^?$!#D?q!Qsp%*Ahg7t`>f6A{u41_?Gn4PZoAjLI zC6b{LL~(A|E(x4d`(~|ipN-R#-mO8|w-ShfR%$Kli|1}yB9=P$mwCCLQ?NNckre6K z$+SaElQ)pAuWvxmliE4O1{mgp;4z8I$S);EF=TZ;a%npU8S|N1p)U{kpx*m)fn&*E zCr(>kiMzGF30n@Ti+gqq=2ZpP68K8@0Rw|W!sH)9l`$wHW{h?9Ov z>6KNG+uk>J!_}bRxLsF+Nsj5U+A5COc@1uQLNU&}d64ZUoE!zc3M~sjE6A&xn5cEe zKlFyYD@pH|&b&4`2i@3O`7~TTz``oyj5oGx%Fi86r6BvN6~;Qe#4G3xV1Q>qhP>s z-GY(ntk^Oeaob#6YH{72h_qt)@$fx>rk(TH=OK&)J(gpqIPyuVTDX|bw&pkf5W@40 zcFZx0p}PsZQue7)ST_N{wm}6IPl#CMoB5WQA)Tl(7)yl;`HiG5zXNH1+V&a9*+F7$ zcQ%iEs+T~&Wt@*i0b1T>Z89wO)M0fgJuh}#QM})-jum_7giiYfn8`+v*j|9rOxzQO zL`LT@aKpBaaMqQC%xKgTN_1hcrL-`6x>`6=(+jCAZ9_Nyv8n9-c$^fyWO^4HbcqmD zA&bvVEP5j~=SKU`7E#BbtI|+8;py|jN3kLXT^oJT zPW@xiHwk0(fT*$kp;3g5)IlJ*q|MA%nwV#4dabg}tNy*FXVu<8qdZou5r@(vWO7Rc zjCrC6ZrQsvTJX z&gStXeE=lsIUC*WD0#m!G5YG$(4OrFZP=^E+6>seYQrYLH$-XT6gG&b2XgV_%a9u8 zY9Z|<1SG>zgcWuqq`y+$hsz^TWiP*08P`z32$I_`{cNX1-HRt^CKN+e>GGcH(Fs!5 zd5JEE^J$_)HMw{LhBY*GMgWxdHo699_>U;t)+0o_EBONdBxa4*TMg@#iNV2>m1fnF ztSzAgs?OluOxkZ5L2qHL5@0tE{kLZE9V)ye)>eh|tWs45)#E#?B=g1wZ11nXhNz|Iyuq%Pek2i|+?e5<=nDjU#L>N2oHvU_!l zWZ{M881)hN9KYSPz$}K|S7KI)Z0}~TH*b6i3k(~e5alC=6v1+j9lv|Yr(@IENccRS z=SYE8j`2f6QTgkhG%?qV!nF5!^&?3Pi*iegI;Obr+q$Xx8cK9$5qA+imza+}PedMK>~t(P3*Y&FYpC z(&OvsgW~CYJu}6+deHF$QIBUS-=v(ur<(02$a!bJk(rSL@RxqzUT79#`~3oUKlFw4 z(9htGLN^wMBK6t(FM1zQA=u~r8s_@gtIg7X!pL{arUO*2S= ziBP)ZS=QYKfp=a>XG!Vx#Z`T*rnbEY@Jb&o>DxLxxU-3Ux)FAIDti zUPA)Fv+?lCvx}K*pEV>beNxotTA}%(?ghp0nNXjyIt389ksIg9DD&sLyhVPUuE*gD(RB24P8} z^idofT!efy(HqW7cD|tC4|6yHF||J@0W)?k4h$~7iU{$5hO`}dr6ev;zLAJse6_l| z1A?^)!=Eg)h_K7NqHfRV-T^u8yXqJ$q}kkm)h<#TwSfNT$ZG!|@YHXMN;%c&e-52t zAwF~@4)G|B^q_4|%>?oCQ-dR@BR!gbsNJ>L9aQV_@Y|hVb)^yV6xtsCvKVMgrYLmp zp5eesnUn15cV45M9L|w2>&OkdiD_)=3VSbZ)IZTbGtJv=9y(iJ~FOoN;nU; zVJ7RuDBd)|kQU4A=i>)cd#u9O9;4}}M!th%TAABe8gfMlVi--(%)rImVMDH8;#Pv8^T6b}&{;_%DB z42Lf}Ui$6-Rs`FLwMogQz(x77S^7u-4pIVMMjrYq#)fhP|1zZiC6N&5qtsA%tJx^C za)D}^Mr)cT-C73&93%_8`75H7zk>X~CQ9o{bX-JD@qq$_)4x27=x~5u5_KpXCC)$v zMrP!HDq3g`X^pWF5^>OUTPxX}d4XJdp$Zg1bjU+@RDhSs;kd{Tk3yx@e^a!taGsrf z578lkin|?mqY}{X7n}XFlL5a7{=Y9GV2_A#B;FoX?di>m8<#F6lL>)h%L2uQ6n%eE zH@WE-oBtD{#9anBNsJBpC1o`B5ROmneg;927Log>I{#l*;m889eo>1g#pbeFI6jqu zvRoz+DptAkKUtqFKBS2D%Xq9`oI%Zia~WnLCnyW(r^j=n9G^b^nJRY~3GTWlHD3|bE@6O~-QOP~m zyaHftBh>z>i*EfCT&K;RI{Az5`AL-%d~*v=)bv8bkNpDU_x;TwG7>uv9RRS#1px4W z?r$6%-KeCU)oVU(M+pfncs;GVt&*mGFi=TR#0|z%G8Td2kf8Z)GZfxMG zB#zm*2}hQ`57?nhuMspBuFhJDq*`I;XZ{F&u$i(hYBE`}UTxTVeb@nC#XYF@H1{R!RkWVoIX#i2<+Sh@ZMdH5nhLe;|jIM^A- zm%H)a=htZm^+MhWh7eMM$;RnL(>|rTY##MGUtmf%6*DMogQrSqLj^+-2TP#5RBi?R z$X<8!yijrb@iPR0(1+dogYG0l*i;&+=H$itRe%ja^;YHPc#MhB?S$|l;nd~_sY_dR zgjo+@wbUiIUyalIw5sZm3|m|?69=9)C0pxEd2P01V%kwaIJe*#sAb>W+ag-#FJ^{f%_$PM*0qx9|@-9{o9@eJ5sqKJmINz^iM>O#Du!OXDU7}SDVx*UjFFa$B8Ul`n@ zk6Za7>Aok@tF%>Pb^N;i2>*9RS4pqGX+kmiHj33KQUmnuO{^R^*e~y?G4eL(TzHQm zL?-$QAKTEm3MWb=f^n-uRKd5^YpKkpsfi^`Oiv}4_m&Q=I@j)RD#!5gyo za*PbiUFTLW_69zX2p=`3a^{2#jZ0!P+Ljz){cb4I2`!%!)$wCN8A|XobuV8{3~g;K zFOxSsYETK)^ON++>A5~&<1eeQuJ_hg_Qov<515uqMM7KrqD={eNtQ)nA&{wae5|`( zFC~~jFjE5gB7Dz*{n^8zzP%X@HqA}Q2MjV({L)b(t=W|kUd`q=`Lq79o?0YjDb40$ z_6_a*1%61HgsnS{o7#X<)5p)lmDtF#$~5|Sdm}LOzdu#W`XmWDnhTsTmENzGc)j~P zf1mF?cj!&Fv<&uV8QgP2i0C`8r65^(28GQ#mqsi~4S^RrAwf{fzz=19x| zNr5z@JKNB6sfZMEaz_=>>rs)fqw;Z>!Wp7-$APia^9{hS?B*QbLLASGtm@WwaesF% z&-x;?2<6lhl!M8CI`=1${_f%*q^gJ(zWJIHFAzes47l@QBm+H{y^xeF*#C&o?@4Jv z%O*uLK!ys+`5czecxY_K!TZKTP<338XwRIn$E0(E8jvkWNh&$^g@iHuAZy{C7p@XQ ztm1|RJumi)iQf*B7oBnH(e@K!GxF9OGwq zR9nK<#?i#aQD4o?&cs3Yl0`+auuB$o=;y3Cz2pJeJzi%Id$`er8_=ek^)=nl*3mX{ z`jg!IQv7)NtPbR%!zn{Sl@aD=0W)EBHwhA&z{jJ$v6?|Q(&(IIl!z{JQ-lAgu)n_G zUE_J{fh)Q))&cpEdxf9YH#QR(m2DPwLl3d$=dwJmQnP0@VT0rhA&U$12Ly6ElFT*s zUCkX&bnL{_Ldt?^_aA;y`|j==7Zzmokhor^(;~AB9bt~v+e>Z+#dESIS$&U;ke3U? z&hEJ8o5ChN*iOp36ZPVBw$H{OszzpM#2SZivI5g*CDjU15ZWak)uqF~6kxnL$rLdA z3jbIgr2L&O+j$$az2q~^o=ZB@nKro_LB`pZ#~36hwnG^|_=_+^6xAp|Lnp!b*9QqG z*8KSRqCEWXj}@+>ulDeNVgUeOwAR0)|Lo{rggZ$ z{y-`Cr)`g`TCQ#<{-Fi!9CfyW((>2l;wtoN-Tn(Yfa=`;t%_d-UafCe*-GZ BV>bW* literal 10066 zcmeHtbyQVb*Y}~jB%~XOLw89d-Ju-1I}g%zKtK=y=}tkqyF=*?QMwUnX%P7idVTeJ z?>FA@{{1|g!QNx8vwm~$J!8$e=G>|ZaPW8lL;x}X0H6Yx?5A59zyJWz@BjcV02x+S z($T@y+`-jQ)6>Zutk3FUZ%3H}56h4RfQ8Qg@9`fjfs(jE2ukylItw;^HpNV zJbQt?_n6c~I^M?i6q)K~SXWK!UiF%{U zRiZ*0n4y7d+Q<}kkv$~36?0)^39-42k6ekHxT-MxqzKd#Z3}Go(ev?YXNY`pqI%WD zi*IGfqMIQGp$eS%08q7kQA1G5=`ia1Run&1aqzHg5{X$;MpU$!G9k4+ z^KxEqhLnl`A5);;o4)=A{tlHrQ&Jy?{Bfjpc1!jtHn-p0<|?osMn!E-O|v-+z!mfdUyV>JN^gz z;BT*95~HNj&5j;&Ap146?|gb88e2laU0k-AO4BbuZXT=lWiBnrLOVSvwkB~9oQ!{~ z-`9b;1>u*U`>4)Vxl3N$!xN&Zb1w}^ymEeu#Pq-=;gNI6%3B<_>C@@6WEpu+2DjEI zrs9TzY=!2^?X^MV@Ff{ zXA<(aF+`sWDDF-qxOCrMBbV_^>Fa z^{G>H-@D)(mFcC+I`s$Da32k(d%opE=`QX&><@fFQLqS&Q2%rih)7)GF;pT=P$wY< zAj5drvHkE9cSjdn6GuneyXf`PXJDWq49ev{`zTRYQh3Xb-HP@tl-)hm4IkⅅxE$ z)(#eOe+}c@11c`RlO+}?FjwJlPdkAH;rHOZaP1C+5FCe( zhvAS-_DabEWe{-h?S9@V-b2Go0pm7`#Pl&|;PG!>FwhZ&=H^pNoit8_aNCTqKjpLm zkC6>PZDz1$J>cq@#%^g~O?^FO3ZuowZKLGe8DeIjC&l$l%ahx6#k4c;EkN*X03x2SBdKZ#`d`{%!4Nf z7VG`J(@RiI|I=icJNWU!q2}@x0RX^)&VZWC@5b_4Q(t9{9nUwj=GN%)AwR3V6^%MK zago++z74vERM{FYL0I}nO#9%AgJ$IviET7E;L(gypMTVi>0PI6l(>!gIe0r)! z63*#zwep9pLVRGQ6*`ztpwf;A5)jLL&T zie$I(^zg!m5zTo{{(fcA)jRpjLfG6u9rt)S`YQO$PWAG4ogszAgCGK1HpUt445i75?$^>h94B`%iDk@_h+aG6t)YzqUg^O8 zmk77AYCHI`b15C~@Yc00ByI!HiuwZuGGZUgoAp-RVzF@UONw{gB$x>Klb{;etSUQ# zEwP6zx3QjI@_0W(7t5$#KuEvD&RhgKn0VcC%d;WHR-<5CbypjUm0txJ@rPe^zUIrD zT(nNfPu%9>H_b`1)o%7l)Wb?&7BTzMcM@wah=#ESvS8!R=+Z~~dXc|lH@awS*yE7* zWbQuE1>NktT>SRaweh-C8OcM(1;Tx^%`fnoXGr6I2hGvNq#q`5PE#qLjxu#3*-a!2 zl(&9Ul z!XPwtz{){dC+?Z!ZbXTVL*`8!z2{6ROdKC>P1Jt`ToaOg?$wUATgbovf)um;(n17t z`{F4LLX&C!CWwx|;gG|GX$Il(9DIB8aErjB+PtDH31Gr9T=CJ)$uyg@JyzhtE@3q? zrDpnadhK*fwBQnl+zy&P2LdpUHwKiH!cqwAwo+?WQjZ=xx0F&Za|*49yF3q&wOKp> zvdvWGBsexKpN*Lq(;jc6mQI8?z^mE^G7hWZ4klcg)9reGx-iSSBDqLS3(~-2adb+nj8_Fa^*6HZ|oGAs8OJjXMtmUePu{zhwDlPt3F48O;Bl&Gi&clDJ#EdZ+;XK>@`QIa5JK zV8*9Ub$*sD^&#Ye?8Nqz_F@HTya*@DS!plklP8{3Oi0XzIJ)oKK1FCJv1dt}*c>flI zo2Dm*mf3vgkHLv0DlKHl3LpxzimRAd8_?T*(tpOd zm}ZAUt|WL>$|UAJCO4vwibIx($Uf(pIN%V z)dxxy`k0zahg3vtm}kVIFJd&xRtC!8luN-Xf|&-e$J#EFCtWT@u@KQqZRlwRW_er$%=Dg<>0sUC(4OkPd=w+t zp+BONidGKeU}ZpucHXPAYsuPUJ}=S}DzZqHCQ!B)2`)&P%yP9`l6y|GN#32@#nJy< z;);v{XLWp^JTFq;-I}3cT{3uIZK`L=Hb#VuAHuy@RCP=DXzxgVnF zvRz^Gf%j%W&4P?mrc#giK8I-2SqVPuiU$@)706@WOOuS!=beD*wT87cuXEejOVTRUR2beY z(&ouYR^KY%)gba|C?IAYDM8Uk4qQYinGe9xAH=Liv7nuBs|O!Xv80>^H_1NWrw)#) zS$rWT|xMTis4tqs|bmm2B^S${aj_wT6)zwt75M3&k1JIP9Dxw%=t zn^z+gQIMpiH!N$T;$DCiWao(ohB(4`|7JU7=vDi$fI|0c9z|c9%pO@){wPgq z3|?z9UFGFwZ9lwLRN(<4l?!|r^_-dwugyBLxfznN z$4nHFy@eb{g-3s+pg)1kZe6SD;(UE?GzWxN^X=y)m%T5mOO7s=l2W);}w=yYN0GD82!hsvjolc!05`_9mxfn> zefE25Rmcuq3{s{FHRz$(wow$Jw1_U)FGf6bWr}0%u$Q$@VSOl+vza%y&AbcETG#xc z>b<@Fe)36Kalq%pduE8RfzUO(c$$a$%1eO)p6c~C8(`q4opZwMg=B%P5m8qQq_!Q8 zc0zhKM}Oa&tDDKKgG@uiwwvRnW=D$5x;O5Qw|?KQS-zdjeLK1OVvgZ&IGOqJJW9op zLB z*x83cz{Ja0ul!ntW8nuVXzjz|%)N*sY3kcq-}n(tH3rQDM$w+*XD_p>N#um`KH$M{qndw`MV!Yc1}eus3xR>5+>l+gh`Z; zG*aCpP3um`<{J}=tVf^BIn+ALT+HVv`6^h0h%D!=n&QQ`%6Ps$Z4-Crjo;raQ7{G( z?JQutLt|$c%iq+`>n*PXJ8~}!Wck7%vdn6>ryjEfTbq|aNQf^2b=GW03m}Y2GW;d^ zTxmQ`l5lY7HZ_Hi$J;B3VO{X181rKtfFNS~Z97}6`ZHJCpR~=gmRAesNXIxgNkcXS z29alWs-yUXu!o301rreio&D2?GDL@w4!rz_KUwVk55yIy0R@>h6DLia*fa+@a+4|A zFNYBC*%-;=MsPX(AR~W}La;KE9VfFLwm$peWr+Ej#tc&!q}f4*81v(m`WTAVfzFo8 zR)R^h9b`PS(;aaS2!D~2WI?TF74_)vJ6d@Ui4DCUL8n9IwG5Tlcevzt_j=HGaV;Y;WUca@rocccW|V?bNgS(*UbjrA8j4IOCd7$%RFtE z2{uN`8{`>XHITnXjHWu$nC@h=kXXojnB|Sg!~OgiPJ`rb8`;1exJE09x>SpTt3_R_5f^RY%8xF2$)I{WC82w~u~mh0C)$?SN55-A z$XfHepnp8wZ$A{dp(}mbeWhgUi^Pjv6#tRYip)Lm%ROb|;h-$9J&j*Dh z-^q>pQ1iP|KWBuEK__JVeg6NIOof^|{7;qTm^yxc*5=^n-*u*zTYrLP-O4JI1@lF2 zS~qWFt=GK%dSCLihQ=KN&2fiF3Er!;_TW@xmW=MBwAKyTtwDF5|J;`<+C&f~Kx^3Q zr~m-oj|2d0W$a>Zrs3*h?O^%6807Q5)nSeuAidHmwj{o6pzzflPL2pZ*OT^?x~ZNRM4ZoC`HY4VNpQ%^dvlP6x8VNa?y|H$*@(qcj@j&Evj8_Uh;5#iQn2jSj2R$txrGRF;L zgG9-aeM#c?>@9;)#zZEU?EZ$C8FiLz%{9-eHg|aPcQ%%`z51P#Oqkk<8&g>~yNfDp zN)U_ReXj5-@h&ke96$tuLIXPED1S`S?IiyfpBI z_T7L$jf>UB*g$r?BrsGPvy}A5!1zFJybLf(o4iyQ!(t7;ce- z>d_dQ=HWp57G{p7sxFRBU^Y`n7xTY5?*DC9Kn*-1M&CAw9h%7^U1P;~zXZLsbcv}Bi+_6;wyA?#TNBxR zR(`d+*uH}5B$hkim^)s`a$qYZmfj+OJ|LEQUtROi;O zE>6sxCWmwl%az!=w9YxAeyYcW#1VF{s?1gjv3bB;u`!A-ZL{P+*ENL_dT?)e%ghRx zY!8K#u6eu?LOuOxox49NFk_{pXA_cIvb4|Cpy#jUrf$^9b%4@&UsfaXFQ^;p;KBum zR|DDKgR=yiKM=agN18J^vyRQn4C1stZk0=lbRpOtmsFkK3&CL-(mfGb|r*m%Cr zS;6M2TG9vzrEW&(!YsjNkwVMBHKHd;X>-GHo*Ip6_Y=LjI6fBduc5H;6-bD*z1C6j z7H2vOPVek^MR5I0VGwg$mnXFE7A7MS;vE9x-k~$~TsTc&bH_1F*=y})`bwzDAz_7- z&@g9?qDUG8XE5%Jb#_5aKYuD`M{&gF5XfxEeaqTJ-%j+cHiKO0i;AaL_qbyJ3`NPW z4KWNX3pDoq``1f9wfFP-FW)k$D*T>IsX55 zqW|RcXG8KArvqrO=a2T}pWr{+LchTIXy3uVnnZs>|EzKUg5p8@qR{*PS@Hgp!JpNL zUkr-z|L4K~QJeUa%bzLDFD@lyKi=Ta1m{l ...[/cyan]") - headers = {'Authorization': f'Bearer {load_key("whisper.whisperX_302_api_key")}'} - response = requests.request("POST", url, headers=headers, data=payload, files=files) - - response_json = response.json() + # 创建临时音频文件 + audio_file = "output/audio/raw.mp3" + try: + + # 构建curl命令 - 完全模拟你成功的命令 + api_key = load_key("whisper.whisperX_302_api_key") + + curl_command = [ + 'curl', + '--proxy', 'http://127.0.0.1:7897', + '-X', 'POST', + '-H', f'Authorization: Bearer {api_key}', + '-F', f'audio_input=@{audio_file}', + '-F', f'processing_type=align', + '-F', f'output=raw', + '-F', f'language={WHISPER_LANGUAGE}', + 'https://api.302.ai/302/whisperx' + ] + + start_time = time.time() + rprint(f"[cyan]🎤 使用curl转录音频,语言: <{WHISPER_LANGUAGE}> ...[/cyan]") + + # 打印实际执行的命令(正确格式化) + cmd_parts = [] + for arg in curl_command: + if ' ' in arg or arg.startswith('Authorization:') or arg.startswith('Content-Type:'): + cmd_parts.append(f'"{arg}"') + else: + cmd_parts.append(arg) + cmd_str = ' '.join(cmd_parts) + rprint(f"[yellow]执行命令: {cmd_str}[/yellow]") + + # 执行curl命令 + result = subprocess.run( + curl_command, + capture_output=True, + text=True, + timeout=180 + ) + print(result) + if result.returncode != 0: + rprint(f"[red]❌ curl命令失败 (返回码: {result.returncode})[/red]") + rprint(f"[red]错误信息: {result.stderr}[/red]") + if result.stdout: + rprint(f"[yellow]输出信息: {result.stdout}[/yellow]") + return None + + # 解析JSON响应 + try: + response_json = json.loads(result.stdout) + rprint(f"[green]✓ 成功获取响应[/green]") + + # 检查响应格式并转换为标准格式 + if 'segments' not in response_json and 'text' in response_json: + # 如果是简单的whisper格式,转换为segments格式 + response_json = { + 'segments': [{ + 'start': 0, + 'end': audio_duration, + 'text': response_json['text'] + }], + 'language': WHISPER_LANGUAGE + } + + rprint(f"[green]✓ 成功获取 {len(response_json.get('segments', []))} 个片段[/green]") + + except json.JSONDecodeError as e: + rprint(f"[red]❌ JSON解析失败: {e}[/red]") + rprint(f"[yellow]原始响应: {result.stdout[:500]}...[/yellow]") + return None + + except subprocess.TimeoutExpired: + rprint(f"[red]❌ 请求超时[/red]") + return None + except Exception as e: + rprint(f"[red]❌ 执行失败: {e}[/red]") + return None # 调整时间戳 - if start is not None: - for segment in response_json['segments']: + if start is not None and start > 0: + for segment in response_json.get('segments', []): segment['start'] += start segment['end'] += start for word in segment.get('words', []): @@ -76,10 +128,16 @@ def transcribe_audio_302(raw_audio_path: str, vocal_audio_path: str, start: floa json.dump(response_json, f, indent=4, ensure_ascii=False) elapsed_time = time.time() - start_time - rprint(f"[green]✓ Transcription completed in {elapsed_time:.2f} seconds[/green]") + rprint(f"[green]✓ 转录完成,耗时 {elapsed_time:.2f} 秒[/green]") return response_json if __name__ == "__main__": # 使用示例: result = transcribe_audio_302("output/audio/raw.mp3", "output/audio/raw.mp3") - rprint(result) + if result: + rprint(f"[green]成功!获得 {len(result.get('segments', []))} 个片段[/green]") + # 打印第一个片段的内容 + if result.get('segments'): + rprint(f"[cyan]第一个片段: {result['segments'][0].get('text', 'N/A')}[/cyan]") + else: + rprint("[red]失败![/red]") \ No newline at end of file diff --git a/core/step7_merge_sub_to_vid.py b/core/step7_merge_sub_to_vid.py index 085d6105..bb42d14f 100644 --- a/core/step7_merge_sub_to_vid.py +++ b/core/step7_merge_sub_to_vid.py @@ -43,9 +43,25 @@ def check_gpu_available(): # except: return False -def merge_subtitles_to_video(): +def merge_subtitles_to_video(test_mode=False, test_duration=30): + """ + 合并字幕到视频 + + Args: + test_mode (bool): 是否为测试模式,默认False + test_duration (int): 测试模式下的时长(秒),默认30秒 + """ video_file = find_video_files() - os.makedirs(os.path.dirname(OUTPUT_VIDEO), exist_ok=True) + + # 🔥 根据模式决定输出文件 + if test_mode: + output_video = f"{OUTPUT_DIR}/output_sub_test_{test_duration}s.mp4" + rprint(f"[bold yellow]🧪 测试模式:只处理前{test_duration}秒[/bold yellow]") + else: + output_video = OUTPUT_VIDEO + rprint("[bold blue]📹 正式模式:处理完整视频[/bold blue]") + + os.makedirs(os.path.dirname(output_video), exist_ok=True) # Check resolution if not load_key("burn_subtitles"): @@ -54,7 +70,7 @@ def merge_subtitles_to_video(): # Create a black frame frame = np.zeros((1080, 1920, 3), dtype=np.uint8) fourcc = cv2.VideoWriter_fourcc(*'mp4v') - out = cv2.VideoWriter(OUTPUT_VIDEO, fourcc, 1, (1920, 1080)) + out = cv2.VideoWriter(output_video, fourcc, 1, (1920, 1080)) out.write(frame) out.release() @@ -70,8 +86,23 @@ def merge_subtitles_to_video(): TARGET_HEIGHT = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) video.release() rprint(f"[bold green]Video resolution: {TARGET_WIDTH}x{TARGET_HEIGHT}[/bold green]") + + # 🔥 修复AV1问题和文件兼容性的FFmpeg命令 ffmpeg_cmd = [ - 'ffmpeg', '-i', video_file, + 'ffmpeg', + '-y', # 🔥 强制覆盖输出文件 + '-hwaccel', 'none', # 禁用硬件加速,解决AV1问题 + '-fflags', '+genpts', # 生成时间戳 + '-avoid_negative_ts', 'make_zero', # 避免时间戳问题 + '-i', video_file, + ] + + # 🔥 如果是测试模式,添加时长限制 + if test_mode: + ffmpeg_cmd.extend(['-t', str(test_duration)]) + + # 添加视频滤镜 + ffmpeg_cmd.extend([ '-vf', ( f"scale={TARGET_WIDTH}:{TARGET_HEIGHT}:force_original_aspect_ratio=decrease," f"pad={TARGET_WIDTH}:{TARGET_HEIGHT}:(ow-iw)/2:(oh-ih)/2," @@ -81,32 +112,93 @@ def merge_subtitles_to_video(): f"subtitles={TRANS_SRT}:force_style='FontSize={TRANS_FONT_SIZE},FontName={TRANS_FONT_NAME}," f"PrimaryColour={TRANS_FONT_COLOR},OutlineColour={TRANS_OUTLINE_COLOR},OutlineWidth={TRANS_OUTLINE_WIDTH}," f"BackColour={TRANS_BACK_COLOR},Alignment=2,MarginV=27,BorderStyle=4'" - ).encode('utf-8'), - ] + ), + ]) + # GPU检测和编码设置 gpu_available = check_gpu_available() - if gpu_available: + if gpu_available and not test_mode: # 测试模式使用CPU更稳定 rprint("[bold green]NVIDIA GPU encoder detected, will use GPU acceleration.[/bold green]") ffmpeg_cmd.extend(['-c:v', 'h264_nvenc']) else: rprint("[bold yellow]No NVIDIA GPU encoder detected, will use CPU instead.[/bold yellow]") + ffmpeg_cmd.extend(['-c:v', 'libx264']) + if test_mode: + ffmpeg_cmd.extend(['-preset', 'fast']) # 测试模式使用快速编码 + else: + ffmpeg_cmd.extend(['-preset', 'medium']) # 正式模式使用平衡编码 - ffmpeg_cmd.extend(['-y', OUTPUT_VIDEO]) + # 🔥 修复文件兼容性问题 + ffmpeg_cmd.extend([ + '-pix_fmt', 'yuv420p', # 🔥 确保像素格式兼容性 + '-c:a', 'aac', # 🔥 重新编码音频为AAC确保兼容性 + '-b:a', '128k', # 音频比特率 + '-movflags', '+faststart', # 🔥 优化MP4文件结构,便于播放 + output_video + ]) - print("🎬 Start merging subtitles to video...") + mode_text = f"前{test_duration}秒测试" if test_mode else "完整视频" + print(f"🎬 开始处理{mode_text}...") start_time = time.time() - process = subprocess.Popen(ffmpeg_cmd) + + # 🔥 改进错误处理,过滤AV1警告 + process = subprocess.Popen(ffmpeg_cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True) try: - process.wait() + stdout, stderr = process.communicate() if process.returncode == 0: - print(f"\n✅ Done! Time taken: {time.time() - start_time:.2f} seconds") + print(f"\n✅ 完成!处理时间: {time.time() - start_time:.2f} 秒") + print(f"📁 输出文件: {output_video}") + + # 🔥 验证输出文件 + if os.path.exists(output_video): + file_size = os.path.getsize(output_video) / (1024 * 1024) # MB + print(f"📊 文件大小: {file_size:.2f} MB") + + # 简单验证文件是否可读 + try: + test_video = cv2.VideoCapture(output_video) + frame_count = int(test_video.get(cv2.CAP_PROP_FRAME_COUNT)) + test_video.release() + print(f"✅ 文件验证通过,总帧数: {frame_count}") + except: + print("⚠️ 文件可能有问题,请检查") + else: - print("\n❌ FFmpeg execution error") + print(f"\n❌ FFmpeg执行错误:") + # 🔥 过滤掉AV1相关的重复警告 + filtered_errors = [] + for line in stderr.split('\n'): + if not any(keyword in line for keyword in [ + 'Missing Sequence Header', + 'hardware accelerated AV1', + 'Failed to get pixel format', + 'Your platform doesn\'t suppport' + ]): + if line.strip(): # 只保留非空行 + filtered_errors.append(line) + + # 只显示最后几行有用的错误信息 + if filtered_errors: + print('\n'.join(filtered_errors[-5:])) + else: + print("处理完成,但有一些AV1兼容性警告(已过滤)") + except Exception as e: - print(f"\n❌ Error occurred: {e}") + print(f"\n❌ 发生错误: {e}") if process.poll() is None: process.kill() +# 🔥 使用示例 if __name__ == "__main__": - merge_subtitles_to_video() \ No newline at end of file + # 测试模式:只处理前30秒 + # merge_subtitles_to_video(test_mode=True, test_duration=30) + + # 正式模式:处理完整视频 + # merge_subtitles_to_video(test_mode=False) + + # 或者简写 + merge_subtitles_to_video() # 默认正式模式 \ No newline at end of file From d35430fdb1487c84d388c54eac2ac510731580dc Mon Sep 17 00:00:00 2001 From: darkchunk Date: Mon, 29 Dec 2025 10:04:15 +0800 Subject: [PATCH 10/15] =?UTF-8?q?=E6=96=B0=E5=A2=9E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core/all_whisper_methods/whisperX_302.py | 56 +++++++++++++++++++----- core/step7_merge_sub_to_vid.py | 5 +-- 2 files changed, 45 insertions(+), 16 deletions(-) diff --git a/core/all_whisper_methods/whisperX_302.py b/core/all_whisper_methods/whisperX_302.py index 546882f1..e3451e9c 100644 --- a/core/all_whisper_methods/whisperX_302.py +++ b/core/all_whisper_methods/whisperX_302.py @@ -30,12 +30,38 @@ def transcribe_audio_302(raw_audio_path: str, vocal_audio_path: str, start: floa start = 0 end = audio_duration - start_sample = int(start * sr) - end_sample = int(end * sr) - y_slice = y[start_sample:end_sample] - # 创建临时音频文件 - audio_file = "output/audio/raw.mp3" + # ✅ 新代码 - 使用FFmpeg切分: + if start is not None and end is not None and (start > 0 or end < audio_duration): + # 使用FFmpeg直接切分,保持原始格式 + with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as temp_file: + ffmpeg_command = [ + 'ffmpeg', + '-i', vocal_audio_path, + '-ss', str(start), + '-t', str(end - start), + '-c', 'copy', # 复制编码,不重新编码 + '-y', # 覆盖输出文件 + temp_file.name + ] + + rprint(f"[cyan]🔪 使用FFmpeg切分音频: {start}s - {end}s[/cyan]") + ffmpeg_result = subprocess.run(ffmpeg_command, capture_output=True, text=True) + if ffmpeg_result.returncode != 0: + rprint(f"[red]❌ FFmpeg切分失败: {ffmpeg_result.stderr}[/red]") + return None + + audio_file = temp_file.name + + # 检查切分后的文件 + file_size = os.path.getsize(audio_file) + rprint(f"[green]✓ 切分完成,文件大小: {file_size / 1024 / 1024:.1f}MB[/green]") + else: + # 直接使用原始文件 + audio_file = vocal_audio_path + rprint(f"[cyan]📁 使用完整音频文件[/cyan]") + # # 创建临时音频文件 + # audio_file = "output/audio/raw.mp3" try: # 构建curl命令 - 完全模拟你成功的命令 @@ -111,6 +137,12 @@ def transcribe_audio_302(raw_audio_path: str, vocal_audio_path: str, start: floa except Exception as e: rprint(f"[red]❌ 执行失败: {e}[/red]") return None + finally: + # ✅ 添加清理临时文件 + try: + os.remove(audio_file) + except: + pass # 调整时间戳 if start is not None and start > 0: @@ -134,10 +166,10 @@ def transcribe_audio_302(raw_audio_path: str, vocal_audio_path: str, start: floa if __name__ == "__main__": # 使用示例: result = transcribe_audio_302("output/audio/raw.mp3", "output/audio/raw.mp3") - if result: - rprint(f"[green]成功!获得 {len(result.get('segments', []))} 个片段[/green]") - # 打印第一个片段的内容 - if result.get('segments'): - rprint(f"[cyan]第一个片段: {result['segments'][0].get('text', 'N/A')}[/cyan]") - else: - rprint("[red]失败![/red]") \ No newline at end of file + # if result: + # rprint(f"[green]成功!获得 {len(result.get('segments', []))} 个片段[/green]") + # # 打印第一个片段的内容 + # if result.get('segments'): + # rprint(f"[cyan]第一个片段: {result['segments'][0].get('text', 'N/A')}[/cyan]") + # else: + # rprint("[red]失败![/red]") \ No newline at end of file diff --git a/core/step7_merge_sub_to_vid.py b/core/step7_merge_sub_to_vid.py index bb42d14f..3c04d63b 100644 --- a/core/step7_merge_sub_to_vid.py +++ b/core/step7_merge_sub_to_vid.py @@ -142,10 +142,7 @@ def merge_subtitles_to_video(test_mode=False, test_duration=30): start_time = time.time() # 🔥 改进错误处理,过滤AV1警告 - process = subprocess.Popen(ffmpeg_cmd, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True) + process = subprocess.Popen(ffmpeg_cmd) try: stdout, stderr = process.communicate() From 1a33c653c90988fdf0d3c90cc11343e463d839f5 Mon Sep 17 00:00:00 2001 From: darkchunk Date: Sat, 3 Jan 2026 14:33:53 +0800 Subject: [PATCH 11/15] =?UTF-8?q?=E6=96=B0=E5=A2=9E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.yaml | 18 ++++++------- core/all_whisper_methods/audio_preprocess.py | 1 + core/all_whisper_methods/demucs_vl.py | 4 +++ core/all_whisper_methods/whisperX_302.py | 27 ++++++++++---------- core/step2_whisperX.py | 22 ++++++++++++++-- 5 files changed, 48 insertions(+), 24 deletions(-) diff --git a/config.yaml b/config.yaml index da677a84..c710d17c 100644 --- a/config.yaml +++ b/config.yaml @@ -6,12 +6,12 @@ display_language: "zh-CN" # API settings api: - key: 'sk-s7pvNTkGHVFkpGRW7SMWsYFCXO01CpnJr7C8jZD8kIpAck2k' + key: 'sk-SZZ4FDEHYZN7vSbw45VkhOnfkkz6NXeNDEwemvc0H2jQF1SC' base_url: 'https://api.302.ai' model: 'gemini-2.0-flash' # Language settings, written into the prompt, can be described in natural language -target_language: 'Chinese' +target_language: 'zh' # Whether to use Demucs for vocal separation before transcription demucs: true @@ -23,9 +23,9 @@ whisper: language: 'en' detected_language: 'en' # Whisper running mode ["local", "cloud", "elevenlabs"]. Specifies where to run, cloud uses 302.ai API - runtime: 'local' + runtime: 'cloud' # 302.ai API key - whisperX_302_api_key: 'sk-s7pvNTkGHVFkpGRW7SMWsYFCXO01CpnJr7C8jZD8kIpAck2k' + whisperX_302_api_key: 'sk-SZZ4FDEHYZN7vSbw45VkhOnfkkz6NXeNDEwemvc0H2jQF1SC' # ElevenLabs API key elevenlabs_api_key: 'your_elevenlabs_api_key' @@ -67,7 +67,7 @@ tts_method: 'f5tts' # SiliconFlow FishTTS sf_fish_tts: # SiliconFlow API key - api_key: 'sk-s7pvNTkGHVFkpGRW7SMWsYFCXO01CpnJr7C8jZD8kIpAck2k' + api_key: 'sk-SZZ4FDEHYZN7vSbw45VkhOnfkkz6NXeNDEwemvc0H2jQF1SC' # only for mode "preset" voice: 'anna' # *only for mode "custom", dont set manually @@ -78,17 +78,17 @@ sf_fish_tts: # OpenAI TTS-1 API configuration, 302.ai API only openai_tts: - api_key: 'sk-s7pvNTkGHVFkpGRW7SMWsYFCXO01CpnJr7C8jZD8kIpAck2k' + api_key: 'sk-SZZ4FDEHYZN7vSbw45VkhOnfkkz6NXeNDEwemvc0H2jQF1SC' voice: 'alloy' # Azure configuration, 302.ai API only azure_tts: - api_key: 'sk-s7pvNTkGHVFkpGRW7SMWsYFCXO01CpnJr7C8jZD8kIpAck2k' + api_key: 'sk-SZZ4FDEHYZN7vSbw45VkhOnfkkz6NXeNDEwemvc0H2jQF1SC' voice: 'zh-CN-YunfengNeural' # FishTTS configuration, 302.ai API only fish_tts: - api_key: 'sk-s7pvNTkGHVFkpGRW7SMWsYFCXO01CpnJr7C8jZD8kIpAck2k' + api_key: 'sk-SZZ4FDEHYZN7vSbw45VkhOnfkkz6NXeNDEwemvc0H2jQF1SC' character: 'AD学姐' character_id_dict: 'AD学姐': '7f92f8afb8ec43bf81429cc1c9199cb1' @@ -108,7 +108,7 @@ gpt_sovits: refer_mode: 3 f5tts: - 302_api: 'sk-s7pvNTkGHVFkpGRW7SMWsYFCXO01CpnJr7C8jZD8kIpAck2k' + 302_api: 'sk-SZZ4FDEHYZN7vSbw45VkhOnfkkz6NXeNDEwemvc0H2jQF1SC' # *Audio speed range speed_factor: diff --git a/core/all_whisper_methods/audio_preprocess.py b/core/all_whisper_methods/audio_preprocess.py index b722df1f..58cd903a 100644 --- a/core/all_whisper_methods/audio_preprocess.py +++ b/core/all_whisper_methods/audio_preprocess.py @@ -147,6 +147,7 @@ def save_results(df: pd.DataFrame): # Remove rows where 'text' is empty initial_rows = len(df) + df = df[df['text'].str.len() > 0] removed_rows = initial_rows - len(df) if removed_rows > 0: diff --git a/core/all_whisper_methods/demucs_vl.py b/core/all_whisper_methods/demucs_vl.py index de975fa9..58d9dbe5 100644 --- a/core/all_whisper_methods/demucs_vl.py +++ b/core/all_whisper_methods/demucs_vl.py @@ -10,6 +10,9 @@ from demucs.api import Separator from demucs.apply import BagOfModels import gc +import logging + +log = logging.getLogger(__name__) AUDIO_DIR = "output/audio" RAW_AUDIO_FILE = os.path.join(AUDIO_DIR, "raw.mp3") @@ -43,6 +46,7 @@ def demucs_main(): "clip": "rescale", "as_float": False, "bits_per_sample": 16} console.print("🎤 Saving vocals track...") + log.info(f"vocals shape: {outputs['vocals'].shape}") save_audio(outputs['vocals'].cpu(), VOCAL_AUDIO_FILE, **kwargs) console.print("🎹 Saving background music...") diff --git a/core/all_whisper_methods/whisperX_302.py b/core/all_whisper_methods/whisperX_302.py index e3451e9c..6793645e 100644 --- a/core/all_whisper_methods/whisperX_302.py +++ b/core/all_whisper_methods/whisperX_302.py @@ -15,24 +15,25 @@ def transcribe_audio_302(raw_audio_path: str, vocal_audio_path: str, start: float = None, end: float = None): os.makedirs(OUTPUT_LOG_DIR, exist_ok=True) LOG_FILE = f"{OUTPUT_LOG_DIR}/whisperx302.json" - if os.path.exists(LOG_FILE): - with open(LOG_FILE, "r", encoding="utf-8") as f: - return json.load(f) + WHISPER_LANGUAGE = load_key("whisper.language") save_language(WHISPER_LANGUAGE) # 加载音频并处理start和end参数 y, sr = librosa.load(vocal_audio_path, sr=16000) - audio_duration = len(y) / sr - if not start or not end: + if start is None or end is None : start = 0 end = audio_duration + # 如果文件是属于 只传一次的话 + if os.path.exists(LOG_FILE): + with open(LOG_FILE, "r", encoding="utf-8") as f: + return json.load(f) # ✅ 新代码 - 使用FFmpeg切分: - if start is not None and end is not None and (start > 0 or end < audio_duration): + if start is not None and end is not None and (start >= 0 or end <= audio_duration): # 使用FFmpeg直接切分,保持原始格式 with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as temp_file: ffmpeg_command = [ @@ -138,12 +139,12 @@ def transcribe_audio_302(raw_audio_path: str, vocal_audio_path: str, start: floa rprint(f"[red]❌ 执行失败: {e}[/red]") return None finally: - # ✅ 添加清理临时文件 - try: - os.remove(audio_file) - except: - pass - + # ✅ 修复:只删除临时文件 + if audio_file != vocal_audio_path: + try: + os.remove(audio_file) + except: + pass # 调整时间戳 if start is not None and start > 0: for segment in response_json.get('segments', []): @@ -165,7 +166,7 @@ def transcribe_audio_302(raw_audio_path: str, vocal_audio_path: str, start: floa if __name__ == "__main__": # 使用示例: - result = transcribe_audio_302("output/audio/raw.mp3", "output/audio/raw.mp3") + result = transcribe_audio_302("output/audio/vocal.mp3", "output/audio/vocal.mp3") # if result: # rprint(f"[green]成功!获得 {len(result.get('segments', []))} 个片段[/green]") # # 打印第一个片段的内容 diff --git a/core/step2_whisperX.py b/core/step2_whisperX.py index df439d80..df420599 100644 --- a/core/step2_whisperX.py +++ b/core/step2_whisperX.py @@ -8,6 +8,8 @@ from core.all_whisper_methods.audio_preprocess import process_transcription, convert_video_to_audio, split_audio, save_results, CLEANED_CHUNKS_EXCEL_PATH, normalize_audio_volume from core.step1_ytdlp import find_video_files +import json + def transcribe(): if os.path.exists(CLEANED_CHUNKS_EXCEL_PATH): rprint("[yellow]⚠️ Transcription results already exist, skipping transcription step.[/yellow]") @@ -24,9 +26,13 @@ def transcribe(): else: vocal_audio = RAW_AUDIO_FILE - # step2 Extract audio + # # step2 Extract audio segments = split_audio(RAW_AUDIO_FILE) + # 输出数组到JSON文件 + # with open('log/segments.json', 'w', encoding='utf-8') as f: + # json.dump(segments, f, indent=4, ensure_ascii=False, default=str) + # step3 Transcribe audio all_results = [] runtime = load_key("whisper.runtime") @@ -41,16 +47,28 @@ def transcribe(): rprint("[cyan]🎤 Transcribing audio with ElevenLabs API...[/cyan]") for start, end in segments: - result = ts(RAW_AUDIO_FILE, vocal_audio, start, end) + result = ts(RAW_AUDIO_FILE, vocal_audio,start, end) all_results.append(result) + + # # 输出数组到JSON文件 + # with open('log/all_results.json', 'w', encoding='utf-8') as f: + # json.dump(all_results, f, indent=4, ensure_ascii=False, default=str) # step4 Combine results combined_result = {'segments': []} for result in all_results: combined_result['segments'].extend(result['segments']) + # with open('log/combined_result.json', 'w', encoding='utf-8') as f: + # json.dump(combined_result, f, indent=4, ensure_ascii=False, default=str) + # step5 Process df df = process_transcription(combined_result) + + # print(len(df['start'].unique())) + # print(len(df['start'])) + # print(len(df['start'].unique()) == len(df['start']) ) + # df.to_excel(CLEANED_CHUNKS_EXCEL_PATH, index=False) save_results(df) if __name__ == "__main__": From 1e52a75276fb9a88d4729e4aba627a1755f682a6 Mon Sep 17 00:00:00 2001 From: luogaiyu <1449528975@qq.com> Date: Thu, 8 Jan 2026 18:12:41 +0800 Subject: [PATCH 12/15] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E6=89=B9=E9=87=8F?= =?UTF-8?q?=E4=B8=8A=E4=BC=A0=E8=A7=86=E9=A2=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../batch_processor_get_title_introduction.py | 5 +- .../upload_video_2_bilibili.py | 259 ++++++++++++++++++ core/prompts_storage.py | 6 +- 3 files changed, 265 insertions(+), 5 deletions(-) create mode 100644 batch/utils/upload_video_2_bilibili/upload_video_2_bilibili.py diff --git a/batch/utils/batch_processor_get_title_introduction.py b/batch/utils/batch_processor_get_title_introduction.py index 28ca5d72..e554b8a8 100644 --- a/batch/utils/batch_processor_get_title_introduction.py +++ b/batch/utils/batch_processor_get_title_introduction.py @@ -90,7 +90,7 @@ def get_tasks_setting_info(): df = pd.read_excel(base_path) return df -def json_valid(response_data): +def f(response_data): try: json.loads(response_data) return response_data @@ -99,8 +99,7 @@ def json_valid(response_data): def get_title_introduction_batch(): responses = [] - all_trans_srt =read_all_trans_srt() - + trans_srtall_trans_srt =read_all_trans_srt() trans_srt_len = len(all_trans_srt) for i in range(trans_srt_len): trans_srt = all_trans_srt[i] diff --git a/batch/utils/upload_video_2_bilibili/upload_video_2_bilibili.py b/batch/utils/upload_video_2_bilibili/upload_video_2_bilibili.py new file mode 100644 index 00000000..6701e8d9 --- /dev/null +++ b/batch/utils/upload_video_2_bilibili/upload_video_2_bilibili.py @@ -0,0 +1,259 @@ +import os +import time +import subprocess +from pathlib import Path +import pandas as pd +from rich.console import Console +from rich.panel import Panel +import json +import re +from core.prompts_storage import get_title_introduction_prompt +from core.ask_gpt import ask_gpt +from core.config_utils import load_key +import pexpect +import sys + +console = Console() + +##############参数控制################## + +TID=36 # 野生技术协会 +################################ + +EXCEL_DEFAULT_PATH = os.path.join("batch", "output", "bilibili_upload_tasks.xlsx") + +def method1_upload(video_path, title, tags, introduction, schedule_time, partition, collection=None, cookies_path="cookies.json"): + # 如果当前的 biliup 不存在 就进行安装 + from shutil import which + if which("biliup") is None: + os.system('pip install biliup') + # biliup login 首先进行bilibili登陆操作 + os.system('biliup login') + # biliup 进行视频上传操作 + if not video_path or not os.path.exists(video_path): + raise ValueError(f"视频路径不存在: {video_path}") + args = [video_path, "--title", "\"" + (title or Path(video_path).parent.name) + "\""] + + + if introduction: + args += ["--desc", "\""+ introduction + "\""] + if tags: + args += ["--tag", "\"" + tags + "\""] + if partition and str(partition).strip().isdigit(): + args += ["--tid", "\"" + str(int(partition)) + "\""] + if schedule_time and str(schedule_time).strip().isdigit(): + args += ["--dtime", "\"" + str(int(schedule_time)) + "\""] + # 合集 + if collection: + args += ["--collection", "\"" + str(int(collection)) + "\"" ] + + # 需要先运行这个命令,阻塞当前的进程 + cmd = ["biliup"] + if cookies_path and os.path.exists(cookies_path): + cmd += ["-u", cookies_path] + cmd += ["upload"] + args + print("cmd: " + ' '.join(cmd)) + exit_code = os.system(' '.join(cmd)) + + # 在 Unix 系统中,0 表示成功 + if exit_code == 0: + print("✅ biliup login 执行成功") + return True + else: + print(f"❌ biliup login 执行失败,退出码: {exit_code}") + return False + + +def method2_generate_excel(output_root="batch/output", excel_path=EXCEL_DEFAULT_PATH): + base = Path(output_root) + rows = [] + if base.exists(): + for child in base.iterdir(): + if child.is_dir(): + preferred = child / "output_sub.mp4" + if preferred.exists(): + video_path = str(preferred) + else: + mp4s = list(child.glob("*.mp4")) + video_path = str(mp4s[0]) if mp4s else "" + desc_path = child / "log" / "sentence_splitbynlp.txt" + desc = "" + + try: + if desc_path.exists(): + desc = desc_path.read_text(encoding="utf-8").strip() + except Exception: + desc = "" + + prompt = get_title_introduction_prompt(desc); + # 通过调用当前的 gpt的方法来进行 标题和简介的生成 + try: + desc = ask_gpt(prompt, response_json=True, log_title='subtitle_trim') + except Exception as e: + print(f"Error: {e}") + # DEBUG + # print("测试 : ") + # print(desc) + # DEBUG + title = desc['title'] + introduction = desc['introduction'] + tags = desc['tags'] + rows.append({ + "视频路径": video_path, + "标题": title, + "标签": tags, + "描述简介": introduction, + "版权声明": 1, + "定时发布时间戳": "", + "分区": TID, + "加入合集": "" + }) + df = pd.DataFrame(rows) + os.makedirs(os.path.dirname(excel_path), exist_ok=True) + df.to_excel(excel_path, index=False, engine="openpyxl") + console.print(Panel(f"Excel 生成完成: {excel_path}", title="[bold green]方法2[/bold green]")) + + return excel_path + +def method3_upload_from_excel(excel_path=EXCEL_DEFAULT_PATH, cookies=None): + df = pd.read_excel(excel_path) + status_col = "Status" + if status_col not in df.columns: + df[status_col] = "" + try: + df[status_col] = df[status_col].astype(str) + except Exception: + pass + for idx, row in df.iterrows(): + if str(df.at[idx, status_col]).strip().lower() == "done": + continue + try: + video_path = str(row.get("视频路径", "")).strip() + title = str(row.get("标题", "")) + tags = str(row.get("标签", "")) + introduction = str(row.get("描述简介", "")) + description = str(row.get("版权声明", "")) + schedule_time = str(row.get("定时发布时间戳", "")) + partition = str(row.get("分区", "")) + collection = str(row.get("加入合集", "")) + # + cookies_use = cookies if (cookies and os.path.exists(str(cookies))) else None + console.print(Panel( + f"视频路径: {video_path}\n" + f"标题: {title}\n" + f"标签: {tags}\n" + f"描述简介: {introduction}\n" + f"版权声明/描述: {description}\n" + f"定时发布时间戳: {schedule_time}\n" + f"分区:{partition}\n" + f"加入合集: {collection}", + title="[bold blue]上传参数[/bold blue]" + )) + # + method1_upload( + video_path=video_path, title=title, tags=tags, introduction=introduction, schedule_time=schedule_time, partition=partition, collection=None, cookies_path="cookies.json" + ) + + df.at[idx, status_col] = "Done" + console.print(Panel(f"上传完成: {row.get('视频路径', '')}", title="[bold green]方法3[/bold green]")) + except Exception as e: + msg = re.sub(r"[\x00-\x08\x0b-\x0c\x0e-\x1f]", "", str(e)).replace("\n", " ").strip() + df.at[idx, status_col] = f"Error: {msg}" + console.print(Panel(str(e), title="[bold red]上传失败[/bold red]")) + finally: + df.to_excel(excel_path, index=False, engine="openpyxl") + return True + +# 生产环境 +if __name__ == "__main__": + import argparse + parser = argparse.ArgumentParser() + sub = parser.add_subparsers(dest="cmd") + p1 = sub.add_parser("upload-video") + p1.add_argument("--video", required=True) + p1.add_argument("--cover", default="") + p1.add_argument("--partition_tid", default="") + p1.add_argument("--tags", default="") + p1.add_argument("--description", default="") + p1.add_argument("--schedule_time", default="") + p1.add_argument("--collection", default="") + p1.add_argument("--cookies", default="cookies.json") + p1.add_argument("--proxy", default=None) + p1.add_argument("--title", default=None) + p2 = sub.add_parser("generate-excel") + p2.add_argument("--output-root", default="batch/output") + p2.add_argument("--excel", default=EXCEL_DEFAULT_PATH) + p3 = sub.add_parser("upload-excel") + p3.add_argument("--excel", default=EXCEL_DEFAULT_PATH) + p3.add_argument("--cookies", default="cookies.json") + p3.add_argument("--proxy", default=None) + args = parser.parse_args() + if args.cmd == "upload-video": + method1_upload( + video_path=args.video, + cover=args.cover, + partition_tid=args.partition_tid, + tags=args.tags, + description=args.description, + schedule_time=args.schedule_time, + collection=args.collection, + cookies_path=args.cookies, + proxy=args.proxy, + title=args.title + ) + elif args.cmd == "generate-excel": + method2_generate_excel(output_root=args.output_root, excel_path=args.excel) + elif args.cmd == "upload-excel": + method3_upload_from_excel(excel_path=args.excel, cookies=args.cookies, proxy=args.proxy) + else: + parser.print_help() +## 测试环境 +# if __name__ == '__main__': + # method3_upload_from_excel() + # method2_generate_excel() +# method1_upload( +# video_path="batch/output/segment_02/output_sub.mp4", +# cover="", +# partition_tid="", +# tags="第1章:[智能合约] 无需信任-透明协议-价值互联", +# description="""🌐 区块链的信任危机与解决方案: + +# 你是否曾因不信任中介机构而感到焦虑?麦当劳彩票舞弊、银行倒闭事件、Robinhood限制交易……历史一次次证明,承诺往往不堪一击。区块链智能合约应运而生,它能否终结“不信任”的怪圈? + +# 🔑 智能合约:信任的基石 + +# 智能合约是一种部署在去中心化区块链上的协议,一旦部署,便不可篡改。它像一个自动执行的数字协议,公开透明,无需人为干预。通过密码学和代码,智能合约确保了协议的公平执行,让信任不再依赖于人品,而是依赖于数学。 + +# 💡 智能合约如何解决现实问题? + +# * 麦当劳彩票舞弊:将彩票代码部署到区块链上,每次黑客尝试篡改,所有人都会收到通知,且无法更改。 +# * Robinhood限制交易:使用去中心化交易所,无需中心化机构,避免单方面限制交易。 +# * 银行倒闭:通过透明的偿付能力检查,构建类似银行的智能合约,防止资不抵债。 + +# 🌟 智能合约的优势 + +# * 去中心化:无需信任中介机构,协议由去中心化网络执行。 +# * 透明性:所有交易和代码公开可查,杜绝暗箱操作。 +# * 高效性:交易瞬间完成,无需漫长的清算和结算。 +# * 安全性:难以篡改,保护资产安全。 + +# 🌱 智能合约的应用 + +# * DeFi (去中心化金融):提供无需信任的金融服务。 +# * DAO (去中心化自治组织):通过智能合约实现社区自治。 +# * NFT (非同质化代币):赋予数字资产独一无二的价值。 + +# 🚀 加入智能合约的未来 + +# 智能合约正在重塑各行各业,从金融到艺术,再到供应链管理。现在就加入这场革命,探索智能合约的无限可能! + +# #智能合约 #区块链 #去中心化 #DeFi #信任危机 #技术未来""", +# schedule_time="", +# collection="", +# cookies_path="cookies.json", +# proxy=None, +# title=None +# ) + + +# 测试命令: biliup upload /Users/luogaiyu/code/VideoLingo/batch/output/segment_02/output_sub.mp4 --title "测试视频" --tag "测试,视频" --desc "这是一个测试视频" --copyright 1 --dtime 1767862800 --tid 36 diff --git a/core/prompts_storage.py b/core/prompts_storage.py index f8f4dc82..90072934 100644 --- a/core/prompts_storage.py +++ b/core/prompts_storage.py @@ -389,6 +389,7 @@ def get_title_introduction_prompt(text): ## Format Requirements - Title format: 第X章:[核心主题] 关键词1-关键词2-关键词3 (总长度不超过35字) - Introduction format: 至少400字的简洁介绍,要求有吸引力,并且段落清晰 +- Tags: 输出当前的tags ## Examples Good title: 第20章:[Raft算法] 日志复制-选举机制-一致性保证 @@ -420,7 +421,7 @@ def get_title_introduction_prompt(text): 数据分析从业者 对统计学感兴趣的观众 #数学教育 #概率论 #MIT公开课 #统计学 #数据分析 - +Good tags: 数学教育,概率论,MIT公开课,统计学,数据分析 ## INPUT Format The input contains: file_path||original_title||srt_content @@ -436,6 +437,7 @@ def get_title_introduction_prompt(text): {{ "file_path": "提取的完整文件路径", "title": "第X章:[核心主题] 关键词1-关键词2-关键词3", - "introduction": "至少400字的简洁介绍,要求有吸引力,并且段落清晰,参考上面的例子" + "introduction": "至少400字的简洁介绍,要求有吸引力,并且段落清晰,参考上面的例子", + "tags":"要有三个不同的标签,要求高度概括当前的视频的内容,根据字幕中的内容" }} ''' \ No newline at end of file From 644ded2ccc6098d7bb0b9a0c149ab831b2b8cc41 Mon Sep 17 00:00:00 2001 From: luogaiyu <1449528975@qq.com> Date: Thu, 8 Jan 2026 19:46:09 +0800 Subject: [PATCH 13/15] =?UTF-8?q?=E6=96=B0=E5=A2=9Eexcel=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../upload_video_2_bilibili.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/batch/utils/upload_video_2_bilibili/upload_video_2_bilibili.py b/batch/utils/upload_video_2_bilibili/upload_video_2_bilibili.py index 6701e8d9..68d8faa0 100644 --- a/batch/utils/upload_video_2_bilibili/upload_video_2_bilibili.py +++ b/batch/utils/upload_video_2_bilibili/upload_video_2_bilibili.py @@ -12,6 +12,8 @@ from core.config_utils import load_key import pexpect import sys +import datetime + console = Console() @@ -67,6 +69,17 @@ def method1_upload(video_path, title, tags, introduction, schedule_time, partiti def method2_generate_excel(output_root="batch/output", excel_path=EXCEL_DEFAULT_PATH): base = Path(output_root) rows = [] + + # 获取当前时间 + now = datetime.datetime.now() + # 获取明天的日期,时间设为18:00:00 + tomorrow_6pm = now.replace(hour=18, minute=0, second=0, microsecond=0) + datetime.timedelta(days=1) + # 转换为时间戳 + base_timestamp = int(tomorrow_6pm.timestamp()) + # Debug + # print(base_timestamp) + # print(tomorrow_6pm) + # print(base) if base.exists(): for child in base.iterdir(): if child.is_dir(): @@ -104,15 +117,15 @@ def method2_generate_excel(output_root="batch/output", excel_path=EXCEL_DEFAULT_ "标签": tags, "描述简介": introduction, "版权声明": 1, - "定时发布时间戳": "", + "定时发布时间戳": base_timestamp, "分区": TID, "加入合集": "" }) + base_timestamp += 86400 df = pd.DataFrame(rows) os.makedirs(os.path.dirname(excel_path), exist_ok=True) df.to_excel(excel_path, index=False, engine="openpyxl") console.print(Panel(f"Excel 生成完成: {excel_path}", title="[bold green]方法2[/bold green]")) - return excel_path def method3_upload_from_excel(excel_path=EXCEL_DEFAULT_PATH, cookies=None): From f97233d1ee18d5718a3a23f31bbfddeaa32791d7 Mon Sep 17 00:00:00 2001 From: luogaiyu <1449528975@qq.com> Date: Thu, 8 Jan 2026 23:04:27 +0800 Subject: [PATCH 14/15] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E5=90=88=E9=9B=86?= =?UTF-8?q?=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 8 +++++++- .../upload_video_2_bilibili/upload_video_2_bilibili.py | 8 +++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index ca46faeb..2f5d37ca 100644 --- a/.gitignore +++ b/.gitignore @@ -171,4 +171,10 @@ config.backup.yaml # runtime runtime/ dev/ -installer_files/ \ No newline at end of file +installer_files/ + +# config +config.yaml +# bilibili upload +cookies.json +qrcode.png diff --git a/batch/utils/upload_video_2_bilibili/upload_video_2_bilibili.py b/batch/utils/upload_video_2_bilibili/upload_video_2_bilibili.py index 68d8faa0..ed5d33d5 100644 --- a/batch/utils/upload_video_2_bilibili/upload_video_2_bilibili.py +++ b/batch/utils/upload_video_2_bilibili/upload_video_2_bilibili.py @@ -2,6 +2,7 @@ import time import subprocess from pathlib import Path +from typing import Collection import pandas as pd from rich.console import Console from rich.panel import Panel @@ -20,6 +21,7 @@ ##############参数控制################## TID=36 # 野生技术协会 +COLLECTION="[Web3]智能合约开发教程" ################################ EXCEL_DEFAULT_PATH = os.path.join("batch", "output", "bilibili_upload_tasks.xlsx") @@ -47,7 +49,7 @@ def method1_upload(video_path, title, tags, introduction, schedule_time, partiti args += ["--dtime", "\"" + str(int(schedule_time)) + "\""] # 合集 if collection: - args += ["--collection", "\"" + str(int(collection)) + "\"" ] + args += ["--collection", "\"" + str(collection) + "\"" ] # 需要先运行这个命令,阻塞当前的进程 cmd = ["biliup"] @@ -119,7 +121,7 @@ def method2_generate_excel(output_root="batch/output", excel_path=EXCEL_DEFAULT_ "版权声明": 1, "定时发布时间戳": base_timestamp, "分区": TID, - "加入合集": "" + "加入合集": COLLECTION }) base_timestamp += 86400 df = pd.DataFrame(rows) @@ -164,7 +166,7 @@ def method3_upload_from_excel(excel_path=EXCEL_DEFAULT_PATH, cookies=None): )) # method1_upload( - video_path=video_path, title=title, tags=tags, introduction=introduction, schedule_time=schedule_time, partition=partition, collection=None, cookies_path="cookies.json" + video_path=video_path, title=title, tags=tags, introduction=introduction, schedule_time=schedule_time, partition=partition, collection=collection, cookies_path="cookies.json" ) df.at[idx, status_col] = "Done" From de4e7056063b23c023e5ad7f0b906b797fafc2d3 Mon Sep 17 00:00:00 2001 From: DarkChunk Date: Thu, 19 Mar 2026 17:15:18 +0800 Subject: [PATCH 15/15] Update config.yaml --- config.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/config.yaml b/config.yaml index c710d17c..bd1213b7 100644 --- a/config.yaml +++ b/config.yaml @@ -6,7 +6,7 @@ display_language: "zh-CN" # API settings api: - key: 'sk-SZZ4FDEHYZN7vSbw45VkhOnfkkz6NXeNDEwemvc0H2jQF1SC' + key: '' base_url: 'https://api.302.ai' model: 'gemini-2.0-flash' @@ -67,7 +67,7 @@ tts_method: 'f5tts' # SiliconFlow FishTTS sf_fish_tts: # SiliconFlow API key - api_key: 'sk-SZZ4FDEHYZN7vSbw45VkhOnfkkz6NXeNDEwemvc0H2jQF1SC' + api_key: '' # only for mode "preset" voice: 'anna' # *only for mode "custom", dont set manually @@ -78,17 +78,17 @@ sf_fish_tts: # OpenAI TTS-1 API configuration, 302.ai API only openai_tts: - api_key: 'sk-SZZ4FDEHYZN7vSbw45VkhOnfkkz6NXeNDEwemvc0H2jQF1SC' + api_key: '' voice: 'alloy' # Azure configuration, 302.ai API only azure_tts: - api_key: 'sk-SZZ4FDEHYZN7vSbw45VkhOnfkkz6NXeNDEwemvc0H2jQF1SC' + api_key: '' voice: 'zh-CN-YunfengNeural' # FishTTS configuration, 302.ai API only fish_tts: - api_key: 'sk-SZZ4FDEHYZN7vSbw45VkhOnfkkz6NXeNDEwemvc0H2jQF1SC' + api_key: '' character: 'AD学姐' character_id_dict: 'AD学姐': '7f92f8afb8ec43bf81429cc1c9199cb1' @@ -108,7 +108,7 @@ gpt_sovits: refer_mode: 3 f5tts: - 302_api: 'sk-SZZ4FDEHYZN7vSbw45VkhOnfkkz6NXeNDEwemvc0H2jQF1SC' + 302_api: '' # *Audio speed range speed_factor: