Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
8d27e9f
feat(asr): implement ASR subtitle generation features
mkdir700 Oct 19, 2025
af623fa
chore: update .gitignore to include .cursor file
mkdir700 Oct 19, 2025
ee43a55
refactor(settings): enhance ASR settings UI with HelpText component
mkdir700 Oct 19, 2025
7e88bf5
fix(settings): improve API key validation handling in ASR settings
mkdir700 Oct 19, 2025
4349bf0
refactor(asr): prioritize errorCode over string matching in error han…
mkdir700 Oct 19, 2025
b3e1e0f
fix(asr): make DeepgramResponse optional in TranscriptSegment and han…
mkdir700 Oct 19, 2025
e164a73
refactor(asr): add auto language detection option
mkdir700 Oct 19, 2025
4bc6d2c
fix(asr): prevent errors when sending progress updates
mkdir700 Oct 19, 2025
48fb1c9
refactor(asr): replace sync file reading with streaming for Deepgram API
mkdir700 Oct 19, 2025
6784cb4
feat(asr): implement real Deepgram API key validation
mkdir700 Oct 19, 2025
6f2335d
fix(asr): replace hardcoded progress stage strings with ASRProgressSt…
mkdir700 Oct 19, 2025
7a99225
feat(asr): implement persistent subtitle storage and improve audio pr…
mkdir700 Oct 19, 2025
2b14b10
refactor(config): improve ASR configuration management and code consi…
mkdir700 Oct 19, 2025
61131f9
feat(preload): add ASR progress tracking and type safety improvements
mkdir700 Oct 19, 2025
de1ff02
refactor(audio): add FFmpeg duration parser with comprehensive test c…
mkdir700 Oct 19, 2025
ab6bb8b
refactor(asr): replace generic IPC listeners with dedicated ASR progr…
mkdir700 Oct 19, 2025
0e4768b
refactor(asr): improve task cancellation handling throughout ASR pipe…
mkdir700 Oct 19, 2025
c9ea439
refactor(asr): optimize error handling to prioritize errorCode over s…
mkdir700 Oct 19, 2025
895d51d
test: fix unused parameter warnings in DeepgramTranscriber tests
mkdir700 Oct 19, 2025
e173b82
feat(asr): add strongly typed error codes for ASR operations
mkdir700 Oct 19, 2025
a33fb27
fix(asr): replace incorrect activeRequests.size check with explicit A…
mkdir700 Oct 19, 2025
8756a61
style(ui): standardize font sizes and spacing using theme constants
mkdir700 Oct 19, 2025
7b150ad
feat(asr): add auto-detect option to language selector
mkdir700 Oct 19, 2025
52bf37b
refactor(asr): remove redundant offProgress method from preload API
mkdir700 Oct 19, 2025
de9e748
fix(asr): resolve audio format and codec mismatch issue
mkdir700 Oct 19, 2025
95cedc3
Update src/renderer/src/pages/player/components/ASRProgressModal.tsx
mkdir700 Oct 19, 2025
cdffec5
fix(asr): improve ASR progress modal styling and add failed state
mkdir700 Oct 19, 2025
6049a6d
test(SubtitleDictionaryLookup): improve test reliability with waitFor…
mkdir700 Oct 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,4 @@ resources/ffprobe/
resources/media-server
.ffmpeg-cache
.ffprobe-cache
.cursor
6 changes: 6 additions & 0 deletions packages/shared/IpcChannel.ts
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,12 @@ export enum IpcChannel {
Media_ExtractSubtitle = 'media:extract-subtitle',
SubtitleExtractor_CleanupTemp = 'subtitle-extractor:cleanup-temp',

// ASR 字幕生成相关 IPC 通道 / ASR subtitle generation related IPC channels
ASR_Generate = 'asr:generate',
ASR_Progress = 'asr:progress',
ASR_Cancel = 'asr:cancel',
ASR_ValidateApiKey = 'asr:validate-api-key',

// 文件系统相关 IPC 通道 / File system related IPC channels
Fs_CheckFileExists = 'fs:check-file-exists',
Fs_ReadFile = 'fs:read-file',
Expand Down
223 changes: 223 additions & 0 deletions packages/shared/types/asr.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
/**
* ASR 字幕生成相关类型定义
*/

/**
* Error codes for ASR operations; this is the type of `ASRResult.errorCode`.
*/
export type ASRErrorCode =
| 'NO_API_KEY'
| 'INVALID_API_KEY'
| 'QUOTA_EXCEEDED'
| 'NETWORK_ERROR'
| 'AUDIO_EXTRACTION_FAILED'
| 'SUBTITLE_EXTRACTION_FAILED'
| 'TASK_CANCELLED'
| 'UNKNOWN_ERROR'

/**
* Subtitle entry produced by ASR (simplified form; it has to be converted to a
* SubtitleItem later).
*/
export interface ASRSubtitleItem {
/** Index of the entry */
index: number
/** Start time (seconds) */
startTime: number
/** End time (seconds) */
endTime: number
/** Text of the entry */
text: string
/** Word-level timestamps (optional) */
words?: DeepgramWord[]
}

/**
 * Options for an ASR subtitle generation request.
 */
export interface ASRGenerateOptions {
  /** Path to the video file */
  videoPath: string
  /** Video ID (used to save the subtitle record) */
  videoId: number
  /**
   * Target language: an ISO 639-1 code such as 'en', 'zh' or 'ja', or 'auto'
   * for automatic language detection.
   *
   * Declared as `'auto' | (string & {})` instead of `string | 'auto'`: the
   * latter collapses to plain `string`, so the `'auto'` literal disappears from
   * the type. Intersecting with `{}` keeps every string assignable while
   * stopping the compiler from absorbing the literal.
   */
  language?: 'auto' | (string & {})
  /** Deepgram model selection */
  model?: 'nova-2' | 'nova-3'
  /** Output format */
  outputFormat?: 'srt' | 'vtt'
}
Comment thread
mkdir700 marked this conversation as resolved.
Comment on lines +34 to +45
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial

language?: string | 'auto' 等同于 string,类型约束没有实际收紧

若希望在类型层面强调 'auto',建议引入受控字面量集合或品牌类型;否则可以仅留注释避免造成误导。

可选修正示例(不必枚举全部语言,先引入有限集合并退回到宽松字符串):

+export type ASRLanguage =
+  | 'auto'
+  | 'en'
+  | 'zh'
+  | 'ja'
+  | (string & {}) // 允许其它合法代码
 export interface ASRGenerateOptions {
@@
-  language?: string | 'auto'
+  language?: ASRLanguage
}
🤖 Prompt for AI Agents
In packages/shared/types/asr.ts around lines 34 to 45, the declared type
language?: string | 'auto' is redundant because 'auto' is already included by
string; fix by either (A) making the field a plain optional string (remove the
literal union) or (B) introduce a controlled union type that lists the allowed
language literals including 'auto' and also a permissive fallback (e.g., a union
of specific ISO codes plus 'auto' and a fallback string type), then use that
alias here; update the interface to reference the new alias and adjust any
callers/types accordingly.


/**
* Stages reported while an ASR task is in progress.
*/
export enum ASRProgressStage {
/** Initializing */
Initializing = 'initializing',
/** Extracting audio */
ExtractingAudio = 'extracting_audio',
/** Transcribing */
Transcribing = 'transcribing',
/** Formatting */
Formatting = 'formatting',
/** Saving */
Saving = 'saving',
/** Complete */
Complete = 'complete',
/** Failed */
Failed = 'failed'
}

/**
* Progress information for an ASR task.
*/
export interface ASRProgress {
/** Task ID */
taskId: string
/** Current stage */
stage: ASRProgressStage
/** Progress percentage (0-100) */
percent: number
/** Index of the segment currently being processed (transcription stage) */
current?: number
/** Total number of segments (transcription stage) */
total?: number
/** Message for the current stage */
message?: string
/** Estimated time remaining (seconds) */
eta?: number
}

/**
* Result of an ASR subtitle generation run.
*/
export interface ASRResult {
/** Whether the run succeeded */
success: boolean
/** Generated subtitle data */
subtitles?: ASRSubtitleItem[]
/** Output file path (SRT/VTT) */
outputPath?: string
/** ID of the subtitle library record */
subtitleLibraryId?: number
/** Error message */
error?: string
/** Error code */
errorCode?: ASRErrorCode
/** Statistics */
stats?: {
/** Audio duration (seconds) */
duration: number
/** Processing time (seconds) */
processingTime: number
/** Number of segments */
segmentCount: number
/** Number of subtitle entries */
subtitleCount: number
}
}
Comment thread
mkdir700 marked this conversation as resolved.

/**
* Information about one audio segment.
*/
export interface AudioSegment {
/** Segment index */
index: number
/** Start time (seconds) */
start: number
/** End time (seconds) */
end: number
/** Duration (seconds) */
duration: number
/** Path to the audio file */
filePath: string
}

/**
* Deepgram word-level timestamp.
*/
export interface DeepgramWord {
/** Word text */
word: string
/** Start time (seconds) */
start: number
/** End time (seconds) */
end: number
/** Confidence (0-1) */
confidence: number
/** Word form with punctuation */
punctuated_word?: string
}

/**
* Deepgram utterance (sentence segment) information.
*/
export interface DeepgramUtterance {
/** Start time (seconds) */
start: number
/** End time (seconds) */
end: number
/** Utterance text */
transcript: string
/** Confidence (0-1) */
confidence: number
/** Words in the utterance */
words: DeepgramWord[]
}

/**
* Deepgram API response (simplified).
*/
export interface DeepgramResponse {
/** Transcription results (an object wrapping the channel list) */
results: {
/** Channel array */
channels: Array<{
/** Alternative results */
alternatives: Array<{
/** Full transcript text */
transcript: string
/** Confidence */
confidence: number
/** Word array */
words: DeepgramWord[]
}>
/** Utterance array (when utterances=true) */
utterances?: DeepgramUtterance[]
}>
}
/** Metadata */
metadata: {
/** Request ID */
request_id: string
/** Audio duration */
duration: number
/** Number of channels */
channels: number
}
}

/**
* Transcription result for one audio segment.
*/
export interface TranscriptSegment {
/** Original audio segment information */
audioSegment: AudioSegment
/** Deepgram response (optional) */
response?: DeepgramResponse
/** Whether transcription succeeded */
success: boolean
/** Error message */
error?: string
}
Comment thread
mkdir700 marked this conversation as resolved.

/**
* Result of validating an API key.
*/
export interface ApiKeyValidationResult {
/** Whether the key is valid */
valid: boolean
/** Error message */
error?: string
/** Account information (optional) */
account?: {
/** Remaining balance / quota */
remainingBalance?: number
}
}
1 change: 1 addition & 0 deletions packages/shared/types/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/**
* Shared types index
*/
export * from './asr'
export * from './database'
export * from './media-server'
export * from './mediainfo'
Expand Down
29 changes: 29 additions & 0 deletions src/main/ipc.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import fs from 'node:fs'

import { UpgradeChannel } from '@shared/config/constant'
import { IpcChannel } from '@shared/IpcChannel'
import type { ASRGenerateOptions } from '@shared/types'
import { Notification, Shortcut, ThemeMode } from '@types'
import {
BrowserWindow,
Expand All @@ -19,6 +20,7 @@ import { isLinux, isMac, isPortable, isWin } from './constant'
import { db } from './db/dao'
import appService from './services/AppService'
import AppUpdater from './services/AppUpdater'
import ASRSubtitleService from './services/ASRSubtitleService'
import { configManager } from './services/ConfigManager'
import DictionaryService from './services/DictionaryService'
import FFmpegService from './services/FFmpegService'
Expand All @@ -43,6 +45,7 @@ const dictionaryService = new DictionaryService()
const ffmpegService = new FFmpegService()
const mediaParserService = new MediaParserService()
const subtitleExtractorService = new SubtitleExtractorService()
const asrSubtitleService = new ASRSubtitleService()

/**
* Registers all ipcMain handlers used by the main process.
Expand Down Expand Up @@ -713,6 +716,32 @@ export function registerIpc(mainWindow: BrowserWindow, app: Electron.App) {
return count
})

// IPC handlers for ASR subtitle generation

// Starts subtitle generation and relays every progress update to the main window.
ipcMain.handle(IpcChannel.ASR_Generate, async (_event, options: ASRGenerateOptions) => {
  logger.info('收到 ASR 字幕生成请求', { videoId: options.videoId })

  return await asrSubtitleService.generateSubtitle(options, (progress) => {
    try {
      // Skip the send once the window or its webContents has been destroyed.
      if (mainWindow.isDestroyed() || mainWindow.webContents.isDestroyed()) {
        return
      }
      mainWindow.webContents.send(IpcChannel.ASR_Progress, progress)
    } catch (err) {
      // A failed progress send is logged as a warning and not rethrown.
      const reason = err instanceof Error ? err.message : String(err)
      logger.warn('ASR 进度事件发送失败', { error: reason })
    }
  })
})

// Cancels the ASR task identified by taskId and returns the service's answer.
ipcMain.handle(IpcChannel.ASR_Cancel, async (_event, taskId: string) => {
  logger.info('取消 ASR 任务', { taskId })
  const cancelResult = await asrSubtitleService.cancelTask(taskId)
  return cancelResult
})

// Validates a Deepgram API key; only the message is logged, the key is not passed to the logger.
ipcMain.handle(IpcChannel.ASR_ValidateApiKey, async (_event, apiKey: string) => {
  logger.info('验证 Deepgram API Key')
  const validation = await asrSubtitleService.validateApiKey(apiKey)
  return validation
})
Comment thread
mkdir700 marked this conversation as resolved.

// 文件系统相关 IPC 处理程序 / File system-related IPC handlers
ipcMain.handle(IpcChannel.Fs_CheckFileExists, async (_, filePath: string) => {
try {
Expand Down
Loading