diff --git a/.gitignore b/.gitignore
index 1b019672..508717b3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -54,3 +54,4 @@ resources/ffprobe/
 resources/media-server
 .ffmpeg-cache
 .ffprobe-cache
+.cursor
diff --git a/packages/shared/IpcChannel.ts b/packages/shared/IpcChannel.ts
index 9c665151..920a3203 100644
--- a/packages/shared/IpcChannel.ts
+++ b/packages/shared/IpcChannel.ts
@@ -140,6 +140,12 @@ export enum IpcChannel {
   Media_ExtractSubtitle = 'media:extract-subtitle',
   SubtitleExtractor_CleanupTemp = 'subtitle-extractor:cleanup-temp',
 
+  // ASR 字幕生成相关 IPC 通道 / ASR subtitle generation related IPC channels
+  ASR_Generate = 'asr:generate',
+  ASR_Progress = 'asr:progress',
+  ASR_Cancel = 'asr:cancel',
+  ASR_ValidateApiKey = 'asr:validate-api-key',
+
   // 文件系统相关 IPC 通道 / File system related IPC channels
   Fs_CheckFileExists = 'fs:check-file-exists',
   Fs_ReadFile = 'fs:read-file',
diff --git a/packages/shared/types/asr.ts b/packages/shared/types/asr.ts
new file mode 100644
index 00000000..555a065a
--- /dev/null
+++ b/packages/shared/types/asr.ts
@@ -0,0 +1,223 @@
+/**
+ * Type definitions for ASR (automatic speech recognition) subtitle generation
+ */
+
+export type ASRErrorCode =
+  | 'NO_API_KEY'
+  | 'INVALID_API_KEY'
+  | 'QUOTA_EXCEEDED'
+  | 'NETWORK_ERROR'
+  | 'AUDIO_EXTRACTION_FAILED'
+  | 'SUBTITLE_EXTRACTION_FAILED'
+  | 'TASK_CANCELLED'
+  | 'UNKNOWN_ERROR'
+
+/**
+ * Subtitle entry produced by ASR (simplified; must later be converted to SubtitleItem)
+ */
+export interface ASRSubtitleItem {
+  /** Index */
+  index: number
+  /** Start time (seconds) */
+  startTime: number
+  /** End time (seconds) */
+  endTime: number
+  /** Text */
+  text: string
+  /** Word-level timestamps (optional) */
+  words?: DeepgramWord[]
+}
+
+/**
+ * ASR generation options
+ */
+export interface ASRGenerateOptions {
+  /** Path to the video file */
+  videoPath: string
+  /** Video ID (used when saving the subtitle record) */
+  videoId: number
+  /** Target language (ISO 639-1 code such as 'en', 'zh', 'ja', or 'auto' for automatic language detection) */
+  language?: string | 'auto'
+  /** Deepgram model selection */
+  model?: 'nova-2' | 'nova-3'
+  /** Output format */
+  outputFormat?: 'srt' | 'vtt'
+}
+
+/**
+ * ASR progress stages
+ */
+export enum ASRProgressStage {
+  /** Initializing */
+  Initializing = 'initializing',
+  /** Audio extraction */
+  ExtractingAudio = 'extracting_audio',
+  /** Transcribing */
+  Transcribing = 'transcribing',
+  /** Formatting */
+  Formatting = 'formatting',
+  /** Saving */
+  Saving = 'saving',
+  /** Complete */
+  Complete = 'complete',
+  /** Failed */
+  Failed = 'failed'
+}
+
+/**
+ * ASR progress information
+ */
+export interface ASRProgress {
+  /** Task ID */
+  taskId: string
+  /** Current stage */
+  stage: ASRProgressStage
+  /** Progress percentage (0-100) */
+  percent: number
+  /** Index of the segment currently being processed (transcription stage) */
+  current?: number
+  /** Total number of segments (transcription stage) */
+  total?: number
+  /** Stage message */
+  message?: string
+  /** Estimated time remaining (seconds) */
+  eta?: number
+}
+
+/**
+ * ASR generation result
+ */
+export interface ASRResult {
+  /** Whether generation succeeded */
+  success: boolean
+  /** Generated subtitle data */
+  subtitles?: ASRSubtitleItem[]
+  /** Output file path (SRT/VTT) */
+  outputPath?: string
+  /** Subtitle library record ID */
+  subtitleLibraryId?: number
+  /** Error message */
+  error?: string
+  /** Error code */
+  errorCode?: ASRErrorCode
+  /** Statistics */
+  stats?: {
+    /** Audio duration (seconds) */
+    duration: number
+    /** Processing time (seconds) */
+    processingTime: number
+    /** Number of segments */
+    segmentCount: number
+    /** Number of subtitle entries */
+    subtitleCount: number
+  }
+}
+
+/**
+ * Audio segment information
+ */
+export interface AudioSegment {
+  /** Segment index */
+  index: number
+  /** Start time (seconds) */
+  start: number
+  /** End time (seconds) */
+  end: number
+  /** Duration (seconds) */
+  duration: number
+  /** Audio file path */
+  filePath: string
+}
+
+/**
+ * Deepgram word-level timestamp
+ */
+export interface DeepgramWord {
+  /** Word text */
+  word: string
+  /** Start time (seconds) */
+  start: number
+  /** End time (seconds) */
+  end: number
+  /** Confidence (0-1) */
+  confidence: number
+  /** Word form with punctuation */
+  punctuated_word?: string
+}
+
+/**
+ * Deepgram utterance information
+ */
+export interface DeepgramUtterance {
+  /** Start time (seconds) */
+  start: number
+  /** End time (seconds) */
+  end: number
+  /** Utterance text */
+  transcript: string
+  /** Confidence (0-1) */
+  confidence: number
+  /** Word array */
+  words: DeepgramWord[]
+}
+
+/**
+ * Deepgram API response (simplified)
+ */
+export interface DeepgramResponse {
+  /** Results */
+  results: {
+    /** Channel array */
+    channels: Array<{
+      /** Alternative results */
+      alternatives: Array<{
+        /** Full transcript text */
+        transcript: string
+        /** Confidence */
+        confidence: number
+        /** Word array */
+        words: DeepgramWord[]
+      }>
+      /** Utterance array (when utterances=true). NOTE(review): Deepgram's pre-recorded API appears to return utterances under `results`, not per channel - confirm */
+      utterances?: DeepgramUtterance[]
+    }>
+  }
+  /** Metadata */
+  metadata: {
+    /** Request ID */
+    request_id: string
+    /** Audio duration */
+    duration: number
+    /** Number of channels */
+    channels: number
+  }
+}
+
+/**
+ * Transcript segment result
+ */
+export interface TranscriptSegment {
+  /** Original audio segment information */
+  audioSegment: AudioSegment
+  /** Deepgram response */
+  response?: DeepgramResponse
+  /** Whether it succeeded */
+  success: boolean
+  /** Error message */
+  error?: string
+}
+
+/**
+ * API key validation result
+ */
+export interface ApiKeyValidationResult {
+  /** Whether the key is valid */
+  valid: boolean
+  /** Error message */
+  error?: string
+  /** Account information (optional) */
+  account?: {
+    /** Remaining quota */
+    remainingBalance?: number
+  }
+}
diff --git a/packages/shared/types/index.ts b/packages/shared/types/index.ts
index 4eacab43..3c5eaa14 100644
--- a/packages/shared/types/index.ts
+++ b/packages/shared/types/index.ts
@@ -1,6 +1,7 @@
 /**
  * Shared types index
  */
+export * from './asr'
 export * from './database'
 export * from './media-server'
 export * from './mediainfo'
diff --git a/src/main/ipc.ts b/src/main/ipc.ts
index df15bc05..b7de6de7 100644
--- a/src/main/ipc.ts
+++ b/src/main/ipc.ts
@@ -2,6 +2,7 @@ import fs from 'node:fs'
 
 import { UpgradeChannel } from '@shared/config/constant'
 import { IpcChannel } from '@shared/IpcChannel'
+import type { ASRGenerateOptions } from '@shared/types'
 import { Notification, Shortcut, ThemeMode } from '@types'
 import {
   BrowserWindow,
@@ -19,6 +20,7 @@ import { isLinux, isMac, isPortable, isWin } from './constant'
 import { db } from './db/dao'
 import appService from './services/AppService'
 import AppUpdater from './services/AppUpdater'
+import ASRSubtitleService from './services/ASRSubtitleService'
 import { configManager } from './services/ConfigManager'
 import DictionaryService from './services/DictionaryService'
 import FFmpegService from './services/FFmpegService'
@@ -43,6 +45,7 @@ const dictionaryService = new DictionaryService()
 const ffmpegService = new FFmpegService()
 const mediaParserService = new MediaParserService()
 const subtitleExtractorService = new SubtitleExtractorService()
+const asrSubtitleService = new ASRSubtitleService()
 
 /**
  * Registers all ipcMain handlers used by the main process.
@@ -713,6 +716,32 @@ export function registerIpc(mainWindow: BrowserWindow, app: Electron.App) {
     return count
   })
 
+  // ASR 字幕生成相关 IPC 处理程序 / ASR subtitle generation related IPC handlers
+  ipcMain.handle(IpcChannel.ASR_Generate, async (_, options: ASRGenerateOptions) => {
+    logger.info('收到 ASR 字幕生成请求', { videoId: options.videoId })
+    return await asrSubtitleService.generateSubtitle(options, (progress) => {
+      try {
+        if (!mainWindow.isDestroyed() && !mainWindow.webContents.isDestroyed()) {
+          mainWindow.webContents.send(IpcChannel.ASR_Progress, progress)
+        }
+      } catch (err) {
+        logger.warn('ASR 进度事件发送失败', {
+          error: err instanceof Error ? err.message : String(err)
+        })
+      }
+    })
+  })
+
+  ipcMain.handle(IpcChannel.ASR_Cancel, async (_, taskId: string) => {
+    logger.info('取消 ASR 任务', { taskId })
+    return await asrSubtitleService.cancelTask(taskId)
+  })
+
+  ipcMain.handle(IpcChannel.ASR_ValidateApiKey, async (_, apiKey: string) => {
+    logger.info('验证 Deepgram API Key')
+    return await asrSubtitleService.validateApiKey(apiKey)
+  })
+
   // 文件系统相关 IPC 处理程序 / File system-related IPC handlers
   ipcMain.handle(IpcChannel.Fs_CheckFileExists, async (_, filePath: string) => {
     try {
diff --git a/src/main/services/ASRSubtitleService.ts b/src/main/services/ASRSubtitleService.ts
new file mode 100644
index 00000000..038296f5
--- /dev/null
+++ b/src/main/services/ASRSubtitleService.ts
@@ -0,0 +1,542 @@
+/**
+ * Main ASR subtitle generation service
+ * Simplified version: uploads the complete audio to Deepgram directly, with no segmentation
+ */
+
+import type {
+  ASRErrorCode,
+  ASRGenerateOptions,
+  ASRProgress,
+  ASRResult,
+  ASRSubtitleItem,
+  DeepgramResponse,
+  DeepgramUtterance,
+  DeepgramWord
+} from '@shared/types'
+import { ASRProgressStage } from '@shared/types'
+import { app } from 'electron'
+import * as fs from 'fs'
+import * as path from 'path'
+import { v4 as uuidv4 } from 'uuid'
+
+import { db } from '../db/dao'
+import DeepgramTranscriber from './asr/DeepgramTranscriber'
+import SubtitleFormatter from './asr/SubtitleFormatter'
+import AudioPreprocessor from './audio/AudioPreprocessor'
+import { configManager } from './ConfigManager'
+import { loggerService } from './LoggerService'
+
+const logger = loggerService.withContext('ASRSubtitleService')
+
+export type ASRProgressCallback = (progress: ASRProgress) => void
+
+class ASRSubtitleService {
+  private audioPreprocessor: AudioPreprocessor
+  private subtitleFormatter: SubtitleFormatter
+
+  // Currently running tasks, keyed by task ID
+  private activeTasks: Map<string, { transcriber: DeepgramTranscriber; cancelled: boolean }> =
+    new Map()
+
+  constructor() {
+    this.audioPreprocessor = new AudioPreprocessor()
+    this.subtitleFormatter = new SubtitleFormatter()
+
+    logger.info('ASR 字幕服务初始化完成')
+  }
+
+  /**
+   * Build the persistent subtitle path `<userData>/subtitles/<videoId>/<taskId>.<outputFormat>`.
+   * The directory is created when missing; the subtitle file itself is not written here.
+   */
+  private async createPersistentSubtitlePath(
+    videoId: string,
+    taskId: string,
+    outputFormat: string
+  ): Promise<string> {
+    const subtitlesDir = path.join(app.getPath('userData'), 'subtitles', videoId)
+
+    // Create the directory without a check-then-create step: a recursive mkdir succeeds on
+    // an existing directory, so no separate access() probe is needed. It resolves to the
+    // first directory it created, or undefined when nothing had to be created.
+    const createdDir = await fs.promises.mkdir(subtitlesDir, { recursive: true })
+    if (createdDir !== undefined) {
+      logger.debug('创建字幕目录', { subtitlesDir })
+    }
+
+    return path.join(subtitlesDir, `${taskId}.${outputFormat}`)
+  }
+
+  /**
+   * Generate subtitles (simplified flow): extract audio, transcribe, export a file, save to DB
+   */
+  public async generateSubtitle(
+    options: ASRGenerateOptions,
+    progressCallback?: ASRProgressCallback
+  ): Promise<ASRResult> {
+    const taskId = uuidv4()
+    const startTime = Date.now()
+
+    logger.info('开始生成 ASR 字幕', {
+      taskId,
+      videoPath: options.videoPath,
+      language: options.language
+    })
+
+    // Create the temporary directory
+    const tempDir = this.audioPreprocessor.createTempDir(`asr-${taskId}-`)
+
+    try {
+      // Check the API key
+      const apiKey = configManager.getDeepgramApiKey()
+      if (!apiKey) {
+        throw new Error('NO_API_KEY')
+      }
+
+      // Resolve settings: explicit options first, then configured defaults
+      const language = options.language || configManager.getASRDefaultLanguage()
+      const model = (options.model || configManager.getASRModel()) as 'nova-2' | 'nova-3'
+      const outputFormat = options.outputFormat || 'srt'
+
+      // Stage 1: initialization
+      this.reportProgress(taskId, ASRProgressStage.Initializing, 0, progressCallback)
+
+      // Stage 2: extract audio
+      this.reportProgress(taskId, ASRProgressStage.ExtractingAudio, 5, progressCallback)
+      logger.info('开始提取音频')
+
+      const extractResult = await this.audioPreprocessor.extractAudioTrack(
+        options.videoPath,
+        tempDir,
+        {
+          sampleRate: 16000,
+          channels: 1
+        }
+      )
+
+      if (!extractResult.success || !extractResult.audioPath) {
+        throw new Error('AUDIO_EXTRACTION_FAILED')
+      }
+
+      const audioDuration = extractResult.duration || 0
+      logger.info('音频提取成功', { duration: audioDuration })
+
+      // Stage 3: transcribe the complete audio
+      this.reportProgress(taskId, ASRProgressStage.Transcribing, 15, progressCallback)
+      logger.info('开始转写音频')
+
+      const transcriber = new DeepgramTranscriber(1)
+      this.activeTasks.set(taskId, { transcriber, cancelled: false })
+
+      const deepgramResponse = await transcriber.transcribeFile(extractResult.audioPath, {
+        apiKey,
+        model,
+        language,
+        smartFormat: true,
+        utterances: true,
+        utteranceEndMs: 1000
+      })
+
+      // Check whether the task was cancelled
+      if (this.activeTasks.get(taskId)?.cancelled) {
+        throw new Error('TASK_CANCELLED')
+      }
+
+      logger.info('音频转写完成')
+
+      // Check for cancellation again (after transcription finished)
+      if (this.activeTasks.get(taskId)?.cancelled) {
+        throw new Error('TASK_CANCELLED')
+      }
+
+      // Stage 4: extract subtitle data
+      this.reportProgress(taskId, ASRProgressStage.Formatting, 85, progressCallback)
+      logger.info('开始格式化字幕')
+
+      // Check for cancellation (before formatting)
+      if (this.activeTasks.get(taskId)?.cancelled) {
+        throw new Error('TASK_CANCELLED')
+      }
+
+      // Extract subtitles from the Deepgram response
+      const rawSubtitles = this.extractSubtitlesFromResponse(deepgramResponse)
+      const formattedSubtitles = rawSubtitles
+      // Subtitle formatting (enable if needed)
+      // const formattedSubtitles = this.subtitleFormatter.formatSubtitles(rawSubtitles, {
+      //   maxDuration: 8,
+      //   maxCharsPerLine: 42
+      // })
+
+      // Check for cancellation (after extraction finished)
+      if (this.activeTasks.get(taskId)?.cancelled) {
+        throw new Error('TASK_CANCELLED')
+      }
+
+      // Stage 5: export the file
+      this.reportProgress(taskId, ASRProgressStage.Saving, 90, progressCallback)
+      logger.info('开始导出字幕文件')
+
+      // Check for cancellation (before exporting)
+      if (this.activeTasks.get(taskId)?.cancelled) {
+        throw new Error('TASK_CANCELLED')
+      }
+
+      // Write straight into the persistent directory
+      const outputPath = await this.createPersistentSubtitlePath(
+        String(options.videoId),
+        taskId,
+        outputFormat
+      )
+      if (outputFormat === 'srt') {
+        await this.subtitleFormatter.exportToSRT(formattedSubtitles, outputPath)
+      } else {
+        await this.subtitleFormatter.exportToVTT(formattedSubtitles, outputPath)
+      }
+
+      // Check for cancellation (after export finished)
+      if (this.activeTasks.get(taskId)?.cancelled) {
+        throw new Error('TASK_CANCELLED')
+      }
+
+      // Stage 6: save to the database
+      this.reportProgress(taskId, ASRProgressStage.Saving, 95, progressCallback)
+      logger.info('开始保存字幕到数据库')
+
+      // Convert ASRSubtitleItem entries to the SubtitleItem shape
+      const subtitleItems = formattedSubtitles.map((item) => ({
+        id: `${taskId}-${item.index}`,
+        startTime: item.startTime,
+        endTime: item.endTime,
+        originalText: item.text,
+        translatedText: undefined,
+        words: item.words // keep word-level timestamps
+      }))
+
+      // Save to the database
+      let subtitleLibraryId: number | undefined
+      try {
+        const result = await db.subtitleLibrary.addSubtitle({
+          videoId: options.videoId,
+          filePath: outputPath, // use the persistent path directly
+          subtitles: JSON.stringify(subtitleItems),
+          parsed_at: Date.now()
+        })
+        subtitleLibraryId = result.id
+        logger.info('字幕保存到数据库成功', { subtitleLibraryId })
+      } catch (error) {
+        logger.error('保存字幕到数据库失败', {
+          error: error instanceof Error ? error.message : String(error)
+        })
+        // Do not rethrow; the result is still returned (subtitleLibraryId stays undefined)
+      }
+
+      // Done
+      const processingTime = (Date.now() - startTime) / 1000
+      this.reportProgress(taskId, ASRProgressStage.Complete, 100, progressCallback)
+
+      logger.info('ASR 字幕生成完成', {
+        taskId,
+        subtitleCount: formattedSubtitles.length,
+        processingTime: `${processingTime.toFixed(2)}s`,
+        subtitleLibraryId
+      })
+
+      // Remove the task from the active set
+      this.activeTasks.delete(taskId)
+
+      return {
+        success: true,
+        subtitles: formattedSubtitles,
+        outputPath,
+        subtitleLibraryId,
+        stats: {
+          duration: audioDuration,
+          processingTime,
+          segmentCount: 1,
+          subtitleCount: formattedSubtitles.length
+        }
+      }
+    } catch (error) {
+      const errorMessage = error instanceof Error ? error.message : String(error)
+      const errorCode = this.getErrorCode(errorMessage)
+
+      // Log user cancellation at info level rather than as an error
+      if (errorCode === 'TASK_CANCELLED') {
+        logger.info('用户取消了 ASR 字幕生成', { taskId })
+      } else {
+        logger.error('ASR 字幕生成失败', {
+          taskId,
+          error: errorMessage
+        })
+      }
+
+      this.reportProgress(taskId, ASRProgressStage.Failed, 0, progressCallback)
+      this.activeTasks.delete(taskId)
+
+      return {
+        success: false,
+        error: errorMessage,
+        errorCode
+      }
+    } finally {
+      // Clean up the temporary directory
+      try {
+        await this.audioPreprocessor.cleanupTempDir(tempDir)
+        logger.debug('临时目录清理成功', { tempDir })
+      } catch (error) {
+        logger.error('临时目录清理失败', {
+          tempDir,
+          error: error instanceof Error ? error.message : String(error)
+        })
+      }
+    }
+  }
+
+  /**
+   * Convert a Deepgram response into subtitle items. Utterances on the first channel are
+   * preferred; otherwise the first alternative's words are grouped into sentences. Any error
+   * raised during extraction is logged and replaced by Error('SUBTITLE_EXTRACTION_FAILED').
+   */
+  private extractSubtitlesFromResponse(response?: DeepgramResponse): ASRSubtitleItem[] {
+    const items: ASRSubtitleItem[] = []
+
+    try {
+      // No response: warn and hand back the empty list
+      if (!response) {
+        logger.warn('Deepgram 响应为空，无法提取字幕')
+        return items
+      }
+
+      const firstChannel = response.results?.channels?.[0]
+      const utteranceList = firstChannel?.utterances as DeepgramUtterance[] | undefined
+
+      if (utteranceList?.length) {
+        // Preferred path: one subtitle per utterance
+        logger.info('使用 utterances 提取字幕', { count: utteranceList.length })
+
+        for (const [index, utterance] of utteranceList.entries()) {
+          items.push({
+            index,
+            startTime: utterance.start,
+            endTime: utterance.end,
+            text: utterance.transcript,
+            words: utterance.words // word-level timestamps
+          })
+        }
+      } else {
+        // Fallback: group the first alternative's words into sentences
+        const wordList = firstChannel?.alternatives?.[0]?.words as DeepgramWord[] | undefined
+        if (wordList?.length) {
+          logger.info('使用 words 提取字幕（智能分段）', { count: wordList.length })
+          items.push(...this.groupWordsIntoSentences(wordList))
+        }
+      }
+
+      logger.info('字幕提取完成', { count: items.length })
+    } catch (error) {
+      const reason = error instanceof Error ? error.message : String(error)
+      logger.error('提取字幕失败', { error: reason })
+      throw new Error('SUBTITLE_EXTRACTION_FAILED')
+    }
+
+    return items
+  }
+
+  /**
+   * Group words into subtitle sentences.
+   * Each word is appended to the current sentence, and shouldBreakSentence decides whether
+   * that word closes it (punctuation, pause length and duration limits).
+   */
+  private groupWordsIntoSentences(words: DeepgramWord[]): ASRSubtitleItem[] {
+    const result: ASRSubtitleItem[] = []
+    const lastIndex = words.length - 1
+    let pending: DeepgramWord[] = []
+    let startedAt = 0
+
+    for (const [position, word] of words.entries()) {
+      // The first word of a sentence fixes its start time
+      if (pending.length === 0) {
+        startedAt = word.start
+      }
+      pending.push(word)
+
+      // Ask the break rules whether this word closes the sentence
+      const closesSentence = this.shouldBreakSentence(
+        word,
+        words[position + 1],
+        startedAt,
+        position === lastIndex
+      )
+      if (!closesSentence) {
+        continue
+      }
+
+      // Emit the finished sentence, then start collecting the next one
+      const text = pending.map((w) => w.punctuated_word || w.word).join(' ')
+      result.push({
+        index: result.length,
+        startTime: startedAt,
+        endTime: word.end,
+        text,
+        words: pending
+      })
+      pending = []
+    }
+
+    // Summary log; avgWordsPerSentence is words per emitted sentence, one decimal place
+    const avgWordsPerSentence = (words.length / result.length).toFixed(1)
+    logger.info('智能分段完成', {
+      totalWords: words.length,
+      sentenceCount: result.length,
+      avgWordsPerSentence
+    })
+
+    return result
+  }
+
+  /**
+   * Decide whether the sentence being built should end after `currentWord`.
+   * Rules are tried in order: last word; sentence-final punctuation; a pause above 0.8 s;
+   * a sentence over 8 s on a clause mark or a pause above 0.2 s; any sentence over 10 s.
+   */
+  private shouldBreakSentence(
+    currentWord: DeepgramWord,
+    nextWord: DeepgramWord | undefined,
+    sentenceStartTime: number,
+    isLastWord: boolean
+  ): boolean {
+    // The final word always closes the sentence
+    if (isLastWord) {
+      return true
+    }
+
+    // Use the punctuated form when available, otherwise the raw word
+    const token = currentWord.punctuated_word || currentWord.word
+
+    // Sentence-final punctuation (., !, ?, 。, ！, ？) ends the sentence immediately
+    if (/[.!?。！？]$/.test(token)) {
+      logger.debug('断句：句末标点', {
+        word: token
+      })
+      return true
+    }
+
+    // Gap between this word's end and the next word's start (0 without a next word)
+    const gap = nextWord ? nextWord.start - currentWord.end : 0
+
+    // A pause longer than 800 ms is treated as a sentence boundary
+    if (gap > 0.8) {
+      logger.debug('断句：长停顿', {
+        word: token,
+        pauseDuration: gap.toFixed(3)
+      })
+      return true
+    }
+
+    // How long the current sentence has run up to the end of this word
+    const elapsed = currentWord.end - sentenceStartTime
+    const overSoftLimit = elapsed > 8
+
+    // Past 8 s: break at a clause mark (comma, semicolon, colon) ...
+    if (overSoftLimit && /[,;:，；：]$/.test(token)) {
+      logger.debug('断句：超时 + 标点', {
+        word: token,
+        duration: elapsed.toFixed(2)
+      })
+      return true
+    }
+
+    // ... or at any pause longer than 200 ms
+    if (overSoftLimit && gap > 0.2) {
+      logger.debug('断句：超时 + 短停顿', {
+        word: token,
+        duration: elapsed.toFixed(2)
+      })
+      return true
+    }
+
+    // Past 10 s: break regardless of punctuation or pauses
+    if (elapsed > 10) {
+      logger.debug('断句：强制超时', {
+        word: token,
+        duration: elapsed.toFixed(2)
+      })
+      return true
+    }
+
+    return false
+  }
+
+  /**
+   * Cancel a running task; resolves false when `taskId` is not an active task, true otherwise.
+   */
+  public async cancelTask(taskId: string): Promise<boolean> {
+    const task = this.activeTasks.get(taskId)
+    if (!task) {
+      return false
+    }
+
+    logger.info('取消 ASR 任务', { taskId })
+    task.cancelled = true
+    // Keep the entry: generateSubtitle reads the cancelled flag and deletes it when it stops
+    await task.transcriber.cancelAll()
+
+    return true
+  }
+
+  /**
+   * Validate an API key through a fresh DeepgramTranscriber. Its result is returned as is;
+   * if the call throws, the failure is reported as `{ valid: false, error }` instead.
+   */
+  public async validateApiKey(apiKey: string): Promise<{ valid: boolean; error?: string }> {
+    logger.info('验证 Deepgram API Key')
+
+    try {
+      // One-off transcriber instance used only for this check
+      const validator = new DeepgramTranscriber(1)
+      return await validator.validateApiKey(apiKey)
+    } catch (failure) {
+      // Non-Error throwables get a fixed fallback message
+      const error = failure instanceof Error ? failure.message : 'API Key 验证失败'
+      return { valid: false, error }
+    }
+  }
+
+  /**
+   * Send a progress update to `callback` when one was supplied; otherwise do nothing.
+   * `percent` is rounded with Math.round before it is sent.
+   */
+  private reportProgress(
+    taskId: string,
+    stage: ASRProgressStage,
+    percent: number,
+    callback?: ASRProgressCallback,
+    current?: number,
+    total?: number
+  ): void {
+    if (!callback) {
+      return
+    }
+
+    // message and eta are left unset here
+    const roundedPercent = Math.round(percent)
+    const update: ASRProgress = { taskId, stage, percent: roundedPercent, current, total }
+    callback(update)
+  }
+
+  /**
+   * Map an error message to an ASRErrorCode by substring match; no match gives 'UNKNOWN_ERROR'.
+   */
+  private getErrorCode(errorMessage: string): ASRErrorCode {
+    if (errorMessage.includes('NO_API_KEY')) return 'NO_API_KEY'
+    if (errorMessage.includes('API Key 无效')) return 'INVALID_API_KEY'
+    if (errorMessage.includes('配额')) return 'QUOTA_EXCEEDED'
+    if (errorMessage.includes('网络')) return 'NETWORK_ERROR'
+    if (errorMessage.includes('AUDIO_EXTRACTION_FAILED')) return 'AUDIO_EXTRACTION_FAILED'
+    if (errorMessage.includes('SUBTITLE_EXTRACTION_FAILED')) return 'SUBTITLE_EXTRACTION_FAILED'
+    if (/(TASK|REQUEST)_CANCELLED/.test(errorMessage)) return 'TASK_CANCELLED'
+    return 'UNKNOWN_ERROR'
+  }
+}
+
+export default ASRSubtitleService
diff --git a/src/main/services/ConfigManager.ts b/src/main/services/ConfigManager.ts
index 8798e76c..ad8b3de2 100644
--- a/src/main/services/ConfigManager.ts
+++ b/src/main/services/ConfigManager.ts
@@ -39,7 +39,11 @@ export enum ConfigKeys {
   TestChannel = 'testChannel',
   TestPlan = 'testPlan',
   SpellCheckLanguages = 'spellCheckLanguages',
-  DisableHardwareAcceleration = 'disableHardwareAcceleration'
+  DisableHardwareAcceleration = 'disableHardwareAcceleration',
+  // ASR 相关配置
+  DeepgramApiKey = 'deepgramApiKey',
+  ASRDefaultLanguage = 'asrDefaultLanguage',
+  ASRModel = 'asrModel'
 }
 
 // 获取基于版本的动态默认值
@@ -56,7 +60,11 @@ const defaultValues: Record<ConfigKeys, any> = {
   [ConfigKeys.TestChannel]: versionBasedDefaults.testChannel,
   [ConfigKeys.TestPlan]: versionBasedDefaults.testPlan,
   [ConfigKeys.SpellCheckLanguages]: [] as string[],
-  [ConfigKeys.DisableHardwareAcceleration]: false
+  [ConfigKeys.DisableHardwareAcceleration]: false,
+  // ASR 默认配置
+  [ConfigKeys.DeepgramApiKey]: '',
+  [ConfigKeys.ASRDefaultLanguage]: 'en',
+  [ConfigKeys.ASRModel]: 'nova-3'
 }
 
 export class ConfigManager {
@@ -206,7 +214,32 @@ export class ConfigManager {
    * @param defaultValue 默认值
    */
   get<T>(key: string, defaultValue?: T) {
-    return this.store.get(key, defaultValue ? defaultValue : defaultValues[key]) as T
+    return this.store.get(key, defaultValue ?? defaultValues[key]) as T
+  }
+
+  // ASR-related config accessors
+  getDeepgramApiKey(): string {
+    return this.get(ConfigKeys.DeepgramApiKey, '')
+  }
+
+  setDeepgramApiKey(apiKey: string) {
+    this.setAndNotify(ConfigKeys.DeepgramApiKey, apiKey)
+  }
+
+  getASRDefaultLanguage(): string {
+    return this.get(ConfigKeys.ASRDefaultLanguage, 'en')
+  }
+
+  setASRDefaultLanguage(language: string) {
+    this.setAndNotify(ConfigKeys.ASRDefaultLanguage, language)
+  }
+
+  getASRModel(): string {
+    return this.get(ConfigKeys.ASRModel, 'nova-3')
+  }
+
+  setASRModel(model: string) {
+    this.setAndNotify(ConfigKeys.ASRModel, model)
   }
 }
 
diff --git a/src/main/services/__tests__/ASRSubtitleService.shouldBreakSentence.test.ts b/src/main/services/__tests__/ASRSubtitleService.shouldBreakSentence.test.ts
new file mode 100644
index 00000000..53d89af8
--- /dev/null
+++ b/src/main/services/__tests__/ASRSubtitleService.shouldBreakSentence.test.ts
@@ -0,0 +1,430 @@
+import type { DeepgramWord } from '@shared/types'
+import { beforeEach, describe, expect, it, vi } from 'vitest'
+
+// Mock logger using vi.hoisted to ensure it's available before imports
+// (the vi.mock factory below references it, and vi.mock calls are hoisted).
+const mockLogger = vi.hoisted(() => ({
+  info: vi.fn(),
+  debug: vi.fn(),
+  warn: vi.fn(),
+  error: vi.fn()
+}))
+
+// Every logger context resolves to the same shared mock object.
+vi.mock('../LoggerService', () => ({
+  loggerService: {
+    withContext: () => mockLogger
+  }
+}))
+
+// Mock Electron: only app.getPath / app.getVersion and ipcMain.on / ipcMain.handle are stubbed.
+vi.mock('electron', () => ({
+  app: {
+    getPath: vi.fn(() => '/mock/path'),
+    getVersion: vi.fn(() => '1.0.0')
+  },
+  ipcMain: {
+    on: vi.fn(),
+    handle: vi.fn()
+  }
+}))
+
+// Mock electron-conf: Conf instances expose no-op get / set / has.
+vi.mock('electron-conf/main', () => ({
+  Conf: vi.fn().mockImplementation(() => ({
+    get: vi.fn(),
+    set: vi.fn(),
+    has: vi.fn()
+  }))
+}))
+
+// Mock other dependencies
+// ConfigManager: fixed ASR settings (note the mocked model is 'nova-2').
+vi.mock('../ConfigManager', () => ({
+  configManager: {
+    getDeepgramApiKey: vi.fn(() => 'mock-api-key'),
+    getASRDefaultLanguage: vi.fn(() => 'en'),
+    getASRModel: vi.fn(() => 'nova-2')
+  }
+}))
+
+// Database layer: only subtitleLibrary.addSubtitle is stubbed.
+vi.mock('../../db/dao', () => ({
+  db: {
+    subtitleLibrary: {
+      addSubtitle: vi.fn()
+    }
+  }
+}))
+
+// Mock AudioPreprocessor: the default export is a constructor returning stubbed methods.
+vi.mock('../audio/AudioPreprocessor', () => ({
+  default: vi.fn().mockImplementation(() => ({
+    createTempDir: vi.fn(() => '/mock/temp'),
+    extractAudioTrack: vi.fn(),
+    cleanupTempDir: vi.fn()
+  }))
+}))
+
+// Mock SubtitleFormatter: the default export is a constructor returning stubbed methods.
+vi.mock('../asr/SubtitleFormatter', () => ({
+  default: vi.fn().mockImplementation(() => ({
+    formatSubtitles: vi.fn(),
+    exportToSRT: vi.fn(),
+    exportToVTT: vi.fn()
+  }))
+}))
+
+// Mock DeepgramTranscriber: the default export is a constructor returning stubbed methods.
+vi.mock('../asr/DeepgramTranscriber', () => ({
+  default: vi.fn().mockImplementation(() => ({
+    transcribeFile: vi.fn(),
+    validateApiKey: vi.fn(),
+    cancelAll: vi.fn()
+  }))
+}))
+
+describe('ASRSubtitleService - shouldBreakSentence', () => {
+  let ASRSubtitleService: any
+  let service: any
+
+  // Start every test from clean mock state and a freshly constructed service.
+  beforeEach(async () => {
+    vi.clearAllMocks()
+
+    // Load the service lazily so the mocks declared above are already registered
+    const { default: ServiceClass } = await import('../ASRSubtitleService')
+    ASRSubtitleService = ServiceClass
+    service = new ASRSubtitleService()
+  })
+
+  /**
+   * Calls the service's private shouldBreakSentence method and returns its verdict.
+   * Bracket notation is used to reach the private member from the test.
+   */
+  const testShouldBreakSentence = (
+    currentWord: DeepgramWord,
+    nextWord: DeepgramWord | undefined,
+    sentenceStartTime: number,
+    isLastWord: boolean
+  ): boolean => service['shouldBreakSentence'](currentWord, nextWord, sentenceStartTime, isLastWord)
+
+  /**
+   * Builds a DeepgramWord fixture with a fixed confidence of 0.95.
+   */
+  const createWord = (
+    word: string,
+    punctuated_word: string,
+    start: number,
+    end: number
+  ): DeepgramWord => {
+    const fixture: DeepgramWord = { word, punctuated_word, start, end, confidence: 0.95 }
+    return fixture
+  }
+
+  describe('Last word detection', () => {
+    it('should break at the last word', () => {
+      // No next word exists, and the caller flags this word as the final one
+      const finalWord = createWord('world', 'world.', 1.0, 1.5)
+      expect(testShouldBreakSentence(finalWord, undefined, 0, true)).toBe(true)
+    })
+  })
+
+  // Sentence-ending punctuation on the current word must trigger a break on its own.
+  describe('Sentence ending punctuation (no pause required)', () => {
+    it('should break on period with any pause', () => {
+      const ending = createWord('world', 'world.', 1.0, 1.5)
+      const following = createWord('Hello', 'Hello', 2.0, 2.5) // 500ms pause
+      expect(testShouldBreakSentence(ending, following, 0, false)).toBe(true)
+    })
+
+    it('should break on question mark with any pause', () => {
+      const ending = createWord('you', 'you?', 1.0, 1.5)
+      const following = createWord('Yes', 'Yes', 2.0, 2.5) // 500ms pause
+      expect(testShouldBreakSentence(ending, following, 0, false)).toBe(true)
+    })
+
+    it('should break on exclamation mark with any pause', () => {
+      const ending = createWord('amazing', 'amazing!', 1.0, 1.5)
+      const following = createWord('Really', 'Really', 2.0, 2.5) // 500ms pause
+      expect(testShouldBreakSentence(ending, following, 0, false)).toBe(true)
+    })
+
+    it('should break on period even with short pause (< 300ms)', () => {
+      const ending = createWord('world', 'world.', 1.0, 1.5)
+      const following = createWord('Hello', 'Hello', 1.7, 2.2) // 200ms pause
+      // Punctuation alone is enough; the pause length is irrelevant here
+      expect(testShouldBreakSentence(ending, following, 0, false)).toBe(true)
+    })
+
+    it('should break on Chinese period regardless of pause', () => {
+      const ending = createWord('世界', '世界。', 1.0, 1.5)
+      const following = createWord('你好', '你好', 2.0, 2.5) // 500ms pause
+      expect(testShouldBreakSentence(ending, following, 0, false)).toBe(true)
+    })
+  })
+
+  // Without punctuation, only a sufficiently long silence between words causes a break.
+  describe('Long pause detection', () => {
+    it('should break on pause > 800ms', () => {
+      const before = createWord('hello', 'hello', 1.0, 1.5)
+      const after = createWord('world', 'world', 2.5, 3.0) // 1000ms pause
+      expect(testShouldBreakSentence(before, after, 0, false)).toBe(true)
+    })
+
+    it('should NOT break on pause < 800ms', () => {
+      const before = createWord('hello', 'hello', 1.0, 1.5)
+      const after = createWord('world', 'world', 2.0, 2.5) // 500ms pause
+      expect(testShouldBreakSentence(before, after, 0, false)).toBe(false)
+    })
+  })
+
+  // All cases start the sentence at t=0, so the current word's end time is the sentence duration.
+  describe('Duration-based breaking (8-10 seconds)', () => {
+    it('should break at 8s+ duration with comma', () => {
+      const lateWord = createWord('word', 'word,', 8.5, 9.0)
+      const upcoming = createWord('next', 'next', 9.2, 9.7)
+      expect(testShouldBreakSentence(lateWord, upcoming, 0, false)).toBe(true)
+    })
+
+    it('should break at 8s+ duration with pause > 200ms', () => {
+      const lateWord = createWord('word', 'word', 8.5, 9.0)
+      const upcoming = createWord('next', 'next', 9.5, 10.0) // 500ms pause
+      expect(testShouldBreakSentence(lateWord, upcoming, 0, false)).toBe(true)
+    })
+
+    it('should NOT break at 8s+ duration without punctuation or pause', () => {
+      const lateWord = createWord('word', 'word', 8.5, 9.0)
+      const upcoming = createWord('next', 'next', 9.1, 9.6) // 100ms pause, no punctuation
+      expect(testShouldBreakSentence(lateWord, upcoming, 0, false)).toBe(false)
+    })
+
+    it('should force break at 10s+ duration regardless of punctuation', () => {
+      const lateWord = createWord('word', 'word', 10.5, 11.0)
+      const upcoming = createWord('next', 'next', 11.1, 11.6) // No punctuation, short pause
+      expect(testShouldBreakSentence(lateWord, upcoming, 0, false)).toBe(true)
+    })
+  })
+
+  describe('Edge cases', () => {
+    it('should handle word without punctuated_word field', () => {
+      // Built by hand because createWord always sets punctuated_word
+      const bareWord: DeepgramWord = {
+        word: 'hello',
+        start: 1.0,
+        end: 1.5,
+        confidence: 0.95
+      }
+      const upcoming = createWord('world', 'world', 2.0, 2.5)
+      expect(testShouldBreakSentence(bareWord, upcoming, 0, false)).toBe(false)
+    })
+
+    it('should handle zero pause duration', () => {
+      const first = createWord('hello', 'hello', 1.0, 1.5)
+      const second = createWord('world', 'world', 1.5, 2.0) // 0ms pause
+      expect(testShouldBreakSentence(first, second, 0, false)).toBe(false)
+    })
+
+    it('should handle very short sentence with period and pause', () => {
+      const first = createWord('Hi', 'Hi.', 0.5, 1.0)
+      const second = createWord('Bye', 'Bye', 1.5, 2.0) // 500ms pause
+      expect(testShouldBreakSentence(first, second, 0, false)).toBe(true)
+    })
+  })
+
+  describe('Real-world test cases', () => {
+    it('should break on sentence-ending punctuation even with minimal pause', () => {
+      // Real data from user: "tell." followed by "It's" with 0ms pause.
+      // The break is expected because "tell." carries sentence-ending punctuation.
+      const ending = createWord('tell', 'tell.', 50.745, 51.225)
+      const following = createWord("it's", "It's", 51.225, 51.385002) // 0ms pause
+
+      expect(testShouldBreakSentence(ending, following, 49.785, false)).toBe(true)
+    })
+
+    it('should break on sentence-ending punctuation with short pause (80ms)', () => {
+      // Real data: "with." followed by "Come" with 80ms pause
+      const ending = createWord('with', 'with.', 52.345, 52.825)
+      const following = createWord('come', 'Come', 52.905, 53.465) // 80ms pause
+
+      expect(testShouldBreakSentence(ending, following, 49.785, false)).toBe(true)
+    })
+
+    it('should handle complete real-world sentence sequence', () => {
+      // The complete sequence from the user's example, one row per word:
+      // [word, start, end, confidence, punctuated_word]
+      const rows: Array<[string, number, number, number, string]> = [
+        ["there's", 49.785, 50.265, 0.9863529, "There's"],
+        ['nothing', 50.265, 50.505, 0.99985075, 'nothing'],
+        ['to', 50.505, 50.745, 0.99971515, 'to'],
+        ['tell', 50.745, 51.225, 0.9826325, 'tell.'],
+        ["it's", 51.225, 51.385002, 0.8772707, "It's"],
+        ['just', 51.385002, 51.545, 0.99974877, 'just'],
+        ['some', 51.545, 51.705, 0.99927837, 'some'],
+        ['guy', 51.705, 51.864998, 0.999765, 'guy'],
+        ['i', 51.864998, 52.105, 0.9979578, 'I'],
+        ['work', 52.105, 52.345, 0.98591065, 'work'],
+        ['with', 52.345, 52.825, 0.9990688, 'with.'],
+        ['come', 52.905, 53.465, 0.9908832, 'Come'],
+        ['on', 53.465, 54.025, 0.96900225, 'on.'],
+        ["you're", 54.025, 54.505, 0.99452776, "You're"],
+        ['going', 54.505, 54.745, 0.9983157, 'going'],
+        ['out', 54.745, 54.905, 0.9927585, 'out'],
+        ['with', 54.905, 55.065, 0.9994931, 'with'],
+        ['the', 55.065, 55.145, 0.8043892, 'the'],
+        ['guy', 55.145, 55.465, 0.99105775, 'guy.'],
+        ["there's", 55.465, 55.625, 0.9992779, "There's"],
+        ['gotta', 55.625, 55.785, 0.7105091, 'gotta'],
+        ['be', 55.785, 55.945, 0.9926218, 'be'],
+        ['something', 55.945, 56.265, 0.99703157, 'something'],
+        ['wrong', 56.265, 56.505, 0.99949515, 'wrong'],
+        ['with', 56.505, 56.665, 0.9996629, 'with'],
+        ['him', 56.665, 56.905, 0.9875486, 'him.']
+      ]
+      const words: DeepgramWord[] = rows.map(([word, start, end, confidence, punctuated_word]) => ({
+        word,
+        start,
+        end,
+        confidence,
+        punctuated_word
+      }))
+
+      // Use the private method to group words into sentences
+      const sentences = service['groupWordsIntoSentences'](words)
+
+      // Five sentences are expected, split on sentence-ending punctuation
+      const expectedTexts = [
+        "There's nothing to tell.",
+        "It's just some guy I work with.",
+        'Come on.',
+        "You're going out with the guy.",
+        "There's gotta be something wrong with him."
+      ]
+
+      expect(sentences.length).toBe(5)
+      expectedTexts.forEach((expectedText, position) => {
+        expect(sentences[position].text).toBe(expectedText)
+      })
+    })
+  })
+})
diff --git a/src/main/services/__tests__/AudioPreprocessor.test.ts b/src/main/services/__tests__/AudioPreprocessor.test.ts
new file mode 100644
index 00000000..3aedc170
--- /dev/null
+++ b/src/main/services/__tests__/AudioPreprocessor.test.ts
@@ -0,0 +1,162 @@
+import { beforeEach, describe, expect, it, vi } from 'vitest'
+
+// Undo the global fs mocks: these tests need real file-system operations
+vi.unmock('node:fs')
+vi.unmock('node:fs/promises')
+
+import AudioPreprocessor, { parseFFmpegDuration } from '../audio/AudioPreprocessor'
+
+describe('parseFFmpegDuration', () => {
+  // Multi-line FFmpeg banner with the Duration line embedded among other metadata
+  const complexOutput = `
+      Input #0, mp3, from 'test.mp3':
+        Metadata:
+          title           : Test Audio
+          artist          : Test Artist
+        Duration: 00:01:23.456, bitrate: 128 kb/s
+        Stream #0:0: Audio: mp3, 44100 Hz, stereo, fltp, 128 kb/s
+    `
+
+  // One entry per test: title, FFmpeg output to parse, expected seconds (null = unparsable)
+  const cases: Array<{ title: string; output: string; expected: number | null }> = [
+    {
+      title: 'should parse duration with two-digit centiseconds',
+      output: 'Duration: 01:23:45.67',
+      expected: 1 * 3600 + 23 * 60 + 45 + 67 / 100 // 5025.67
+    },
+    {
+      title: 'should parse duration with single-digit milliseconds',
+      output: 'Duration: 02:34:56.7',
+      expected: 2 * 3600 + 34 * 60 + 56 + 7 / 10 // 9296.7
+    },
+    {
+      title: 'should parse duration with three-digit milliseconds',
+      output: 'Duration: 00:12:34.567',
+      expected: 0 * 3600 + 12 * 60 + 34 + 567 / 1000 // 754.567
+    },
+    {
+      title: 'should parse duration without fractional part',
+      output: 'Duration: 03:45:00',
+      expected: 3 * 3600 + 45 * 60 + 0 // 13500
+    },
+    {
+      title: 'should handle edge cases with zero values',
+      output: 'Duration: 00:00:00.00',
+      expected: 0
+    },
+    {
+      title: 'should handle malformed duration gracefully',
+      output: 'Duration: XX:YY:ZZ.invalid',
+      expected: null
+    },
+    {
+      title: 'should extract duration from complex FFmpeg output',
+      output: complexOutput,
+      expected: 0 * 3600 + 1 * 60 + 23 + 456 / 1000 // 83.456
+    },
+    {
+      title: 'should handle maximum valid hour values',
+      output: 'Duration: 99:59:59.999',
+      expected: 99 * 3600 + 59 * 60 + 59 + 999 / 1000 // 359999.999
+    }
+  ]
+
+  for (const { title, output, expected } of cases) {
+    it(title, () => {
+      expect(parseFFmpegDuration(output)).toBe(expected)
+    })
+  }
+})
+
+describe('AudioPreprocessor', () => {
+  let audioPreprocessor: AudioPreprocessor
+
+  beforeEach(() => {
+    audioPreprocessor = new AudioPreprocessor()
+  })
+
+  describe('buildFFmpegArgs', () => {
+    // buildFFmpegArgs is private, so it is reached through an `any` cast. The tests only check
+    // that the requested format selects the expected codec arguments.
+
+    /**
+     * Returns the private buildFFmpegArgs method, failing the test if it does not exist.
+     * Asserting here (instead of guarding with `if`) prevents the tests from passing with zero
+     * assertions when the method is renamed or removed.
+     */
+    const getBuildFFmpegArgs = (): ((...args: unknown[]) => any) => {
+      const method = (audioPreprocessor as any).buildFFmpegArgs
+      expect(typeof method).toBe('function')
+      return method
+    }
+
+    it('should handle MP3 format correctly', async () => {
+      // Create a temporary directory for the test
+      const tempDir = audioPreprocessor.createTempDir('test-')
+
+      try {
+        const mp3Args = getBuildFFmpegArgs().call(
+          audioPreprocessor,
+          '/test/input.mp4',
+          '/test/output.mp3',
+          16000,
+          1,
+          'mp3'
+        )
+
+        // MP3 output must use the MP3 encoder with an explicit bitrate, not PCM
+        expect(mp3Args).toContain('libmp3lame')
+        expect(mp3Args).toContain('-b:a')
+        expect(mp3Args).toContain('128k')
+        expect(mp3Args).not.toContain('pcm_s16le')
+      } finally {
+        // Clean up the temporary directory
+        await audioPreprocessor.cleanupTempDir(tempDir)
+      }
+    })
+
+    it('should handle WAV format correctly', async () => {
+      const tempDir = audioPreprocessor.createTempDir('test-')
+
+      try {
+        const wavArgs = getBuildFFmpegArgs().call(
+          audioPreprocessor,
+          '/test/input.mp4',
+          '/test/output.wav',
+          16000,
+          1,
+          'wav'
+        )
+
+        // WAV output must use PCM and carry no MP3 encoder or bitrate arguments
+        expect(wavArgs).toContain('pcm_s16le')
+        expect(wavArgs).not.toContain('libmp3lame')
+        expect(wavArgs).not.toContain('-b:a')
+      } finally {
+        await audioPreprocessor.cleanupTempDir(tempDir)
+      }
+    })
+
+    it('should use WAV as default format', async () => {
+      const tempDir = audioPreprocessor.createTempDir('test-')
+
+      try {
+        // No format argument is passed, so the method's default applies
+        const defaultArgs = getBuildFFmpegArgs().call(
+          audioPreprocessor,
+          '/test/input.mp4',
+          '/test/output.wav',
+          16000,
+          1
+        )
+
+        // The default must be the PCM codec
+        expect(defaultArgs).toContain('pcm_s16le')
+        expect(defaultArgs).not.toContain('libmp3lame')
+      } finally {
+        await audioPreprocessor.cleanupTempDir(tempDir)
+      }
+    })
+  })
+})
diff --git a/src/main/services/asr/DeepgramTranscriber.ts b/src/main/services/asr/DeepgramTranscriber.ts
new file mode 100644
index 00000000..aaf819f2
--- /dev/null
+++ b/src/main/services/asr/DeepgramTranscriber.ts
@@ -0,0 +1,516 @@
+/**
+ * Deepgram 转写服务
+ * 负责调用 Deepgram API 进行语音转文本
+ */
+
+import type { AudioSegment, DeepgramResponse, TranscriptSegment } from '@shared/types'
+import * as fs from 'fs'
+import { promises as fsPromises } from 'fs'
+import type { ClientRequest } from 'http'
+import https from 'https'
+import PQueue from 'p-queue'
+import * as path from 'path'
+
+import { loggerService } from '../LoggerService'
+
+const logger = loggerService.withContext('DeepgramTranscriber')
+
+/** Options accepted by the transcriber for a Deepgram request. */
+export interface DeepgramOptions {
+  /** Deepgram API key, sent in the Authorization header as `Token <key>` */
+  apiKey: string
+  /** Model to use */
+  model?: 'nova-2' | 'nova-3'
+  /** Language code; 'auto' makes callDeepgramAPI send detect_language=true instead */
+  language?: string
+  /** Whether to enable smart formatting */
+  smartFormat?: boolean
+  /** Whether to enable utterance detection */
+  utterances?: boolean
+  /** Silence duration that ends an utterance (milliseconds) */
+  utteranceEndMs?: number
+  /**
+   * Prompt text (context from the preceding audio).
+   * NOTE(review): callDeepgramAPI in this file does not add this value to the request — confirm
+   * whether it is meant to be forwarded.
+   */
+  prompt?: string
+}
+
+/** Progress payload passed to the onProgress callback of transcribeSegments. */
+export interface TranscriptionProgress {
+  /** Number of segments finished so far (failed segments are counted too) */
+  completed: number
+  /** Total number of segments */
+  total: number
+  /** Index of the segment that just finished */
+  current: number
+}
+
+class DeepgramTranscriber {
+  /** Queue through which transcribeSegments runs its per-segment tasks */
+  private queue: PQueue
+  /** HTTP requests currently in flight; cancelAll destroys every entry */
+  private activeRequests: Set<ClientRequest> = new Set()
+  /** Instance-wide cancellation flag, aborted by cancelAll and checked between retry attempts */
+  private abortController: AbortController = new AbortController()
+  /** NOTE(review): only ever assigned null in this class as shown — looks unused; confirm */
+  private currentRequestAbortController: AbortController | null = null
+
+  /**
+   * @param concurrency Concurrency limit handed to the internal PQueue (default 3).
+   */
+  constructor(concurrency: number = 3) {
+    this.queue = new PQueue({ concurrency })
+    logger.info('Deepgram 转写器初始化', { concurrency })
+  }
+
+  /**
+   * Clears any earlier cancellation so a new transcription task can start.
+   * An aborted controller cannot be reused, so it is replaced with a fresh one.
+   */
+  private resetCancellationState(): void {
+    this.currentRequestAbortController = null
+
+    const wasCancelled = this.abortController.signal.aborted
+    if (!wasCancelled) {
+      return
+    }
+    this.abortController = new AbortController()
+  }
+
+  /**
+   * Transcribes every segment in order and returns one TranscriptSegment per input.
+   *
+   * Each segment is submitted to the queue and awaited before the next one is submitted, so the
+   * transcript of one segment is available as the `prompt` option of the following one (only
+   * its last 200 characters are passed). A failed segment does not abort the batch: it is
+   * recorded with `success: false` and the error message.
+   *
+   * @param segments Audio segments to transcribe; each segment's `filePath` is uploaded.
+   * @param options Deepgram options applied to every segment.
+   * @param onProgress Optional callback invoked once per finished segment, successful or not.
+   * @returns Results in the same order as `segments`.
+   */
+  public async transcribeSegments(
+    segments: AudioSegment[],
+    options: DeepgramOptions,
+    onProgress?: (progress: TranscriptionProgress) => void
+  ): Promise<TranscriptSegment[]> {
+    logger.info('开始批量转写', { segmentCount: segments.length })
+
+    // A cancellation left over from an earlier task must not affect this run
+    this.resetCancellationState()
+
+    const outcomes: TranscriptSegment[] = []
+    let finishedCount = 0
+
+    // Transcript of the most recently completed segment, used as context for the next one
+    let carriedTranscript = ''
+
+    for (const [position, segment] of segments.entries()) {
+      const runSegment = async (): Promise<TranscriptSegment> => {
+        try {
+          const response = await this.transcribeSingleSegment(segment.filePath, {
+            ...options,
+            // slice(-200) returns the whole string when it is shorter than 200 characters
+            prompt: carriedTranscript.slice(-200)
+          })
+
+          finishedCount++
+          onProgress?.({ completed: finishedCount, total: segments.length, current: position })
+
+          // Remember this transcript for the next segment (skipped when empty or missing)
+          const transcript = response.results.channels[0]?.alternatives[0]?.transcript
+          if (transcript) {
+            carriedTranscript = transcript
+          }
+
+          return { audioSegment: segment, response, success: true } as TranscriptSegment
+        } catch (error) {
+          finishedCount++
+          onProgress?.({ completed: finishedCount, total: segments.length, current: position })
+
+          const message = error instanceof Error ? error.message : String(error)
+          logger.error('段转写失败', { index: segment.index, error: message })
+
+          return { audioSegment: segment, success: false, error: message } as TranscriptSegment
+        }
+      }
+
+      // Submit to the queue and wait for the result before moving on
+      const pending = this.queue.add(runSegment)
+      if (pending) {
+        outcomes.push((await pending) as TranscriptSegment)
+      }
+    }
+
+    // Wait until the queue has fully drained
+    await this.queue.onIdle()
+
+    const succeeded = outcomes.filter((outcome) => outcome.success).length
+    logger.info('批量转写完成', {
+      total: outcomes.length,
+      success: succeeded,
+      failed: outcomes.length - succeeded
+    })
+
+    return outcomes
+  }
+
+  /**
+   * Transcribes one audio file, retrying transient failures with exponential backoff.
+   *
+   * Retries wait 2s, then 4s, … (2^attempt seconds). Two kinds of failure are never retried:
+   * - cancellation: the instance-wide abort flag is set (cancelAll was called). Any error seen
+   *   while that flag is set is reported as `REQUEST_CANCELLED`. A dropped connection without a
+   *   cancellation is treated as an ordinary network error and retried.
+   * - invalid API key / exhausted quota: repeating the same request cannot succeed.
+   *
+   * @param audioPath Path of the audio file to upload.
+   * @param options Deepgram options; unset fields fall back to the defaults below.
+   * @param retries Maximum number of attempts (default 3).
+   * @returns The parsed Deepgram response.
+   * @throws Error with message `REQUEST_CANCELLED` when the task was cancelled, otherwise the
+   *   last error raised by the API call.
+   */
+  private async transcribeSingleSegment(
+    audioPath: string,
+    options: DeepgramOptions,
+    retries: number = 3
+  ): Promise<DeepgramResponse> {
+    const {
+      apiKey,
+      model = 'nova-3',
+      language = 'en',
+      smartFormat = true,
+      utterances = true,
+      utteranceEndMs = 1000,
+      prompt
+    } = options
+
+    // Messages produced by callDeepgramAPI for HTTP 401 and 402; retrying cannot fix these
+    const nonRetryableMessages = ['API Key 无效', 'API 配额不足']
+
+    let lastError: Error | null = null
+
+    for (let attempt = 0; attempt < retries; attempt++) {
+      try {
+        // Check for cancellation before every attempt
+        if (this.abortController.signal.aborted) {
+          logger.debug('检测到请求被取消，停止重试')
+          throw new Error('REQUEST_CANCELLED')
+        }
+
+        if (attempt > 0) {
+          // Exponential backoff
+          const delay = Math.pow(2, attempt) * 1000
+          logger.debug('重试前等待', { attempt, delay })
+          await new Promise((resolve) => setTimeout(resolve, delay))
+
+          // The task may have been cancelled while waiting
+          if (this.abortController.signal.aborted) {
+            logger.debug('等待期间检测到取消，停止重试')
+            throw new Error('REQUEST_CANCELLED')
+          }
+        }
+
+        logger.debug('调用 Deepgram API', { audioPath, model, language, attempt })
+
+        const response = await this.callDeepgramAPI(audioPath, {
+          apiKey,
+          model,
+          language,
+          smartFormat,
+          utterances,
+          utteranceEndMs,
+          prompt
+        })
+
+        logger.debug('Deepgram API 调用成功', { audioPath })
+        return response
+      } catch (error) {
+        lastError = error instanceof Error ? error : new Error(String(error))
+
+        // Only the abort flag proves a user cancellation; the text of a socket error does not,
+        // because a genuine network drop produces the same "socket hang up" message.
+        const cancelled =
+          lastError.message === 'REQUEST_CANCELLED' || this.abortController.signal.aborted
+        if (cancelled) {
+          logger.info('用户取消了 ASR 任务，停止处理', { error: lastError.message })
+          throw new Error('REQUEST_CANCELLED')
+        }
+
+        logger.warn('Deepgram API 调用失败', {
+          attempt: attempt + 1,
+          maxRetries: retries,
+          error: lastError.message
+        })
+
+        // Fail fast instead of backing off on errors that will not change between attempts
+        if (nonRetryableMessages.includes(lastError.message)) {
+          throw lastError
+        }
+      }
+    }
+
+    throw lastError || new Error('Deepgram API 调用失败')
+  }
+
+  /**
+   * Streams one audio file to Deepgram's `/v1/listen` endpoint and resolves with the parsed JSON.
+   *
+   * The request is registered in `activeRequests` while in flight so cancelAll can destroy it.
+   * The promise always settles: on a complete response, a request error, a read error, the
+   * 10-minute timeout, or a response that closes before it is complete.
+   *
+   * Note: `options.prompt` is accepted by the type but is not added to the request here.
+   *
+   * @param audioPath Path of the audio file; its extension selects the Content-Type.
+   * @param options API key plus model/language/formatting settings.
+   * @returns The parsed response body on HTTP 200.
+   * @throws Error describing the failure (invalid key, quota, rate limit, network, timeout,
+   *   unreadable file or unparsable body). A failing `stat` rejects with the fs error itself.
+   */
+  private async callDeepgramAPI(
+    audioPath: string,
+    options: DeepgramOptions
+  ): Promise<DeepgramResponse> {
+    // Build the query parameters
+    const queryParams = new URLSearchParams({
+      model: options.model || 'nova-3',
+      smart_format: String(options.smartFormat !== false),
+      punctuate: 'true',
+      utterances: String(options.utterances !== false),
+      utterance_end_ms: String(options.utteranceEndMs || 1000)
+    })
+
+    // Language: 'auto' switches to detect_language; anything else is passed as language
+    if (options.language === 'auto') {
+      queryParams.append('detect_language', 'true')
+    } else if (options.language) {
+      queryParams.append('language', options.language)
+    }
+
+    const url = `https://api.deepgram.com/v1/listen?${queryParams.toString()}`
+
+    // Pick the MIME type from the file extension (falls back to audio/wav)
+    const ext = path.extname(audioPath).toLowerCase()
+    const mimeTypes: Record<string, string> = {
+      '.wav': 'audio/wav',
+      '.mp3': 'audio/mpeg',
+      '.m4a': 'audio/mp4',
+      '.flac': 'audio/flac',
+      '.ogg': 'audio/ogg',
+      '.opus': 'audio/opus',
+      '.webm': 'audio/webm'
+    }
+    const contentType = mimeTypes[ext] || 'audio/wav'
+
+    // File size is needed for the Content-Length header
+    const stats = await fsPromises.stat(audioPath)
+    const fileSize = stats.size
+
+    // Stream the file rather than loading it into memory
+    const readStream = fs.createReadStream(audioPath)
+
+    return new Promise<DeepgramResponse>((resolve, reject) => {
+      const req = https.request(
+        url,
+        {
+          method: 'POST',
+          headers: {
+            'Content-Type': contentType,
+            'Content-Length': fileSize,
+            Authorization: `Token ${options.apiKey}`
+          }
+        },
+        (res) => {
+          // Collect raw buffers and decode once at the end: decoding chunk by chunk corrupts
+          // multi-byte UTF-8 characters (e.g. CJK transcripts) that straddle a chunk boundary.
+          const chunks: Buffer[] = []
+
+          res.on('data', (chunk: Buffer) => {
+            chunks.push(chunk)
+          })
+
+          // A connection dropped (or a request destroyed) after the response has started never
+          // emits 'end'. Settle the promise here so callers are not left waiting forever.
+          res.on('error', (error) => {
+            this.activeRequests.delete(req)
+            reject(new Error(`网络错误: ${error.message}`))
+          })
+          res.on('close', () => {
+            if (!res.complete) {
+              this.activeRequests.delete(req)
+              reject(new Error('网络错误: 响应未完成时连接已关闭'))
+            }
+          })
+
+          res.on('end', () => {
+            // The request is finished; stop tracking it
+            this.activeRequests.delete(req)
+
+            const responseData = Buffer.concat(chunks).toString('utf8')
+
+            if (res.statusCode === 200) {
+              try {
+                const parsed = JSON.parse(responseData) as DeepgramResponse
+                resolve(parsed)
+              } catch (error) {
+                reject(new Error(`解析 Deepgram 响应失败: ${error}`))
+              }
+            } else if (res.statusCode === 401) {
+              reject(new Error('API Key 无效'))
+            } else if (res.statusCode === 402) {
+              reject(new Error('API 配额不足'))
+            } else if (res.statusCode === 429) {
+              reject(new Error('API 调用频率超限'))
+            } else {
+              reject(new Error(`Deepgram API 错误 (${res.statusCode}): ${responseData}`))
+            }
+          })
+        }
+      )
+
+      // Track the request so cancelAll can destroy it
+      this.activeRequests.add(req)
+
+      // Request-level errors
+      req.on('error', (error) => {
+        this.activeRequests.delete(req)
+        readStream.destroy()
+        reject(new Error(`网络错误: ${error.message}`))
+      })
+
+      // Timeout of 10 minutes
+      req.setTimeout(10 * 60 * 1000, () => {
+        this.activeRequests.delete(req)
+        readStream.destroy()
+        req.destroy()
+        reject(new Error('请求超时（超过10分钟）'))
+      })
+
+      // Errors while reading the audio file
+      readStream.on('error', (error) => {
+        this.activeRequests.delete(req)
+        req.destroy()
+        reject(new Error(`读取音频文件失败: ${error.message}`))
+      })
+
+      // Pipe the file into the request body; pipe() ends the request when the file ends
+      readStream.pipe(req)
+    })
+  }
+
+  /**
+   * Transcribes a complete audio file in a single request (with retries).
+   *
+   * Like transcribeSegments, this starts a new task and therefore clears any cancellation left
+   * over from an earlier cancelAll(). Without that reset, every call made on the same instance
+   * after a cancellation would fail immediately with `REQUEST_CANCELLED`.
+   *
+   * @param audioPath Path of the audio file to transcribe.
+   * @param options Deepgram options for the request.
+   * @returns The parsed Deepgram response.
+   * @throws The error raised by the underlying request; its message is `REQUEST_CANCELLED` when
+   *   the task was cancelled.
+   */
+  public async transcribeFile(
+    audioPath: string,
+    options: DeepgramOptions
+  ): Promise<DeepgramResponse> {
+    logger.info('开始转写完整音频文件', { audioPath })
+
+    // Reset the cancellation state so this new task is not rejected by an old cancelAll()
+    this.resetCancellationState()
+
+    try {
+      const response = await this.transcribeSingleSegment(audioPath, options)
+      logger.info('音频文件转写成功', { audioPath })
+      return response
+    } catch (error) {
+      const errorMessage = error instanceof Error ? error.message : String(error)
+
+      // A user cancellation is expected behavior, so log it at info level
+      if (errorMessage === 'REQUEST_CANCELLED') {
+        logger.info('用户取消了音频文件转写', { audioPath })
+      } else {
+        logger.error('音频文件转写失败', {
+          audioPath,
+          error: errorMessage
+        })
+      }
+
+      throw error
+    }
+  }
+
+  /**
+   * Checks whether an API key is usable.
+   *
+   * Keys that are empty or shorter than 10 characters are rejected locally; anything else is
+   * verified with a request to Deepgram. This method never throws: failures are reported
+   * through the `error` field.
+   *
+   * @param apiKey The key to validate.
+   * @returns `{ valid: true }` or `{ valid: false, error }`.
+   */
+  public async validateApiKey(apiKey: string): Promise<{ valid: boolean; error?: string }> {
+    try {
+      logger.info('验证 Deepgram API Key')
+
+      const looksPlausible = Boolean(apiKey) && apiKey.length >= 10
+      if (!looksPlausible) {
+        return { valid: false, error: 'API Key 格式无效' }
+      }
+
+      // Ask Deepgram itself whether the key is accepted
+      return await this.makeValidationRequest(apiKey)
+    } catch (error) {
+      const reason = error instanceof Error ? error.message : 'API Key 验证失败'
+      return { valid: false, error: reason }
+    }
+  }
+
+  /**
+   * Sends `GET /v1/auth/token` to api.deepgram.com with the given key and maps the outcome to a
+   * validation result. The returned promise only ever resolves; network errors and the 8-second
+   * timeout are reported as `{ valid: false, error }`.
+   *
+   * @param apiKey The key to send in the Authorization header.
+   */
+  private makeValidationRequest(apiKey: string): Promise<{ valid: boolean; error?: string }> {
+    type ValidationResult = { valid: boolean; error?: string }
+
+    // Turns the body of a 401 response into the message shown to the user
+    const explainUnauthorized = (body: string): string => {
+      try {
+        const errorCode = JSON.parse(body).err_code || 'UNKNOWN'
+        if (errorCode === 'INVALID_AUTH') {
+          return 'API Key 无效'
+        }
+        if (errorCode === 'INSUFFICIENT_PERMISSIONS') {
+          return 'API Key 权限不足'
+        }
+        return 'API Key 认证失败'
+      } catch {
+        // Body was not usable JSON
+        return 'API Key 认证失败'
+      }
+    }
+
+    return new Promise<ValidationResult>((resolve) => {
+      const req = https.request(
+        {
+          hostname: 'api.deepgram.com',
+          port: 443,
+          path: '/v1/auth/token',
+          method: 'GET',
+          headers: {
+            Authorization: `Token ${apiKey}`,
+            'Content-Type': 'application/json'
+          },
+          timeout: 8000 // 8-second timeout
+        },
+        (res) => {
+          let body = ''
+
+          res.on('data', (chunk) => {
+            body += chunk
+          })
+
+          res.on('end', () => {
+            // Map the HTTP status to a validation result
+            switch (res.statusCode) {
+              case 200:
+                logger.info('API Key 验证成功')
+                resolve({ valid: true })
+                break
+              case 401:
+                resolve({ valid: false, error: explainUnauthorized(body) })
+                break
+              case 403:
+                resolve({ valid: false, error: 'API Key 权限不足或访问被拒绝' })
+                break
+              default:
+                logger.warn('API Key 验证收到意外状态码', {
+                  statusCode: res.statusCode,
+                  body
+                })
+                resolve({
+                  valid: false,
+                  error: `验证失败 (HTTP ${res.statusCode})`
+                })
+            }
+          })
+        }
+      )
+
+      req.on('error', (error) => {
+        logger.error('API Key 验证请求失败', { error: error.message })
+        resolve({ valid: false, error: '网络连接失败，请检查网络设置' })
+      })
+
+      req.on('timeout', () => {
+        req.destroy()
+        logger.error('API Key 验证请求超时')
+        resolve({ valid: false, error: '验证请求超时，请稍后重试' })
+      })
+
+      req.end()
+    })
+  }
+
+  /**
+   * Cancels everything this transcriber is doing.
+   *
+   * Sets the abort flag (so retry loops stop), drops tasks still waiting in the queue, destroys
+   * every in-flight HTTP request, then waits for the queue to become idle. The flag stays set
+   * until resetCancellationState() is called at the start of a new task.
+   */
+  public async cancelAll(): Promise<void> {
+    // Raise the cancellation flag
+    this.abortController.abort()
+
+    // Abort the per-request controller as well, if one exists
+    if (this.currentRequestAbortController) {
+      this.currentRequestAbortController.abort()
+    }
+
+    // Record how many tasks were waiting BEFORE clearing; afterwards the size is always 0
+    const queuedCount = this.queue.size
+
+    // Empty the queue so no new task starts
+    this.queue.clear()
+
+    // Interrupt every HTTP request that is still in flight
+    const requestCount = this.activeRequests.size
+    for (const req of this.activeRequests) {
+      try {
+        req.destroy()
+      } catch (error) {
+        logger.warn('中断请求失败', {
+          error: error instanceof Error ? error.message : String(error)
+        })
+      }
+    }
+    this.activeRequests.clear()
+
+    // Wait for the running tasks to settle
+    await this.queue.onIdle()
+
+    logger.info('已取消所有转写任务', {
+      cancelledRequests: requestCount,
+      queuedTasks: queuedCount
+    })
+  }
+}
+
+export default DeepgramTranscriber
diff --git a/src/main/services/asr/SubtitleFormatter.ts b/src/main/services/asr/SubtitleFormatter.ts
new file mode 100644
index 00000000..d2d03f49
--- /dev/null
+++ b/src/main/services/asr/SubtitleFormatter.ts
@@ -0,0 +1,264 @@
+/**
+ * 字幕格式化服务
+ * 负责将字幕数据格式化并导出为 SRT/VTT 文件
+ */
+
+import type { ASRSubtitleItem } from '@shared/types'
+import * as fs from 'fs'
+import { convert } from 'subsrt-ts'
+
+import { loggerService } from '../LoggerService'
+
+const logger = loggerService.withContext('SubtitleFormatter')
+
+export interface FormatOptions {
+  /** Maximum duration of a single subtitle cue, in seconds */
+  maxDuration?: number
+  /** Maximum number of characters per subtitle line */
+  maxCharsPerLine?: number
+}
+
+/**
+ * Post-processes ASR cues and writes them to disk.
+ * `formatSubtitles` enforces per-cue duration and line-length limits by splitting cues;
+ * `exportToSRT` / `exportToVTT` serialise the result.
+ */
+class SubtitleFormatter {
+  /**
+   * Normalise ASR cues so that each one respects the duration and line-length limits.
+   * Cues already within both limits are copied through; the others are split.
+   * @param items   cues to normalise; the array is not mutated
+   * @param options limits; defaults are 8 seconds and 42 characters
+   * @returns a new list, re-indexed from 0 in output order
+   */
+  public formatSubtitles(items: ASRSubtitleItem[], options: FormatOptions = {}): ASRSubtitleItem[] {
+    const { maxDuration = 8, maxCharsPerLine = 42 } = options
+
+    logger.info('开始格式化字幕', {
+      itemCount: items.length,
+      maxDuration,
+      maxCharsPerLine
+    })
+
+    const formatted: ASRSubtitleItem[] = []
+
+    for (const item of items) {
+      // A cue is passed through unsplit only when it satisfies both limits.
+      const fitsDuration = item.endTime - item.startTime <= maxDuration
+      const fitsLine = item.text.length <= maxCharsPerLine
+      const pieces =
+        fitsDuration && fitsLine ? [item] : this.splitSubtitle(item, maxDuration, maxCharsPerLine)
+
+      // Indices are assigned here, in output order, whatever the pieces carried before.
+      for (const piece of pieces) {
+        formatted.push({ ...piece, index: formatted.length })
+      }
+    }
+
+    logger.info('字幕格式化完成', { outputCount: formatted.length })
+
+    return formatted
+  }
+
+  /**
+   * Split one over-long cue into several.
+   * With word timestamps the split is exact; otherwise the text is broken on whitespace and
+   * segment times are estimated from the cue's average seconds per character.
+   * @param item            cue to split
+   * @param maxDuration     longest allowed segment, in seconds (an estimate in the fallback path)
+   * @param maxCharsPerLine longest allowed segment text
+   * @returns segments in order, all with index 0 (the caller re-indexes)
+   */
+  private splitSubtitle(
+    item: ASRSubtitleItem,
+    maxDuration: number,
+    maxCharsPerLine: number
+  ): ASRSubtitleItem[] {
+    // Prefer exact word timings when the cue carries them.
+    if (item.words && item.words.length > 0) {
+      return this.splitSubtitleWithWordTimestamps(item, maxCharsPerLine, maxDuration)
+    }
+
+    const result: ASRSubtitleItem[] = []
+    const words = item.text.split(/\s+/)
+    const duration = item.endTime - item.startTime
+    const avgTimePerChar = duration / item.text.length
+    // Express the duration limit as a character budget so long, sparse cues get split as well.
+    // A non-positive or NaN rate (e.g. zero duration) imposes no extra limit.
+    const charsForDuration = avgTimePerChar > 0 ? Math.floor(maxDuration / avgTimePerChar) : Infinity
+    const charLimit = Math.min(maxCharsPerLine, charsForDuration)
+
+    let currentWords: string[] = []
+    let currentChars = 0
+    let segmentStart = item.startTime
+
+    for (const word of words) {
+      const wordLength = word.length + 1 // +1 for the joining space
+
+      // Close the running segment before it overflows; a lone oversized word is kept whole.
+      if (currentChars + wordLength > charLimit && currentWords.length > 0) {
+        const text = currentWords.join(' ')
+        const segmentEnd = Math.min(segmentStart + text.length * avgTimePerChar, item.endTime)
+        result.push({ index: 0, startTime: segmentStart, endTime: segmentEnd, text })
+
+        currentWords = [word]
+        currentChars = wordLength
+        segmentStart = segmentEnd
+      } else {
+        currentWords.push(word)
+        currentChars += wordLength
+      }
+    }
+
+    // The remainder runs to the cue's own end time so the original span is fully covered.
+    if (currentWords.length > 0) {
+      const text = currentWords.join(' ')
+      result.push({ index: 0, startTime: segmentStart, endTime: item.endTime, text })
+    }
+
+    return result
+  }
+
+  /**
+   * Split a cue using per-word timestamps, so every segment gets exact start and end times.
+   * A segment is closed when the next word would overflow the character limit or stretch the
+   * segment beyond `maxDuration`; a single oversized word still forms its own segment.
+   * @param item            cue to split; its `words` drive the split
+   * @param maxCharsPerLine longest allowed segment text
+   * @param maxDuration     longest allowed segment in seconds; unlimited when omitted
+   * @returns segments in order, all with index 0 (the caller re-indexes)
+   */
+  private splitSubtitleWithWordTimestamps(
+    item: ASRSubtitleItem,
+    maxCharsPerLine: number,
+    maxDuration: number = Number.POSITIVE_INFINITY
+  ): ASRSubtitleItem[] {
+    const result: ASRSubtitleItem[] = []
+    const words = item.words ?? []
+    const textOf = (w: (typeof words)[number]): string => w.punctuated_word || w.word
+
+    // Emit the gathered words as one segment spanning first-word start to last-word end.
+    const flush = (group: typeof words): void => {
+      if (group.length === 0) return
+      result.push({
+        index: 0, // re-indexed by the caller
+        startTime: group[0].start,
+        endTime: group[group.length - 1].end,
+        text: group.map(textOf).join(' '),
+        words: group
+      })
+    }
+
+    let currentWords: typeof words = []
+    let currentChars = 0
+
+    for (const word of words) {
+      const wordLength = textOf(word).length + 1 // +1 for the joining space
+      const hasContent = currentWords.length > 0
+      const tooWide = currentChars + wordLength > maxCharsPerLine
+      const tooLong = hasContent && word.end - currentWords[0].start > maxDuration
+
+      if (hasContent && (tooWide || tooLong)) {
+        flush(currentWords)
+        currentWords = [word]
+        currentChars = wordLength
+      } else {
+        currentWords.push(word)
+        currentChars += wordLength
+      }
+    }
+    flush(currentWords)
+
+    logger.debug('使用单词级时间戳拆分字幕', {
+      originalLength: item.text.length,
+      segmentCount: result.length
+    })
+
+    return result
+  }
+
+  /**
+   * Render each cue as one SRT block: 1-based number, timing line, text, trailing newline.
+   * Shared by both exporters so they cannot drift apart.
+   */
+  private toSRTBlocks(items: ASRSubtitleItem[]): string[] {
+    return items.map((item) => {
+      const timing = `${this.formatTime(item.startTime)} --> ${this.formatTime(item.endTime)}`
+      return `${item.index + 1}\n${timing}\n${item.text}\n`
+    })
+  }
+
+  /**
+   * Write the cues to `outputPath` as a UTF-8 SRT file.
+   * @param items      cues to write; `index` is used for numbering
+   * @param outputPath destination file
+   * @throws rethrows any write error after logging it
+   */
+  public async exportToSRT(items: ASRSubtitleItem[], outputPath: string): Promise<void> {
+    logger.info('导出 SRT 文件', { outputPath, itemCount: items.length })
+
+    try {
+      // SRT separates cues with a blank line, hence one extra newline after every block.
+      const srtContent = this.toSRTBlocks(items)
+        .map((block) => `${block}\n`)
+        .join('')
+
+      await fs.promises.writeFile(outputPath, srtContent, 'utf-8')
+
+      logger.info('SRT 文件导出成功', { outputPath })
+    } catch (error) {
+      logger.error('SRT 文件导出失败', {
+        error: error instanceof Error ? error.message : String(error)
+      })
+      throw error
+    }
+  }
+
+  /**
+   * Write the cues to `outputPath` as a UTF-8 WebVTT file.
+   * The cues are rendered as SRT text and converted with subsrt-ts.
+   * @param items      cues to write; `index` is used for numbering
+   * @param outputPath destination file
+   * @throws rethrows any conversion or write error after logging it
+   */
+  public async exportToVTT(items: ASRSubtitleItem[], outputPath: string): Promise<void> {
+    logger.info('导出 VTT 文件', { outputPath, itemCount: items.length })
+
+    try {
+      // Build the intermediate SRT document, cues separated by a blank line.
+      const srtContent = this.toSRTBlocks(items).join('\n')
+
+      // Hand the SRT text to subsrt-ts, asking for WebVTT output.
+      const vttContent = convert(srtContent, { from: 'srt', to: 'vtt' })
+
+      await fs.promises.writeFile(outputPath, vttContent, 'utf-8')
+
+      logger.info('VTT 文件导出成功', { outputPath })
+    } catch (error) {
+      logger.error('VTT 文件导出失败', {
+        error: error instanceof Error ? error.message : String(error)
+      })
+      throw error
+    }
+  }
+
+  /**
+   * Format a time in seconds as an SRT timestamp (HH:MM:SS,mmm).
+   * The value is rounded to the nearest millisecond first and every field is derived from that
+   * integer: flooring `(seconds % 1) * 1000` loses a millisecond to binary floating point
+   * (1.001 would print as ",000"). Negative input is clamped to zero.
+   */
+  private formatTime(seconds: number): string {
+    const totalMs = Math.max(0, Math.round(seconds * 1000))
+    const hours = Math.floor(totalMs / 3600000)
+    const minutes = Math.floor((totalMs % 3600000) / 60000)
+    const secs = Math.floor((totalMs % 60000) / 1000)
+    const millis = totalMs % 1000
+
+    // Zero-pad each field to its fixed width.
+    const pad = (value: number, width: number): string => String(value).padStart(width, '0')
+    return `${pad(hours, 2)}:${pad(minutes, 2)}:${pad(secs, 2)},${pad(millis, 3)}`
+  }
+}
+
+export default SubtitleFormatter
diff --git a/src/main/services/asr/__tests__/DeepgramTranscriber.callDeepgramAPI.test.ts b/src/main/services/asr/__tests__/DeepgramTranscriber.callDeepgramAPI.test.ts
new file mode 100644
index 00000000..0f8933e3
--- /dev/null
+++ b/src/main/services/asr/__tests__/DeepgramTranscriber.callDeepgramAPI.test.ts
@@ -0,0 +1,313 @@
+import type { DeepgramResponse } from '@shared/types'
+import https from 'https'
+import { afterEach, beforeAll, beforeEach, describe, expect, it, vi } from 'vitest'
+
+type SpyInstance = any // loose stand-in for the value returned by vi.spyOn
+
+const mockLogger = { // shared fake logger: one spy per log level
+  info: vi.fn(),
+  debug: vi.fn(),
+  warn: vi.fn(),
+  error: vi.fn()
+}
+
+vi.mock('../../LoggerService', () => ({ // every withContext() call hands back mockLogger
+  loggerService: {
+    withContext: () => mockLogger
+  }
+}))
+
+const statMock = vi.fn() // replaces fs.promises.stat
+const createReadStreamMock = vi.fn() // replaces fs.createReadStream
+
+vi.mock('fs', () => { // the same fake is exposed as the default export and as named exports
+  const mockModule = {
+    createReadStream: createReadStreamMock,
+    promises: {
+      stat: statMock
+    }
+  }
+
+  return {
+    __esModule: true,
+    default: mockModule,
+    ...mockModule
+  }
+})
+
+type Handler = (...args: any[]) => void // any event callback
+type HandlerMap = Record<string, Handler[]> // event name -> callbacks registered for it
+
+/** Fake fs.ReadStream: `pipe`/`destroy` are bare spies; `on` files listeners in `handlers`. */
+function createFakeReadStream() {
+  const handlers: HandlerMap = {}
+  const stream = {
+    pipe: vi.fn(),
+    destroy: vi.fn(),
+    on: vi.fn((eventName: string, listener: Handler) => {
+      ;(handlers[eventName] ??= []).push(listener)
+      return stream
+    })
+  }
+  return { stream, handlers }
+}
+
+/** Fake ClientRequest whose `on`/`setTimeout` file callbacks in `handlers` for tests to fire. */
+function createFakeRequest() {
+  const handlers: HandlerMap = {}
+  const req = {
+    on: vi.fn((eventName: string, listener: Handler) => {
+      ;(handlers[eventName] ??= []).push(listener)
+      return req
+    }),
+    setTimeout: vi.fn((_delayMs: number, onTimeout: Handler) => {
+      ;(handlers.timeout ??= []).push(onTimeout)
+      return req
+    }),
+    destroy: vi.fn(),
+    end: vi.fn()
+  }
+  return { req, handlers }
+}
+
+/** Set the status now; on the next tick replay each body chunk as 'data', then fire 'end'. */
+function emitResponse(
+  response: { statusCode: number },
+  handlers: HandlerMap,
+  statusCode: number,
+  bodyChunks: Array<string | Buffer> = []
+): void {
+  response.statusCode = statusCode
+  // Deferred to the next tick, so listeners attached right after this call still get the events.
+  process.nextTick(() => {
+    bodyChunks.forEach((chunk) => {
+      handlers.data?.forEach((onData) => onData(chunk))
+    })
+    handlers.end?.forEach((onEnd) => onEnd())
+  })
+}
+
+type DeepgramTranscriberClass = typeof import('../DeepgramTranscriber').default
+let DeepgramTranscriber: DeepgramTranscriberClass // assigned in beforeAll
+
+beforeAll(async () => { // dynamic import, performed at run time rather than at module load
+  DeepgramTranscriber = (await import('../DeepgramTranscriber')).default
+})
+
+describe('DeepgramTranscriber.callDeepgramAPI', () => {
+  let transcriber: InstanceType<DeepgramTranscriberClass>
+  let requestSpy: SpyInstance
+
+  beforeEach(() => { // fresh transcriber, stat result and https.request spy for every test
+    createReadStreamMock.mockReset()
+    transcriber = new DeepgramTranscriber(1)
+
+    statMock.mockResolvedValue({ size: 1024 } as any) // size is asserted below as Content-Length
+    requestSpy = vi.spyOn(https, 'request')
+  })
+
+  afterEach(() => {
+    vi.restoreAllMocks()
+    statMock.mockReset()
+    createReadStreamMock.mockReset()
+  })
+
+  it('resolves with parsed response when Deepgram returns 200', async () => {
+    const audioPath = '/tmp/audio.m4a'
+    const callOptions = {
+      apiKey: 'test-key',
+      model: 'nova-2' as const,
+      language: 'en',
+      smartFormat: true,
+      utterances: true,
+      utteranceEndMs: 750
+    }
+
+    const fakeResponse: DeepgramResponse = {
+      results: {
+        channels: [
+          {
+            alternatives: [
+              {
+                transcript: 'hello world',
+                confidence: 0.95,
+                words: []
+              }
+            ],
+            utterances: []
+          }
+        ]
+      },
+      metadata: {
+        request_id: 'req-123',
+        duration: 1.23,
+        channels: 1
+      }
+    }
+
+    const readStream = createFakeReadStream()
+    createReadStreamMock.mockReturnValue(readStream.stream as any)
+
+    let currentRequest: ReturnType<typeof createFakeRequest> | undefined
+
+    requestSpy.mockImplementation((url: string, options: any, callback: any) => { // fake https.request
+      const searchParams = new URL(url).searchParams
+      expect(searchParams.get('model')).toBe('nova-2')
+      expect(searchParams.get('language')).toBe('en')
+      expect(searchParams.get('detect_language')).toBeNull()
+      expect(searchParams.get('smart_format')).toBe('true')
+      expect(searchParams.get('utterances')).toBe('true')
+      expect(searchParams.get('utterance_end_ms')).toBe('750')
+
+      expect(options.method).toBe('POST')
+      expect(options.headers['Content-Type']).toBe('audio/mp4')
+      expect(options.headers['Content-Length']).toBe(1024)
+      expect(options.headers.Authorization).toBe(`Token ${callOptions.apiKey}`)
+
+      const responseHandlers: HandlerMap = {}
+      const response = {
+        statusCode: 200,
+        on: vi.fn((event: string, handler: Handler) => {
+          ;(responseHandlers[event] ||= []).push(handler)
+          return response
+        })
+      }
+
+      callback?.(response as any) // deliver the response first so its listeners get registered
+
+      currentRequest = createFakeRequest()
+      readStream.stream.pipe.mockReturnValue(currentRequest.req as any)
+
+      emitResponse(response, responseHandlers, 200, [Buffer.from(JSON.stringify(fakeResponse))]) // body and 'end' fire on the next tick
+
+      return currentRequest.req as any
+    })
+
+    const result = await (transcriber as any).callDeepgramAPI(audioPath, callOptions) // cast to any to call the method directly
+
+    expect(result).toEqual(fakeResponse)
+    expect(statMock).toHaveBeenCalledWith(audioPath)
+    expect(readStream.stream.pipe).toHaveBeenCalledWith(currentRequest?.req)
+  })
+
+  it('rejects with specific error when Deepgram returns 401', async () => {
+    const audioPath = '/tmp/audio.wav'
+    const callOptions = {
+      apiKey: 'test-key',
+      model: 'nova-3' as const,
+      language: 'auto' as const, // expected to become detect_language=true with no language param (asserted below)
+      smartFormat: false,
+      utterances: false,
+      utteranceEndMs: 500
+    }
+
+    const readStream = createFakeReadStream()
+    createReadStreamMock.mockReturnValue(readStream.stream as any)
+
+    requestSpy.mockImplementation((url: string, options: any, callback: any) => {
+      const params = new URL(url).searchParams
+      expect(params.get('model')).toBe('nova-3')
+      expect(params.get('language')).toBeNull()
+      expect(params.get('detect_language')).toBe('true')
+      expect(params.get('smart_format')).toBe('false')
+      expect(params.get('utterances')).toBe('false')
+      expect(params.get('utterance_end_ms')).toBe('500')
+
+      expect(options.headers['Content-Type']).toBe('audio/wav')
+
+      const responseHandlers: HandlerMap = {}
+      const response = {
+        statusCode: 401,
+        on: vi.fn((event: string, handler: Handler) => {
+          ;(responseHandlers[event] ||= []).push(handler)
+          return response
+        })
+      }
+
+      callback?.(response as any)
+
+      const requestState = createFakeRequest()
+      readStream.stream.pipe.mockReturnValue(requestState.req as any)
+
+      emitResponse(response, responseHandlers, 401, ['Unauthorized'])
+
+      return requestState.req as any
+    })
+
+    await expect((transcriber as any).callDeepgramAPI(audioPath, callOptions)).rejects.toThrow(
+      'API Key 无效'
+    )
+  })
+
+  it('rejects when Deepgram returns invalid JSON body', async () => {
+    const audioPath = '/tmp/audio.flac'
+    const callOptions = {
+      apiKey: 'test-key'
+    }
+
+    const readStream = createFakeReadStream()
+    createReadStreamMock.mockReturnValue(readStream.stream as any)
+
+    requestSpy.mockImplementation((_url: string, _options: any, callback: any) => {
+      const responseHandlers: HandlerMap = {}
+      const response = {
+        statusCode: 200,
+        on: vi.fn((event: string, handler: Handler) => {
+          ;(responseHandlers[event] ||= []).push(handler)
+          return response
+        })
+      }
+
+      callback?.(response as any)
+
+      const requestState = createFakeRequest()
+      readStream.stream.pipe.mockReturnValue(requestState.req as any)
+
+      emitResponse(response, responseHandlers, 200, ['{ invalid json']) // 200 status, but the body cannot be parsed
+
+      return requestState.req as any
+    })
+
+    await expect((transcriber as any).callDeepgramAPI(audioPath, callOptions)).rejects.toThrow(
+      '解析 Deepgram 响应失败'
+    )
+  })
+
+  it('rejects on network error and destroys read stream', async () => {
+    const audioPath = '/tmp/audio.ogg'
+    const callOptions = {
+      apiKey: 'test-key'
+    }
+
+    const readStream = createFakeReadStream()
+    createReadStreamMock.mockReturnValue(readStream.stream as any)
+
+    requestSpy.mockImplementation((_url: string, _options: any, callback: any) => {
+      const responseHandlers: HandlerMap = {}
+      const response = {
+        statusCode: 200,
+        on: vi.fn((event: string, handler: Handler) => {
+          ;(responseHandlers[event] ||= []).push(handler)
+          return response
+        })
+      }
+
+      callback?.(response as any)
+
+      const requestState = createFakeRequest()
+      readStream.stream.pipe.mockReturnValue(requestState.req as any)
+
+      process.nextTick(() => { // fail the request instead of emitting a response body
+        requestState.handlers.error?.forEach((handler) => handler(new Error('connection reset')))
+      })
+
+      return requestState.req as any
+    })
+
+    await expect((transcriber as any).callDeepgramAPI(audioPath, callOptions)).rejects.toThrow(
+      '网络错误: connection reset'
+    )
+
+    expect(readStream.stream.destroy).toHaveBeenCalledTimes(1) // the upload stream must be torn down on failure
+  })
+})
diff --git a/src/main/services/asr/__tests__/DeepgramTranscriber.makeValidationRequest.test.ts b/src/main/services/asr/__tests__/DeepgramTranscriber.makeValidationRequest.test.ts
new file mode 100644
index 00000000..c83d6a71
--- /dev/null
+++ b/src/main/services/asr/__tests__/DeepgramTranscriber.makeValidationRequest.test.ts
@@ -0,0 +1,314 @@
+import https from 'https'
+import { afterEach, beforeAll, beforeEach, describe, expect, it, vi } from 'vitest'
+
+type SpyInstance = any // loose stand-in for the value returned by vi.spyOn
+
+const mockLogger = { // shared fake logger: one spy per log level
+  info: vi.fn(),
+  debug: vi.fn(),
+  warn: vi.fn(),
+  error: vi.fn()
+}
+
+vi.mock('../../LoggerService', () => ({ // every withContext() call hands back mockLogger
+  loggerService: {
+    withContext: () => mockLogger
+  }
+}))
+
+type Handler = (...args: any[]) => void // any event callback
+type HandlerMap = Record<string, Handler[]> // event name -> callbacks registered for it
+
+/** Fake ClientRequest whose `on`/`setTimeout` file callbacks in `handlers` for tests to fire. */
+function createFakeRequest() {
+  const handlers: HandlerMap = {}
+  const req = {
+    on: vi.fn((eventName: string, listener: Handler) => {
+      ;(handlers[eventName] ??= []).push(listener)
+      return req
+    }),
+    setTimeout: vi.fn((_delayMs: number, onTimeout: Handler) => {
+      ;(handlers.timeout ??= []).push(onTimeout)
+      return req
+    }),
+    destroy: vi.fn(),
+    end: vi.fn()
+  }
+  return { req, handlers }
+}
+
+/** Set the status now; on the next tick replay each body chunk as 'data', then fire 'end'. */
+function emitResponse(
+  response: { statusCode: number },
+  handlers: HandlerMap,
+  statusCode: number,
+  bodyChunks: Array<string | Buffer> = []
+): void {
+  response.statusCode = statusCode
+  // Deferred to the next tick, so listeners attached right after this call still get the events.
+  process.nextTick(() => {
+    bodyChunks.forEach((chunk) => {
+      handlers.data?.forEach((onData) => onData(chunk))
+    })
+    handlers.end?.forEach((onEnd) => onEnd())
+  })
+}
+
+type DeepgramTranscriberClass = typeof import('../DeepgramTranscriber').default
+let DeepgramTranscriber: DeepgramTranscriberClass // assigned in beforeAll
+
+beforeAll(async () => { // dynamic import, performed at run time rather than at module load
+  DeepgramTranscriber = (await import('../DeepgramTranscriber')).default
+})
+
+describe('DeepgramTranscriber.makeValidationRequest', () => {
+  let transcriber: InstanceType<DeepgramTranscriberClass>
+  let requestSpy: SpyInstance
+
+  beforeEach(() => { // fresh transcriber and https.request spy for every test
+    transcriber = new DeepgramTranscriber(1)
+    requestSpy = vi.spyOn(https, 'request')
+  })
+
+  afterEach(() => {
+    vi.restoreAllMocks()
+  })
+
+  it('返回验证成功当 Deepgram 返回 200', async () => { // reports valid when Deepgram returns 200
+    const apiKey = 'valid-api-key'
+
+    requestSpy.mockImplementation((options: any, callback: any) => { // fake https.request: check the outgoing request, then answer it
+      expect(options.hostname).toBe('api.deepgram.com')
+      expect(options.path).toBe('/v1/auth/token')
+      expect(options.method).toBe('GET')
+      expect(options.headers.Authorization).toBe(`Token ${apiKey}`)
+      expect(options.headers['Content-Type']).toBe('application/json')
+      expect(options.timeout).toBe(8000)
+
+      const responseHandlers: HandlerMap = {}
+      const response = {
+        statusCode: 200,
+        on: vi.fn((event: string, handler: Handler) => {
+          ;(responseHandlers[event] ||= []).push(handler)
+          return response
+        })
+      }
+
+      callback?.(response as any) // deliver the response first so its listeners get registered
+
+      const requestState = createFakeRequest()
+      emitResponse(response, responseHandlers, 200, [Buffer.from('{"access_token": "test-token"}')]) // body and 'end' fire on the next tick
+
+      return requestState.req as any
+    })
+
+    const result = await (transcriber as any).makeValidationRequest(apiKey) // cast to any to call the method directly
+
+    expect(result).toEqual({ valid: true })
+    expect(mockLogger.info).toHaveBeenCalledWith('API Key 验证成功')
+  })
+
+  it('返回验证失败当 Deepgram 返回 401 INVALID_AUTH', async () => { // reports invalid on 401 INVALID_AUTH
+    const apiKey = 'invalid-api-key'
+
+    requestSpy.mockImplementation((_options: any, callback: any) => {
+      const responseHandlers: HandlerMap = {}
+      const response = {
+        statusCode: 401,
+        on: vi.fn((event: string, handler: Handler) => {
+          ;(responseHandlers[event] ||= []).push(handler)
+          return response
+        })
+      }
+
+      callback?.(response as any)
+
+      const requestState = createFakeRequest()
+      const errorBody = JSON.stringify({
+        err_code: 'INVALID_AUTH',
+        message: 'Invalid credentials.'
+      })
+      emitResponse(response, responseHandlers, 401, [Buffer.from(errorBody)])
+
+      return requestState.req as any
+    })
+
+    const result = await (transcriber as any).makeValidationRequest(apiKey)
+
+    expect(result).toEqual({ valid: false, error: 'API Key 无效' })
+  })
+
+  it('返回验证失败当 Deepgram 返回 401 INSUFFICIENT_PERMISSIONS', async () => { // reports invalid on 401 INSUFFICIENT_PERMISSIONS
+    const apiKey = 'insufficient-permissions-key'
+
+    requestSpy.mockImplementation((_options: any, callback: any) => {
+      const responseHandlers: HandlerMap = {}
+      const response = {
+        statusCode: 401,
+        on: vi.fn((event: string, handler: Handler) => {
+          ;(responseHandlers[event] ||= []).push(handler)
+          return response
+        })
+      }
+
+      callback?.(response as any)
+
+      const requestState = createFakeRequest()
+      const errorBody = JSON.stringify({
+        err_code: 'INSUFFICIENT_PERMISSIONS',
+        message: 'Insufficient permissions.'
+      })
+      emitResponse(response, responseHandlers, 401, [Buffer.from(errorBody)])
+
+      return requestState.req as any
+    })
+
+    const result = await (transcriber as any).makeValidationRequest(apiKey)
+
+    expect(result).toEqual({ valid: false, error: 'API Key 权限不足' })
+  })
+
+  it('返回验证失败当 Deepgram 返回 401 但响应体不是有效 JSON', async () => { // reports invalid on 401 whose body is not valid JSON
+    const apiKey = 'invalid-api-key'
+
+    requestSpy.mockImplementation((_options: any, callback: any) => {
+      const responseHandlers: HandlerMap = {}
+      const response = {
+        statusCode: 401,
+        on: vi.fn((event: string, handler: Handler) => {
+          ;(responseHandlers[event] ||= []).push(handler)
+          return response
+        })
+      }
+
+      callback?.(response as any)
+
+      const requestState = createFakeRequest()
+      emitResponse(response, responseHandlers, 401, ['Invalid credentials'])
+
+      return requestState.req as any
+    })
+
+    const result = await (transcriber as any).makeValidationRequest(apiKey)
+
+    expect(result).toEqual({ valid: false, error: 'API Key 认证失败' })
+  })
+
+  it('返回验证失败当 Deepgram 返回 403', async () => { // reports invalid when Deepgram returns 403
+    const apiKey = 'forbidden-api-key'
+
+    requestSpy.mockImplementation((_options: any, callback: any) => {
+      const responseHandlers: HandlerMap = {}
+      const response = {
+        statusCode: 403,
+        on: vi.fn((event: string, handler: Handler) => {
+          ;(responseHandlers[event] ||= []).push(handler)
+          return response
+        })
+      }
+
+      callback?.(response as any)
+
+      const requestState = createFakeRequest()
+      emitResponse(response, responseHandlers, 403) // no body chunks: only 'end' is emitted
+
+      return requestState.req as any
+    })
+
+    const result = await (transcriber as any).makeValidationRequest(apiKey)
+
+    expect(result).toEqual({ valid: false, error: 'API Key 权限不足或访问被拒绝' })
+  })
+
+  it('返回验证失败当收到其他 HTTP 状态码', async () => { // reports invalid on any other HTTP status code
+    const apiKey = 'test-api-key'
+
+    requestSpy.mockImplementation((_options: any, callback: any) => {
+      const responseHandlers: HandlerMap = {}
+      const response = {
+        statusCode: 500,
+        on: vi.fn((event: string, handler: Handler) => {
+          ;(responseHandlers[event] ||= []).push(handler)
+          return response
+        })
+      }
+
+      callback?.(response as any)
+
+      const requestState = createFakeRequest()
+      emitResponse(response, responseHandlers, 500, [Buffer.from('Internal Server Error')])
+
+      return requestState.req as any
+    })
+
+    const result = await (transcriber as any).makeValidationRequest(apiKey)
+
+    expect(result).toEqual({ valid: false, error: '验证失败 (HTTP 500)' })
+    expect(mockLogger.warn).toHaveBeenCalledWith('API Key 验证收到意外状态码', {
+      statusCode: 500,
+      body: 'Internal Server Error'
+    })
+  })
+
+  it('返回验证失败当网络请求出错', async () => { // reports invalid when the network request errors
+    const apiKey = 'test-api-key'
+
+    requestSpy.mockImplementation((_options: any, callback: any) => {
+      const responseHandlers: HandlerMap = {}
+      const response = {
+        statusCode: 200,
+        on: vi.fn((event: string, handler: Handler) => {
+          ;(responseHandlers[event] ||= []).push(handler)
+          return response
+        })
+      }
+
+      callback?.(response as any)
+
+      const requestState = createFakeRequest()
+
+      process.nextTick(() => { // fail the request instead of emitting a response body
+        requestState.handlers.error?.forEach((handler) => handler(new Error('ECONNRESET')))
+      })
+
+      return requestState.req as any
+    })
+
+    const result = await (transcriber as any).makeValidationRequest(apiKey)
+
+    expect(result).toEqual({ valid: false, error: '网络连接失败，请检查网络设置' })
+    expect(mockLogger.error).toHaveBeenCalledWith('API Key 验证请求失败', { error: 'ECONNRESET' })
+  })
+
+  it('返回验证失败当请求超时', async () => { // reports invalid when the request times out
+    const apiKey = 'test-api-key'
+
+    requestSpy.mockImplementation((_options: any, callback: any) => {
+      const responseHandlers: HandlerMap = {}
+      const response = {
+        statusCode: 200,
+        on: vi.fn((event: string, handler: Handler) => {
+          ;(responseHandlers[event] ||= []).push(handler)
+          return response
+        })
+      }
+
+      callback?.(response as any)
+
+      const requestState = createFakeRequest()
+
+      // Simulate a timeout by firing the recorded 'timeout' callbacks
+      process.nextTick(() => {
+        requestState.handlers.timeout?.forEach((handler) => handler())
+      })
+
+      return requestState.req as any
+    })
+
+    const result = await (transcriber as any).makeValidationRequest(apiKey)
+
+    expect(result).toEqual({ valid: false, error: '验证请求超时，请稍后重试' })
+    expect(mockLogger.error).toHaveBeenCalledWith('API Key 验证请求超时')
+    expect(requestSpy.mock.results[0].value.destroy).toHaveBeenCalled() // the timed-out request must be destroyed
+  })
+})
diff --git a/src/main/services/audio/AudioPreprocessor.ts b/src/main/services/audio/AudioPreprocessor.ts
new file mode 100644
index 00000000..191b4fe3
--- /dev/null
+++ b/src/main/services/audio/AudioPreprocessor.ts
@@ -0,0 +1,268 @@
+/**
+ * 音频预处理服务
+ * 负责从视频中提取音频轨道，转换为适合 ASR 的格式
+ */
+
+import { spawn } from 'child_process'
+import * as fs from 'fs'
+import * as os from 'os'
+import * as path from 'path'
+
+import FFmpegService from '../FFmpegService'
+import { loggerService } from '../LoggerService'
+
+const logger = loggerService.withContext('AudioPreprocessor')
+
+export interface AudioExtractOptions {
+  /** Sample rate in Hz; defaults to 16000 */
+  sampleRate?: number
+  /** Channel count; defaults to 1 (mono) — FFmpeg mixes down automatically */
+  channels?: number
+  /** Output format; defaults to 'wav' */
+  format?: 'wav' | 'mp3'
+}
+
+export interface AudioExtractResult {
+  /** Whether the extraction succeeded */
+  success: boolean
+  /** Path of the produced audio file */
+  audioPath?: string
+  /** Audio duration in seconds */
+  duration?: number
+  /** Error message */
+  error?: string
+}
+
+/**
+ * Extract the media duration, in seconds, from FFmpeg output ("Duration: HH:MM:SS[.fraction]").
+ * Hours may have more than two digits; the fraction is optional and may have any number of digits.
+ * Only the first "Duration:" timestamp in `output` is used.
+ * @param output text printed by FFmpeg
+ * @returns the duration in seconds, or null when no timestamp follows "Duration:" (e.g. "N/A")
+ */
+export function parseFFmpegDuration(output: string): number | null {
+  // FFmpeg zero-pads hours to two digits but prints more for inputs of 100 hours or longer.
+  const match = /Duration: (\d{2,}):(\d{2}):(\d{2})(?:\.(\d+))?/.exec(output)
+  if (match === null) {
+    return null
+  }
+
+  const [, hh, mm, ss, fraction = ''] = match
+  const hours = Number(hh)
+  const minutes = Number(mm)
+  const seconds = Number(ss)
+
+  // Scale the fraction by its own digit count so ".5", ".50" and ".500" all mean half a second.
+  const fractionalSeconds = fraction === '' ? 0 : Number(fraction) / Math.pow(10, fraction.length)
+
+  return hours * 3600 + minutes * 60 + seconds + fractionalSeconds
+}
+
+/** Extracts a video's audio track with FFmpeg and transcodes it for speech recognition. */
+class AudioPreprocessor {
+  private readonly ffmpegService: FFmpegService
+
+  constructor() {
+    this.ffmpegService = new FFmpegService()
+  }
+
+  /**
+   * Extract the first audio stream of `videoPath` into `outputDir` as `audio.<format>`.
+   * The audio is always re-encoded (resampled, downmixed), never stream-copied.
+   * Errors raised during extraction are caught and reported through the result object.
+   * @param options sample rate (default 16000 Hz), channels (default 1), format (default 'wav')
+   * @returns `success` plus either `audioPath`/`duration` or an `error` message
+   */
+  public async extractAudioTrack(
+    videoPath: string,
+    outputDir: string,
+    options: AudioExtractOptions = {}
+  ): Promise<AudioExtractResult> {
+    const { sampleRate = 16000, channels = 1, format = 'wav' } = options
+
+    const startTime = Date.now()
+    logger.info('开始提取音频轨道', {
+      videoPath,
+      sampleRate,
+      channels,
+      format
+    })
+
+    try {
+      if (!fs.existsSync(videoPath)) {
+        logger.error('视频文件不存在', { videoPath })
+        return { success: false, error: '视频文件不存在' }
+      }
+
+      if (!fs.existsSync(outputDir)) {
+        fs.mkdirSync(outputDir, { recursive: true })
+      }
+
+      const outputPath = path.join(outputDir, `audio.${format}`)
+      const ffmpegPath = this.ffmpegService.getFFmpegPath()
+      const args = this.buildFFmpegArgs(videoPath, outputPath, sampleRate, channels, format)
+
+      logger.debug('执行 FFmpeg 命令', { ffmpegPath, args })
+
+      const { success, duration, error } = await this.runFFmpegExtract(ffmpegPath, args)
+      if (!success) {
+        return { success: false, error: error || 'FFmpeg 提取失败' }
+      }
+
+      // A zero exit code alone is not trusted: confirm the file was actually written.
+      if (!fs.existsSync(outputPath)) {
+        return { success: false, error: '输出文件未生成' }
+      }
+
+      const totalTime = Date.now() - startTime
+      logger.info('音频提取成功', {
+        outputPath,
+        duration,
+        totalTime: `${totalTime}ms`
+      })
+
+      return { success: true, audioPath: outputPath, duration }
+    } catch (error) {
+      const totalTime = Date.now() - startTime
+      const errorMsg = error instanceof Error ? error.message : String(error)
+      logger.error('音频提取失败', { error: errorMsg, totalTime: `${totalTime}ms` })
+      return { success: false, error: errorMsg }
+    }
+  }
+
+  /**
+   * Build the FFmpeg argument list: first audio stream only, no video, resampled to
+   * `sampleRate`, mixed to `channels`, encoded as 16-bit PCM (wav) or 128 kbit/s MP3.
+   */
+  private buildFFmpegArgs(
+    inputPath: string,
+    outputPath: string,
+    sampleRate: number,
+    channels: number,
+    format: 'wav' | 'mp3' = 'wav'
+  ): string[] {
+    const isMp3 = format === 'mp3'
+
+    const args: string[] = [
+      '-i',
+      inputPath,
+      '-vn', // drop video
+      '-map',
+      '0:a:0', // first audio stream of the first input
+      '-ar',
+      String(sampleRate),
+      '-ac',
+      String(channels), // FFmpeg mixes down to this channel count
+      '-c:a',
+      isMp3 ? 'libmp3lame' : 'pcm_s16le',
+      '-y' // overwrite an existing output file
+    ]
+
+    // Only the lossy codec takes a bitrate.
+    if (isMp3) {
+      args.push('-b:a', '128k')
+    }
+
+    args.push(outputPath)
+
+    return args
+  }
+
+  /**
+   * Run FFmpeg and resolve with the outcome; the promise never rejects.
+   * The process is killed after five minutes. Only the first outcome is logged and reported,
+   * because 'close' is still emitted after a timeout kill and may follow 'error'.
+   * @returns `success`, the duration in seconds if FFmpeg printed one, or an `error` message
+   */
+  private async runFFmpegExtract(
+    ffmpegPath: string,
+    args: string[]
+  ): Promise<{ success: boolean; duration?: number; error?: string }> {
+    // Only the tail of stderr is ever logged, and FFmpeg's progress output is unbounded.
+    const STDERR_TAIL_CHARS = 8192
+
+    return new Promise((resolve) => {
+      const ffmpeg = spawn(ffmpegPath, args)
+
+      let stderrOutput = ''
+      let duration: number | undefined
+      let settled = false
+
+      const timeoutHandle = setTimeout(() => {
+        if (!ffmpeg.killed) {
+          ffmpeg.kill('SIGKILL')
+        }
+        logger.error('FFmpeg 提取超时')
+        finish({ success: false, error: 'FFmpeg 提取超时' })
+      }, 5 * 60 * 1000) // 5 minute limit
+
+      // Settle once: stop the timer and make every later event a no-op.
+      const finish = (result: { success: boolean; duration?: number; error?: string }): void => {
+        settled = true
+        clearTimeout(timeoutHandle)
+        resolve(result)
+      }
+
+      ffmpeg.stderr?.on('data', (data) => {
+        stderrOutput += data.toString()
+
+        // Search the accumulated text, not just this chunk: the line may straddle two chunks.
+        if (duration === undefined) {
+          duration = parseFFmpegDuration(stderrOutput) ?? undefined
+        }
+        if (stderrOutput.length > STDERR_TAIL_CHARS) {
+          stderrOutput = stderrOutput.slice(-STDERR_TAIL_CHARS)
+        }
+      })
+
+      ffmpeg.on('close', (code) => {
+        if (settled) return
+        if (code === 0) {
+          logger.debug('FFmpeg 提取成功', { code, duration })
+          finish({ success: true, duration })
+        } else {
+          logger.error('FFmpeg 提取失败', {
+            code,
+            error: stderrOutput.slice(-500)
+          })
+          finish({ success: false, error: `FFmpeg 退出码: ${code}` })
+        }
+      })
+
+      ffmpeg.on('error', (error) => {
+        if (settled) return
+        logger.error('FFmpeg 进程错误', { error: error.message })
+        finish({ success: false, error: error.message })
+      })
+    })
+  }
+
+  /**
+   * Create a uniquely named directory under the OS temp dir.
+   * @returns the path of the new directory
+   */
+  public createTempDir(prefix: string = 'asr-audio-'): string {
+    const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), prefix))
+    logger.debug('创建临时目录', { tempDir })
+    return tempDir
+  }
+
+  /**
+   * Recursively delete `dirPath` if it exists. Best effort: failures are logged, never thrown.
+   */
+  public async cleanupTempDir(dirPath: string): Promise<void> {
+    try {
+      if (fs.existsSync(dirPath)) {
+        await fs.promises.rm(dirPath, { recursive: true, force: true })
+        logger.info('清理临时目录成功', { dirPath })
+      }
+    } catch (error) {
+      logger.error('清理临时目录失败', {
+        dirPath,
+        error: error instanceof Error ? error.message : String(error)
+      })
+    }
+  }
+}
+
+export default AudioPreprocessor
diff --git a/src/preload/index.ts b/src/preload/index.ts
index 13deba01..d77bd60a 100644
--- a/src/preload/index.ts
+++ b/src/preload/index.ts
@@ -2,6 +2,7 @@ import { electronAPI } from '@electron-toolkit/preload'
 import { UpgradeChannel } from '@shared/config/constant'
 import { LogLevel, LogSourceWithContext } from '@shared/config/logger'
 import { IpcChannel } from '@shared/IpcChannel'
+import type { ASRGenerateOptions, ASRProgress, ASRResult } from '@shared/types'
 import { DictionaryResponse, FFmpegVideoInfo, Shortcut, ThemeMode } from '@types'
 import { contextBridge, ipcRenderer, OpenDialogOptions, shell, webUtils } from 'electron'
 import type {
@@ -268,6 +269,20 @@ const api = {
         timeoutMs
       )
   },
+  asr: {
+    generate: (options: ASRGenerateOptions): Promise<ASRResult> =>
+      ipcRenderer.invoke(IpcChannel.ASR_Generate, options), // request/response on the ASR_Generate channel
+    cancel: (taskId: string): Promise<void> => ipcRenderer.invoke(IpcChannel.ASR_Cancel, taskId),
+    validateApiKey: (apiKey: string): Promise<boolean> =>
+      ipcRenderer.invoke(IpcChannel.ASR_ValidateApiKey, apiKey), // resolves with the handler's boolean result
+    onProgress: (listener: (progress: ASRProgress) => void) => { // subscribe; returns an unsubscribe function
+      const handler = (_event: unknown, payload: ASRProgress) => listener(payload) // forward payload only
+      ipcRenderer.on(IpcChannel.ASR_Progress, handler)
+      return () => {
+        ipcRenderer.removeListener(IpcChannel.ASR_Progress, handler) // detach the same handler instance
+      }
+    }
+  },
   uv: {
     checkInstallation: (): Promise<{
       exists: boolean
@@ -584,79 +599,6 @@ const api = {
         ipcRenderer.invoke(IpcChannel.DB_PlayerSettings_Has, videoId)
     }
   }
-  // Binary related APIs
-  // isBinaryExist: (name: string) => ipcRenderer.invoke(IpcChannel.App_IsBinaryExist, name),
-  // getBinaryPath: (name: string) => ipcRenderer.invoke(IpcChannel.App_GetBinaryPath, name),
-  // installUVBinary: () => ipcRenderer.invoke(IpcChannel.App_InstallUvBinary),
-  // installBunBinary: () => ipcRenderer.invoke(IpcChannel.App_InstallBunBinary),
-
-  // searchService: {
-  //   openSearchWindow: (uid: string) => ipcRenderer.invoke(IpcChannel.SearchWindow_Open, uid),
-  //   closeSearchWindow: (uid: string) => ipcRenderer.invoke(IpcChannel.SearchWindow_Close, uid),
-  //   openUrlInSearchWindow: (uid: string, url: string) =>
-  //     ipcRenderer.invoke(IpcChannel.SearchWindow_OpenUrl, uid, url)
-  // },
-  // webview: {
-  //   setOpenLinkExternal: (webviewId: number, isExternal: boolean) =>
-  //     ipcRenderer.invoke(IpcChannel.Webview_SetOpenLinkExternal, webviewId, isExternal),
-  //   setSpellCheckEnabled: (webviewId: number, isEnable: boolean) =>
-  //     ipcRenderer.invoke(IpcChannel.Webview_SetSpellCheckEnabled, webviewId, isEnable)
-  // },
-  // storeSync: {
-  //   subscribe: () => ipcRenderer.invoke(IpcChannel.StoreSync_Subscribe),
-  //   unsubscribe: () => ipcRenderer.invoke(IpcChannel.StoreSync_Unsubscribe),
-  //   onUpdate: (action: any) => ipcRenderer.invoke(IpcChannel.StoreSync_OnUpdate, action)
-  // },
-  // selection: {
-  //   hideToolbar: () => ipcRenderer.invoke(IpcChannel.Selection_ToolbarHide),
-  //   writeToClipboard: (text: string) =>
-  //     ipcRenderer.invoke(IpcChannel.Selection_WriteToClipboard, text),
-  //   determineToolbarSize: (width: number, height: number) =>
-  //     ipcRenderer.invoke(IpcChannel.Selection_ToolbarDetermineSize, width, height),
-  //   setEnabled: (enabled: boolean) => ipcRenderer.invoke(IpcChannel.Selection_SetEnabled, enabled),
-  //   setTriggerMode: (triggerMode: string) =>
-  //     ipcRenderer.invoke(IpcChannel.Selection_SetTriggerMode, triggerMode),
-  //   setFollowToolbar: (isFollowToolbar: boolean) =>
-  //     ipcRenderer.invoke(IpcChannel.Selection_SetFollowToolbar, isFollowToolbar),
-  //   setRemeberWinSize: (isRemeberWinSize: boolean) =>
-  //     ipcRenderer.invoke(IpcChannel.Selection_SetRemeberWinSize, isRemeberWinSize),
-  //   setFilterMode: (filterMode: string) =>
-  //     ipcRenderer.invoke(IpcChannel.Selection_SetFilterMode, filterMode),
-  //   setFilterList: (filterList: string[]) =>
-  //     ipcRenderer.invoke(IpcChannel.Selection_SetFilterList, filterList),
-  //   processAction: (actionItem: ActionItem, isFullScreen: boolean = false) =>
-  //     ipcRenderer.invoke(IpcChannel.Selection_ProcessAction, actionItem, isFullScreen),
-  //   closeActionWindow: () => ipcRenderer.invoke(IpcChannel.Selection_ActionWindowClose),
-  //   minimizeActionWindow: () => ipcRenderer.invoke(IpcChannel.Selection_ActionWindowMinimize),
-  //   pinActionWindow: (isPinned: boolean) =>
-  //     ipcRenderer.invoke(IpcChannel.Selection_ActionWindowPin, isPinned)
-  // },
-  // quoteToMainWindow: (text: string) => ipcRenderer.invoke(IpcChannel.App_QuoteToMain, text),
-  // setDisableHardwareAcceleration: (isDisable: boolean) =>
-  //   ipcRenderer.invoke(IpcChannel.App_SetDisableHardwareAcceleration, isDisable),
-  // trace: {
-  //   saveData: (topicId: string) => ipcRenderer.invoke(IpcChannel.TRACE_SAVE_DATA, topicId),
-  //   getData: (topicId: string, traceId: string, modelName?: string) =>
-  //     ipcRenderer.invoke(IpcChannel.TRACE_GET_DATA, topicId, traceId, modelName),
-  //   saveEntity: (entity: SpanEntity) => ipcRenderer.invoke(IpcChannel.TRACE_SAVE_ENTITY, entity),
-  //   getEntity: (spanId: string) => ipcRenderer.invoke(IpcChannel.TRACE_GET_ENTITY, spanId),
-  //   bindTopic: (topicId: string, traceId: string) =>
-  //     ipcRenderer.invoke(IpcChannel.TRACE_BIND_TOPIC, topicId, traceId),
-  //   tokenUsage: (spanId: string, usage: TokenUsage) =>
-  //     ipcRenderer.invoke(IpcChannel.TRACE_TOKEN_USAGE, spanId, usage),
-  //   cleanHistory: (topicId: string, traceId: string, modelName?: string) =>
-  //     ipcRenderer.invoke(IpcChannel.TRACE_CLEAN_HISTORY, topicId, traceId, modelName),
-  //   cleanTopic: (topicId: string, traceId?: string) =>
-  //     ipcRenderer.invoke(IpcChannel.TRACE_CLEAN_TOPIC, topicId, traceId),
-  //   openWindow: (topicId: string, traceId: string, autoOpen?: boolean, modelName?: string) =>
-  //     ipcRenderer.invoke(IpcChannel.TRACE_OPEN_WINDOW, topicId, traceId, autoOpen, modelName),
-  //   setTraceWindowTitle: (title: string) => ipcRenderer.invoke(IpcChannel.TRACE_SET_TITLE, title),
-  //   addEndMessage: (spanId: string, modelName: string, context: string) =>
-  //     ipcRenderer.invoke(IpcChannel.TRACE_ADD_END_MESSAGE, spanId, modelName, context),
-  //   cleanLocalData: () => ipcRenderer.invoke(IpcChannel.TRACE_CLEAN_LOCAL_DATA),
-  //   addStreamMessage: (spanId: string, modelName: string, context: string, message: any) =>
-  //     ipcRenderer.invoke(IpcChannel.TRACE_ADD_STREAM_MESSAGE, spanId, modelName, context, message)
-  // }
 }
 
 // Use `contextBridge` APIs to expose Electron APIs to
diff --git a/src/renderer/src/i18n/locales/en-us.json b/src/renderer/src/i18n/locales/en-us.json
index 3e56423c..b8850941 100644
--- a/src/renderer/src/i18n/locales/en-us.json
+++ b/src/renderer/src/i18n/locales/en-us.json
@@ -1,5 +1,6 @@
 {
   "common": {
+    "confirm": "Confirm",
     "favorites": "Favorites",
     "favorites_developing": "This feature is under development",
     "home": "Home",
@@ -12,6 +13,50 @@
     "title": "Documentation"
   },
   "player": {
+    "asr": {
+      "errors": {
+        "apiQuotaExceeded": "API quota exceeded, please check your Deepgram account",
+        "audioExtractionFailed": "Audio extraction failed, please check the video file",
+        "invalidApiKey": "Invalid API Key, please check settings",
+        "networkError": "Network error, please check your connection and retry",
+        "noApiKey": "Please configure Deepgram API Key in settings first",
+        "transcriptionFailed": "Transcription failed, please retry",
+        "unknown": "Generation failed: {{message}}"
+      },
+      "progress": {
+        "cancel": "Cancel",
+        "cancelConfirm": "Are you sure you want to cancel subtitle generation?",
+        "cancelConfirmDescription": "All temporary files will be cleaned up and generated content will not be saved.",
+        "confirmCancel": "Confirm Cancel",
+        "estimatedTime": "Estimated remaining time: {{minutes}} minutes",
+        "stages": {
+          "complete": "Generation complete!",
+          "extracting": "Extracting audio...",
+          "failed": "Generation failed",
+          "formatting": "Formatting subtitles...",
+          "initializing": "Initializing...",
+          "saving": "Saving subtitles...",
+          "transcribing": "Transcribing..."
+        },
+        "title": "Generating Subtitles"
+      },
+      "prompt": {
+        "configureApiKey": "Please configure Deepgram API Key in settings first",
+        "description": "Use Deepgram speech recognition technology to automatically generate accurate subtitles with multi-language support and word-level timestamps.",
+        "dontShowAgain": "Don't show again",
+        "estimatedTime": "Estimated time: {{minutes}} minutes",
+        "generate": "Generate",
+        "language": "Select Language",
+        "later": "Later",
+        "message": "Would you like to automatically generate subtitles using AI?",
+        "title": "No Subtitles Detected"
+      },
+      "success": {
+        "autoLoaded": "Subtitles have been automatically loaded",
+        "message": "Successfully generated {{count}} subtitle items",
+        "title": "Subtitle Generation Successful"
+      }
+    },
     "controls": {
       "subtitle": {
         "background-type": {
@@ -90,23 +135,24 @@
     "subtitleList": {
       "empty": {
         "description": "Choose a way to start adding subtitles",
-        "title": "No matching subtitle file found",
         "options": {
+          "ai": {
+            "action": "API Key Required",
+            "actionEnabled": "Generate",
+            "description": "Generate word-level subtitles based on speech recognition",
+            "title": "AI-Generated Subtitles"
+          },
           "embedded": {
-            "title": "Use Embedded Subtitles",
+            "action": "Select",
             "description": "Video file contains subtitle tracks that can be imported directly",
-            "action": "Select"
+            "title": "Use Embedded Subtitles"
           },
           "external": {
-            "title": "Import External Subtitles",
-            "description": "Import SRT, VTT, and other subtitle formats from local files"
-          },
-          "ai": {
-            "title": "AI-Generated Subtitles",
-            "description": "Generate word-level subtitles based on speech recognition",
-            "action": "Coming Soon"
+            "description": "Import SRT, VTT, and other subtitle formats from local files",
+            "title": "Import External Subtitles"
           }
-        }
+        },
+        "title": "No matching subtitle file found"
       },
       "search": {
         "count": "Found {{count}} subtitle",
@@ -120,11 +166,21 @@
       }
     },
     "subtitleTrackSelector": {
-      "title": "Import Embedded Subtitle Tracks",
+      "actions": {
+        "cancel": "Cancel",
+        "import": "Import"
+      },
       "empty": "No subtitle tracks detected",
+      "messages": {
+        "extractFailed": "Failed to extract subtitle track {{index}}",
+        "importFailed": "Failed to extract subtitle tracks, please try again",
+        "importMultipleSuccess": "Imported {{tracks}} subtitle tracks ({{count}} items total)",
+        "importSuccess": "Imported subtitle: {{source}} ({{count}} items)",
+        "selectAtLeastOne": "Please select at least one subtitle track"
+      },
       "sections": {
-        "text": "Text Subtitle Tracks",
-        "image": "PGS Subtitle Tracks (Image Subtitles)"
+        "image": "PGS Subtitle Tracks (Image Subtitles)",
+        "text": "Text Subtitle Tracks"
       },
       "stream": {
         "label": "Stream {{index}}",
@@ -134,19 +190,9 @@
           "unsupported": "Unsupported"
         }
       },
+      "title": "Import Embedded Subtitle Tracks",
       "warning": {
         "pgs": "PGS is an image-based subtitle format that requires OCR technology support. Import is not currently supported."
-      },
-      "actions": {
-        "cancel": "Cancel",
-        "import": "Import"
-      },
-      "messages": {
-        "selectAtLeastOne": "Please select at least one subtitle track",
-        "extractFailed": "Failed to extract subtitle track {{index}}",
-        "importFailed": "Failed to extract subtitle tracks, please try again",
-        "importSuccess": "Imported subtitle: {{source}} ({{count}} items)",
-        "importMultipleSuccess": "Imported {{tracks}} subtitle tracks ({{count}} items total)"
       }
     }
   },
@@ -189,6 +235,43 @@
     "appearance": {
       "title": "Appearance Settings"
     },
+    "asr": {
+      "apiKey": {
+        "description": "Get an API Key from Deepgram to use AI subtitle generation features",
+        "getKey": "Get API Key",
+        "invalid": "API Key is invalid",
+        "label": "Deepgram API Key",
+        "placeholder": "Enter your Deepgram API Key",
+        "saveFailed": "Failed to save",
+        "saved": "API Key saved",
+        "valid": "API Key is valid",
+        "validate": "Validate",
+        "validating": "Validating..."
+      },
+      "defaultLanguage": {
+        "description": "Default language for automatic subtitle generation",
+        "label": "Default Language"
+      },
+      "description": "Configure AI subtitle auto-generation using Deepgram speech recognition technology to generate accurate subtitles for videos",
+      "languages": {
+        "auto": "Auto Detect",
+        "de": "German",
+        "en": "English",
+        "es": "Spanish",
+        "fr": "French",
+        "ja": "Japanese",
+        "ko": "Korean",
+        "ru": "Russian",
+        "zh": "Chinese"
+      },
+      "model": {
+        "description": "Select Deepgram transcription model",
+        "label": "Transcription Model",
+        "nova2": "Nova 2 (Recommended)",
+        "nova3": "Nova 3 (Latest)"
+      },
+      "title": "Speech Recognition"
+    },
     "developer": {
       "enable_developer_mode": "Enable developer mode",
       "title": "Developer mode"
diff --git a/src/renderer/src/i18n/locales/zh-cn.json b/src/renderer/src/i18n/locales/zh-cn.json
index 2432b31d..a3138f57 100644
--- a/src/renderer/src/i18n/locales/zh-cn.json
+++ b/src/renderer/src/i18n/locales/zh-cn.json
@@ -4,6 +4,7 @@
   },
   "common": {
     "cancel": "取消",
+    "confirm": "确认",
     "disabled": "已关闭",
     "enabled": "已开启",
     "favorites": "收藏",
@@ -44,6 +45,50 @@
     }
   },
   "player": {
+    "asr": {
+      "errors": {
+        "apiQuotaExceeded": "API 配额已用尽，请检查您的 Deepgram 账户",
+        "audioExtractionFailed": "音频提取失败，请检查视频文件",
+        "invalidApiKey": "API Key 无效，请检查设置",
+        "networkError": "网络错误，请检查连接后重试",
+        "noApiKey": "请先在设置中配置 Deepgram API Key",
+        "transcriptionFailed": "转写失败，请重试",
+        "unknown": "生成失败：{{message}}"
+      },
+      "progress": {
+        "cancel": "取消",
+        "cancelConfirm": "确定要取消字幕生成吗？",
+        "cancelConfirmDescription": "取消后将清理所有临时文件，已生成的内容将不会保存。",
+        "confirmCancel": "确认取消",
+        "estimatedTime": "预计剩余时间：{{minutes}} 分钟",
+        "stages": {
+          "complete": "生成完成！",
+          "extracting": "提取音频中...",
+          "failed": "生成失败",
+          "formatting": "格式化字幕中...",
+          "initializing": "初始化中...",
+          "saving": "保存字幕中...",
+          "transcribing": "转写中..."
+        },
+        "title": "正在生成字幕"
+      },
+      "prompt": {
+        "configureApiKey": "请先在设置中配置 Deepgram API Key",
+        "description": "使用 Deepgram 语音识别技术自动生成准确的字幕，支持多语言和词级时间戳。",
+        "dontShowAgain": "不再提示",
+        "estimatedTime": "预计需要 {{minutes}} 分钟",
+        "generate": "开始生成",
+        "language": "选择语言",
+        "later": "稍后",
+        "message": "是否使用 AI 自动生成字幕？",
+        "title": "未检测到字幕"
+      },
+      "success": {
+        "autoLoaded": "字幕已自动加载",
+        "message": "已成功生成 {{count}} 条字幕",
+        "title": "字幕生成成功"
+      }
+    },
     "controls": {
       "auto_pause": {
         "disabled": "字幕未加载",
@@ -209,49 +254,55 @@
       "subtitle": "检测到当前视频格式不受支持",
       "title": "视频格式不兼容"
     },
-    "subtitles": {
-      "hide": "隐藏字幕列表",
-      "search": "搜索字幕",
-      "show": "展开字幕列表"
-    },
     "subtitleList": {
       "empty": {
-        "title": "在视频文件同目录下未找到匹配的字幕文件",
         "description": "选择一种方式开始添加字幕",
         "options": {
+          "ai": {
+            "action": "需配置 API Key",
+            "actionEnabled": "生成",
+            "description": "基于语音识别生成单词级字幕",
+            "title": "AI 生成字幕"
+          },
           "embedded": {
-            "title": "使用内嵌字幕",
+            "action": "选择",
             "description": "视频文件包含字幕轨道，可直接导入",
-            "action": "选择"
+            "title": "使用内嵌字幕"
           },
           "external": {
-            "title": "导入外挂字幕",
-            "description": "从本地文件导入 SRT、VTT 等格式字幕"
-          },
-          "ai": {
-            "title": "AI 生成字幕",
-            "description": "基于语音识别生成单词级字幕",
-            "action": "即将推出"
+            "description": "从本地文件导入 SRT、VTT 等格式字幕",
+            "title": "导入外挂字幕"
           }
-        }
+        },
+        "title": "在视频文件同目录下未找到匹配的字幕文件"
       },
       "search": {
-        "placeholder": "搜索字幕...",
-        "pending": "搜索中...",
         "count": "找到 {{count}} 条字幕",
         "count_one": "找到 {{count}} 条字幕",
         "count_other": "找到 {{count}} 条字幕",
-        "none": "未找到匹配的字幕",
+        "emptySubtitle": "请尝试其他关键词",
         "emptyTitle": "未找到匹配结果",
-        "emptySubtitle": "请尝试其他关键词"
+        "none": "未找到匹配的字幕",
+        "pending": "搜索中...",
+        "placeholder": "搜索字幕..."
       }
     },
     "subtitleTrackSelector": {
-      "title": "导入内嵌字幕轨道",
+      "actions": {
+        "cancel": "取消",
+        "import": "导入"
+      },
       "empty": "未检测到字幕轨道",
+      "messages": {
+        "extractFailed": "提取字幕轨道 {{index}} 失败",
+        "importFailed": "字幕轨道提取失败，请重试",
+        "importMultipleSuccess": "已导入 {{tracks}} 个字幕轨道（共 {{count}} 条）",
+        "importSuccess": "已导入字幕：{{source}}（共 {{count}} 条）",
+        "selectAtLeastOne": "请选择至少一个字幕轨道"
+      },
       "sections": {
-        "text": "文本字幕轨道",
-        "image": "PGS 字幕轨（图像字幕）"
+        "image": "PGS 字幕轨（图像字幕）",
+        "text": "文本字幕轨道"
       },
       "stream": {
         "label": "Stream {{index}}",
@@ -261,20 +312,15 @@
           "unsupported": "暂不支持"
         }
       },
+      "title": "导入内嵌字幕轨道",
       "warning": {
         "pgs": "PGS 是图像格式字幕，需要 OCR 技术支持，暂不支持导入。"
-      },
-      "actions": {
-        "cancel": "取消",
-        "import": "导入"
-      },
-      "messages": {
-        "selectAtLeastOne": "请选择至少一个字幕轨道",
-        "extractFailed": "提取字幕轨道 {{index}} 失败",
-        "importFailed": "字幕轨道提取失败，请重试",
-        "importSuccess": "已导入字幕：{{source}}（共 {{count}} 条）",
-        "importMultipleSuccess": "已导入 {{tracks}} 个字幕轨道（共 {{count}} 条）"
       }
+    },
+    "subtitles": {
+      "hide": "隐藏字幕列表",
+      "search": "搜索字幕",
+      "show": "展开字幕列表"
     }
   },
   "search": {
@@ -336,6 +382,43 @@
     "appearance": {
       "title": "外观设置"
     },
+    "asr": {
+      "apiKey": {
+        "description": "从 Deepgram 获取 API Key 以使用 AI 字幕生成功能",
+        "getKey": "获取 API Key",
+        "invalid": "API Key 无效",
+        "label": "Deepgram API Key",
+        "placeholder": "输入您的 Deepgram API Key",
+        "saveFailed": "保存失败",
+        "saved": "API Key 已保存",
+        "valid": "API Key 有效",
+        "validate": "验证",
+        "validating": "验证中..."
+      },
+      "defaultLanguage": {
+        "description": "自动生成字幕时使用的默认语言",
+        "label": "默认语言"
+      },
+      "description": "配置 AI 字幕自动生成功能，使用 Deepgram 语音识别技术为视频生成准确的字幕",
+      "languages": {
+        "auto": "自动检测",
+        "de": "德语",
+        "en": "英语",
+        "es": "西班牙语",
+        "fr": "法语",
+        "ja": "日语",
+        "ko": "韩语",
+        "ru": "俄语",
+        "zh": "中文"
+      },
+      "model": {
+        "description": "选择 Deepgram 转写模型",
+        "label": "转写模型",
+        "nova2": "Nova 2（推荐）",
+        "nova3": "Nova 3（最新）"
+      },
+      "title": "语音识别"
+    },
     "developer": {
       "enable_developer_mode": "启用开发者模式",
       "title": "开发者模式"
diff --git a/src/renderer/src/infrastructure/types/subtitle.ts b/src/renderer/src/infrastructure/types/subtitle.ts
index be82280d..e14b51a4 100644
--- a/src/renderer/src/infrastructure/types/subtitle.ts
+++ b/src/renderer/src/infrastructure/types/subtitle.ts
@@ -6,6 +6,15 @@
  * Based on existing EchoPlayer project's subtitle processing features
  */
 
+// Word-level timestamp interface (element type of SubtitleItem.words)
+export interface WordTimestamp {
+  readonly word: string
+  readonly start: number // start of the word on the timeline — presumably seconds, TODO confirm
+  readonly end: number // end of the word — presumably the same unit as `start`, TODO confirm
+  readonly confidence: number // NOTE(review): presumably the recognizer's confidence score — confirm range
+  readonly punctuated_word?: string // optional; presumably `word` with punctuation applied — verify
+}
+
 // 字幕项接口 / Subtitle Item Interface
 export interface SubtitleItem {
   readonly id: string
@@ -13,6 +22,7 @@ export interface SubtitleItem {
   readonly endTime: number
   readonly originalText: string
   readonly translatedText?: string
+  readonly words?: WordTimestamp[]
 }
 
 // 字幕格式枚举 / Subtitle Format Enum
diff --git a/src/renderer/src/pages/player/PlayerPage.tsx b/src/renderer/src/pages/player/PlayerPage.tsx
index 80e4f4a9..8b042ad2 100644
--- a/src/renderer/src/pages/player/PlayerPage.tsx
+++ b/src/renderer/src/pages/player/PlayerPage.tsx
@@ -34,6 +34,8 @@ import styled from 'styled-components'
 
 import { NavbarIcon } from '.'
 import {
+  ASRProgressModal,
+  ASRSubtitlePrompt,
   ControllerPanel,
   PlayerSelector,
   ProgressBar,
@@ -42,6 +44,7 @@ import {
   SubtitleTrackSelector,
   VideoErrorRecovery
 } from './components'
+import { useASRSubtitle } from './hooks/useASRSubtitle'
 import { disposeGlobalOrchestrator } from './hooks/usePlayerEngine'
 import { PlayerPageProvider } from './state/player-page.provider'
 
@@ -117,6 +120,18 @@ function PlayerPage() {
   // 保存原始文件路径用于字幕检测（不是 HLS 播放源）
   const originalFilePathRef = useRef<string | null>(null)
 
+  // ASR subtitle generation
+  const {
+    asrEnabled,
+    showAsrPrompt,
+    showAsrProgress,
+    asrProgress,
+    handleOpenASRGenerator,
+    handleGenerateSubtitle,
+    handleCancelAsr,
+    handleAsrLater
+  } = useASRSubtitle(videoId, originalFilePathRef.current)
+
   // 加载视频数据
   useEffect(() => {
     let cancelled = false
@@ -723,6 +738,8 @@ function PlayerPage() {
                         subtitleStreams !== null && subtitleStreams.streams.length > 0
                       }
                       onOpenEmbeddedSubtitleSelector={() => setShowSubtitleTrackSelector(true)}
+                      asrEnabled={asrEnabled}
+                      onOpenASRGenerator={handleOpenASRGenerator}
                     />
                   </RightSidebar>
                 </Sider>
@@ -759,6 +776,21 @@ function PlayerPage() {
           onImported={() => setShowSubtitleTrackSelector(false)}
           onDismiss={() => setUserDismissedEmbeddedSubtitles(true)}
         />
+
+        {/* ASR 字幕生成提示弹窗 */}
+        <ASRSubtitlePrompt
+          open={showAsrPrompt}
+          onGenerate={handleGenerateSubtitle}
+          onLater={handleAsrLater}
+          estimatedMinutes={5}
+        />
+
+        {/* ASR 字幕生成进度弹窗 */}
+        <ASRProgressModal
+          open={showAsrProgress}
+          progress={asrProgress}
+          onCancel={handleCancelAsr}
+        />
       </Container>
     </PlayerPageProvider>
   )
diff --git a/src/renderer/src/pages/player/components/ASRProgressModal.tsx b/src/renderer/src/pages/player/components/ASRProgressModal.tsx
new file mode 100644
index 00000000..78a6c54f
--- /dev/null
+++ b/src/renderer/src/pages/player/components/ASRProgressModal.tsx
@@ -0,0 +1,144 @@
+import {
+  ANIMATION_DURATION,
+  FONT_SIZES,
+  FONT_WEIGHTS,
+  SPACING
+} from '@renderer/infrastructure/styles/theme'
+import { ASRProgress, ASRProgressStage } from '@shared/types'
+import { Button, Flex, Modal, Progress } from 'antd'
+import { FC, useEffect, useState } from 'react'
+import { useTranslation } from 'react-i18next'
+import styled from 'styled-components'
+
+interface ASRProgressModalProps {
+  open: boolean // forwarded to the antd Modal `open` prop
+  progress: ASRProgress // payload to render: stage, percent, optional eta and message
+  onCancel: () => void // called once the user confirms cancellation (second click on the button)
+}
+
+const Section = styled.div``
+
+const StageTitle = styled.div`
+  font-size: ${FONT_SIZES.BASE}px;
+  font-weight: ${FONT_WEIGHTS.MEDIUM};
+  margin-bottom: ${SPACING.MD}px;
+`
+
+const EstimatedText = styled.div`
+  font-size: ${FONT_SIZES.SM}px;
+  color: var(--ant-color-text-secondary);
+`
+
+const MessageText = styled.div`
+  font-size: ${FONT_SIZES.XS}px;
+  color: var(--ant-color-text-tertiary);
+`
+
+const CancelButton = styled(Button)<{ $confirmMode: boolean }>`
+  transition:
+    color ${ANIMATION_DURATION.SLOW} ease-in-out,
+    border-color ${ANIMATION_DURATION.SLOW} ease-in-out;
+
+  ${(props) =>
+    props.$confirmMode &&
+    `
+    border-color: var(--ant-color-error) !important;
+    color: var(--color-error-text) !important;
+
+    &:hover {
+      border-color: var(--ant-color-error) !important;
+    }
+  `}
+`
+
+/**
+ * Modal that displays ASR subtitle-generation progress while `open` is true.
+ * Cancelling is two-step: the first click arms the button (it disarms after 3 s or when the
+ * pointer leaves it), the second click calls `onCancel`.
+ */
+const ASRProgressModal: FC<ASRProgressModalProps> = ({ open, progress, onCancel }) => {
+  const { t } = useTranslation()
+  const [confirmMode, setConfirmMode] = useState(false)
+  // A failed run must not keep the animated "active" bar; 'exception' renders the error state.
+  const isFailed = progress.stage === ASRProgressStage.Failed
+
+  const getStageText = () => {
+    switch (progress.stage) {
+      case ASRProgressStage.Initializing:
+        return t('player.asr.progress.stages.initializing')
+      case ASRProgressStage.ExtractingAudio:
+        return t('player.asr.progress.stages.extracting')
+      case ASRProgressStage.Transcribing:
+        return t('player.asr.progress.stages.transcribing')
+      case ASRProgressStage.Formatting:
+        return t('player.asr.progress.stages.formatting')
+      case ASRProgressStage.Saving:
+        return t('player.asr.progress.stages.saving')
+      case ASRProgressStage.Complete:
+        return t('player.asr.progress.stages.complete')
+      case ASRProgressStage.Failed:
+        return t('player.asr.progress.stages.failed')
+      default:
+        return ''
+    }
+  }
+
+  // First click arms the confirmation, second click cancels; either way the flag flips.
+  const handleCancel = () => {
+    if (confirmMode) {
+      onCancel()
+    }
+    setConfirmMode(!confirmMode)
+  }
+
+  const handleCancelMouseLeave = () => setConfirmMode(false)
+
+  useEffect(() => {
+    if (!confirmMode) {
+      return undefined
+    }
+    const timer = setTimeout(() => setConfirmMode(false), 3000)
+    return () => clearTimeout(timer)
+  }, [confirmMode])
+
+  const estimatedMinutes = progress.eta ? Math.ceil(progress.eta / 60) : undefined
+
+  return (
+    <Modal
+      open={open}
+      title={t('player.asr.progress.title')}
+      footer={null}
+      closable={false}
+      maskClosable={false}
+      centered
+    >
+      <Flex vertical gap={SPACING.LG}>
+        <Section>
+          <StageTitle>{getStageText()}</StageTitle>
+          <Progress percent={progress.percent} status={isFailed ? 'exception' : 'active'} />
+        </Section>
+
+        {estimatedMinutes !== undefined && estimatedMinutes > 0 && (
+          <EstimatedText>
+            {t('player.asr.progress.estimatedTime', { minutes: estimatedMinutes })}
+          </EstimatedText>
+        )}
+
+        {progress.message && <MessageText>{progress.message}</MessageText>}
+
+        <Flex justify="flex-end">
+          <CancelButton
+            $confirmMode={confirmMode}
+            onClick={handleCancel}
+            onMouseLeave={handleCancelMouseLeave}
+            disabled={progress.stage === ASRProgressStage.Complete}
+          >
+            {confirmMode ? t('player.asr.progress.confirmCancel') : t('player.asr.progress.cancel')}
+          </CancelButton>
+        </Flex>
+      </Flex>
+    </Modal>
+  )
+}
+
+export default ASRProgressModal
diff --git a/src/renderer/src/pages/player/components/ASRSubtitlePrompt.tsx b/src/renderer/src/pages/player/components/ASRSubtitlePrompt.tsx
new file mode 100644
index 00000000..aea74e28
--- /dev/null
+++ b/src/renderer/src/pages/player/components/ASRSubtitlePrompt.tsx
@@ -0,0 +1,85 @@
+import { useTheme } from '@renderer/contexts'
+import { Button, Flex, Modal, Select } from 'antd'
+import { FC, useState } from 'react'
+import { useTranslation } from 'react-i18next'
+
+interface ASRSubtitlePromptProps {
+  open: boolean // forwarded to the antd Modal `open` prop
+  onGenerate: (language: string) => void // receives the language value currently selected in the dropdown
+  onLater: () => void // called by the "later" button and by the modal's own cancel/close
+  estimatedMinutes?: number // interpolated into the estimated-time hint; the component defaults it to 5
+}
+
+/**
+ * Prompt modal asking whether to generate subtitles with ASR.
+ *
+ * Lets the user choose a recognition language (initially 'auto'). "Generate" calls
+ * `onGenerate` with the chosen language code; "Later" and closing the modal call `onLater`.
+ * `estimatedMinutes` (default 5) is interpolated into the estimated-time hint.
+ */
+const ASRSubtitlePrompt: FC<ASRSubtitlePromptProps> = (props) => {
+  const { open, onGenerate, onLater, estimatedMinutes = 5 } = props
+  const { t } = useTranslation()
+  const { theme } = useTheme()
+  const [language, setLanguage] = useState<string>('auto')
+  const secondaryTextColor = theme === 'dark' ? '#999' : '#666'
+
+  const languageOptions = [
+    { value: 'auto', label: t('settings.asr.languages.auto') },
+    { value: 'en', label: t('settings.asr.languages.en') },
+    { value: 'zh', label: t('settings.asr.languages.zh') },
+    { value: 'ja', label: t('settings.asr.languages.ja') },
+    { value: 'es', label: t('settings.asr.languages.es') },
+    { value: 'fr', label: t('settings.asr.languages.fr') },
+    { value: 'de', label: t('settings.asr.languages.de') },
+    { value: 'ko', label: t('settings.asr.languages.ko') },
+    { value: 'ru', label: t('settings.asr.languages.ru') }
+  ]
+
+  return (
+    <Modal
+      open={open}
+      title={t('player.asr.prompt.title')}
+      onCancel={onLater}
+      footer={null}
+      width={500}
+      centered
+    >
+      <Flex vertical gap={20}>
+        <div>
+          <p style={{ fontSize: '16px', marginBottom: '8px' }}>{t('player.asr.prompt.message')}</p>
+          <p style={{ fontSize: '14px', color: secondaryTextColor }}>
+            {t('player.asr.prompt.description')}
+          </p>
+        </div>
+
+        <Flex vertical gap={8}>
+          <label style={{ fontSize: '14px', fontWeight: 500 }}>
+            {t('player.asr.prompt.language')}
+          </label>
+          <Select
+            value={language}
+            onChange={setLanguage}
+            options={languageOptions}
+            style={{ width: '100%' }}
+            size="large"
+            placement="bottomLeft"
+          />
+        </Flex>
+
+        <div style={{ fontSize: '13px', color: secondaryTextColor }}>
+          {t('player.asr.prompt.estimatedTime', { minutes: estimatedMinutes })}
+        </div>
+
+        <Flex gap={12} justify="flex-end">
+          <Button onClick={onLater}>{t('player.asr.prompt.later')}</Button>
+          <Button type="primary" onClick={() => onGenerate(language)}>
+            {t('player.asr.prompt.generate')}
+          </Button>
+        </Flex>
+      </Flex>
+    </Modal>
+  )
+}
+
+export default ASRSubtitlePrompt
diff --git a/src/renderer/src/pages/player/components/SubtitleListPanel.tsx b/src/renderer/src/pages/player/components/SubtitleListPanel.tsx
index 2edc0c99..65c095d9 100644
--- a/src/renderer/src/pages/player/components/SubtitleListPanel.tsx
+++ b/src/renderer/src/pages/player/components/SubtitleListPanel.tsx
@@ -37,6 +37,10 @@ interface SubtitleListPannelProps {
   hasEmbeddedSubtitles?: boolean
   /** 打开内置字幕选择对话框 */
   onOpenEmbeddedSubtitleSelector?: () => void
+  /** 打开 ASR 字幕生成对话框 */
+  onOpenASRGenerator?: () => void
+  /** 是否启用 ASR 功能（当 API key 已配置时） */
+  asrEnabled?: boolean
 }
 
 type SubtitleSearchResult = {
@@ -60,7 +64,9 @@ function SubtitleListPanel({
   emptyDescription,
   emptyActions,
   hasEmbeddedSubtitles,
-  onOpenEmbeddedSubtitleSelector
+  onOpenEmbeddedSubtitleSelector,
+  onOpenASRGenerator,
+  asrEnabled = false
 }: SubtitleListPannelProps) {
   const subtitles = useSubtitles()
   usePlayerEngine()
@@ -244,7 +250,7 @@ function SubtitleListPanel({
             </OptionCard>
 
             {/* AI 生成选项 */}
-            <OptionCard $disabled>
+            <OptionCard $disabled={!asrEnabled}>
               <OptionIconWrapper $color="var(--ant-color-warning, #faad14)">
                 <Sparkles size={20} />
               </OptionIconWrapper>
@@ -254,7 +260,15 @@ function SubtitleListPanel({
                   {t('player.subtitleList.empty.options.ai.description')}
                 </OptionDescription>
               </OptionContent>
-              <Button disabled>{t('player.subtitleList.empty.options.ai.action')}</Button>
+              <Button
+                type="primary"
+                disabled={!asrEnabled}
+                onClick={asrEnabled ? onOpenASRGenerator : undefined}
+              >
+                {asrEnabled
+                  ? t('player.subtitleList.empty.options.ai.actionEnabled')
+                  : t('player.subtitleList.empty.options.ai.action')}
+              </Button>
             </OptionCard>
           </OptionsGrid>
 
diff --git a/src/renderer/src/pages/player/components/index.ts b/src/renderer/src/pages/player/components/index.ts
index 69f2e07a..f4a21525 100644
--- a/src/renderer/src/pages/player/components/index.ts
+++ b/src/renderer/src/pages/player/components/index.ts
@@ -1,5 +1,7 @@
 import styled from 'styled-components'
 
+export { default as ASRProgressModal } from './ASRProgressModal'
+export { default as ASRSubtitlePrompt } from './ASRSubtitlePrompt'
 export { default as ControllerPanel } from './ControllerPanel'
 export { default as ImportSubtitleButton } from './ImportSubtitleButton'
 export { default as PlayerHeader } from './PlayerHeader'
diff --git a/src/renderer/src/pages/player/hooks/useASRSubtitle.ts b/src/renderer/src/pages/player/hooks/useASRSubtitle.ts
new file mode 100644
index 00000000..015bcb95
--- /dev/null
+++ b/src/renderer/src/pages/player/hooks/useASRSubtitle.ts
@@ -0,0 +1,246 @@
+import { loggerService } from '@logger'
+import { SubtitleLibraryService } from '@renderer/services/SubtitleLibrary'
+import { usePlayerSubtitlesStore } from '@renderer/state/stores/player-subtitles.store'
+import { ASRProgress, ASRProgressStage, ASRResult } from '@shared/types'
+import { message } from 'antd'
+import { useCallback, useEffect, useRef, useState } from 'react'
+import { useTranslation } from 'react-i18next'
+
+const logger = loggerService.withContext('useASRSubtitle')
+
+/**
+ * Manages the ASR subtitle generation flow for the player page: whether the
+ * feature is available, prompt/progress dialog visibility, progress updates
+ * received through `window.api.asr.onProgress`, and the generate/cancel actions.
+ *
+ * @param videoId - ID of the current video; generation is refused when falsy.
+ * @param videoPath - Path of the current video file; generation is refused when falsy.
+ * @returns Dialog/progress state plus the handlers that open, start, cancel and dismiss the flow.
+ */
+export function useASRSubtitle(videoId: number | null, videoPath: string | null) {
+  const { t } = useTranslation()
+  const setSubtitles = usePlayerSubtitlesStore((s) => s.setSubtitles)
+
+  const [asrEnabled, setAsrEnabled] = useState(false)
+  const [showAsrPrompt, setShowAsrPrompt] = useState(false)
+  const [showAsrProgress, setShowAsrProgress] = useState(false)
+  const [asrProgress, setAsrProgress] = useState<ASRProgress>({
+    taskId: '',
+    stage: ASRProgressStage.Initializing,
+    percent: 0
+  })
+
+  // Pending auto-close timer for the progress modal (scheduled when a run completes)
+  const closeTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null)
+
+  // Cancels a pending auto-close so it cannot fire after unmount or during a newer run
+  const clearCloseTimer = useCallback(() => {
+    if (closeTimerRef.current !== null) {
+      clearTimeout(closeTimerRef.current)
+      closeTimerRef.current = null
+    }
+  }, [])
+
+  // Check if ASR is enabled (API key configured)
+  useEffect(() => {
+    const checkAsrEnabled = async () => {
+      try {
+        const apiKey = await window.api.config.get('deepgramApiKey')
+        setAsrEnabled(!!apiKey && apiKey.trim().length > 0)
+      } catch (error) {
+        logger.error('Failed to check ASR enabled status', { error })
+        setAsrEnabled(false)
+      }
+    }
+
+    checkAsrEnabled()
+  }, [])
+
+  // Listen for ASR progress updates
+  useEffect(() => {
+    const handleProgress = (progress: ASRProgress) => {
+      // Ignore malformed payloads (missing object or missing stage)
+      if (!progress || !progress.stage) {
+        logger.warn('Received invalid ASR progress', { progress })
+        return
+      }
+
+      setAsrProgress(progress)
+
+      // Auto-close progress modal when complete
+      if (progress.stage === ASRProgressStage.Complete) {
+        clearCloseTimer()
+        closeTimerRef.current = setTimeout(() => {
+          closeTimerRef.current = null
+          setShowAsrProgress(false)
+        }, 2000)
+      }
+    }
+
+    // Subscribe through the whitelisted ASR progress API exposed on window.api
+    const unsubscribe = window.api.asr.onProgress(handleProgress)
+    return () => {
+      unsubscribe?.()
+      clearCloseTimer()
+    }
+  }, [clearCloseTimer])
+
+  const handleOpenASRGenerator = useCallback(() => {
+    setShowAsrPrompt(true)
+  }, [])
+
+  const handleGenerateSubtitle = useCallback(
+    async (language: string) => {
+      setShowAsrPrompt(false)
+
+      if (!videoPath || !videoId) {
+        message.error(t('player.asr.errors.unknown', { message: 'No video path' }))
+        return
+      }
+
+      try {
+        // A close scheduled by a previous run must not hide this run's progress modal
+        clearCloseTimer()
+        setShowAsrProgress(true)
+        setAsrProgress({
+          taskId: '',
+          stage: ASRProgressStage.ExtractingAudio,
+          percent: 0
+        })
+
+        const result: ASRResult = await window.api.asr.generate({
+          videoPath,
+          language,
+          videoId
+        })
+
+        // Success - reload subtitles from database
+        if (result.success && result.subtitleLibraryId) {
+          message.success(
+            t('player.asr.success.message', { count: result.stats?.subtitleCount || 0 })
+          )
+
+          // Reload subtitles from database
+          const svc = new SubtitleLibraryService()
+          const subtitles = await svc.getSubtitlesForVideo(videoId)
+          setSubtitles(subtitles)
+
+          logger.info('ASR 字幕加载成功', {
+            subtitleLibraryId: result.subtitleLibraryId,
+            count: subtitles.length
+          })
+        } else {
+          // Handle error response - prioritize errorCode over error message
+          const errorCode = result.errorCode
+          const errorMessage = result.error || 'Unknown error'
+
+          // Log the error details for debugging
+          if (errorCode === 'TASK_CANCELLED') {
+            logger.info('用户取消了 ASR 字幕生成', {
+              errorCode,
+              errorMessage,
+              fullResult: result
+            })
+          } else {
+            logger.error('ASR generation failed with error code', {
+              errorCode,
+              errorMessage,
+              fullResult: result
+            })
+          }
+
+          setShowAsrProgress(false)
+
+          // Map error codes to user-friendly messages
+          let translationKey: string
+          switch (errorCode) {
+            case 'NO_API_KEY':
+              translationKey = 'player.asr.errors.noApiKey'
+              break
+            case 'INVALID_API_KEY':
+              translationKey = 'player.asr.errors.invalidApiKey'
+              break
+            case 'QUOTA_EXCEEDED':
+              translationKey = 'player.asr.errors.apiQuotaExceeded'
+              break
+            case 'NETWORK_ERROR':
+              translationKey = 'player.asr.errors.networkError'
+              break
+            case 'AUDIO_EXTRACTION_FAILED':
+              translationKey = 'player.asr.errors.audioExtractionFailed'
+              break
+            case 'TASK_CANCELLED':
+              // Don't show error for user-initiated cancellation
+              return
+            case 'SUBTITLE_EXTRACTION_FAILED':
+              translationKey = 'player.asr.errors.transcriptionFailed'
+              break
+            case 'UNKNOWN_ERROR':
+            default:
+              // Fall back to string matching for legacy errors without codes
+              if (!errorCode) {
+                if (errorMessage.includes('API key') || errorMessage.includes('API Key')) {
+                  translationKey = 'player.asr.errors.invalidApiKey'
+                } else if (errorMessage.includes('quota') || errorMessage.includes('配额')) {
+                  translationKey = 'player.asr.errors.apiQuotaExceeded'
+                } else if (errorMessage.includes('network') || errorMessage.includes('网络')) {
+                  translationKey = 'player.asr.errors.networkError'
+                } else if (errorMessage.includes('audio')) {
+                  translationKey = 'player.asr.errors.audioExtractionFailed'
+                } else {
+                  translationKey = 'player.asr.errors.unknown'
+                }
+              } else {
+                translationKey = 'player.asr.errors.unknown'
+              }
+          }
+
+          // Show the error message
+          if (translationKey === 'player.asr.errors.unknown') {
+            message.error(t(translationKey, { message: errorMessage }))
+          } else {
+            message.error(t(translationKey))
+          }
+          return
+        }
+      } catch (error: unknown) {
+        // Handle unexpected errors (network failures, etc.)
+        logger.error('ASR generation failed with unexpected error', { error })
+        setShowAsrProgress(false)
+        // Thrown values are not guaranteed to be Error instances (they may even be null)
+        const detail = (error as { message?: unknown } | null)?.message || String(error)
+        message.error(t('player.asr.errors.unknown', { message: detail }))
+      }
+    },
+    [videoPath, videoId, t, setSubtitles, clearCloseTimer]
+  )
+
+  const handleCancelAsr = useCallback(async () => {
+    if (asrProgress?.taskId) {
+      try {
+        await window.api.asr.cancel(asrProgress.taskId)
+        setShowAsrProgress(false)
+      } catch (error) {
+        logger.error('Failed to cancel ASR task', { error })
+      }
+    } else {
+      // No taskId known yet: there is nothing to cancel, just close the progress modal
+      setShowAsrProgress(false)
+    }
+  }, [asrProgress])
+
+  const handleAsrLater = useCallback(() => {
+    setShowAsrPrompt(false)
+  }, [])
+
+  return {
+    asrEnabled,
+    showAsrPrompt,
+    showAsrProgress,
+    asrProgress,
+    handleOpenASRGenerator,
+    handleGenerateSubtitle,
+    handleCancelAsr,
+    handleAsrLater
+  }
+}
diff --git a/src/renderer/src/pages/settings/ASRSettings.tsx b/src/renderer/src/pages/settings/ASRSettings.tsx
new file mode 100644
index 00000000..f21ffab8
--- /dev/null
+++ b/src/renderer/src/pages/settings/ASRSettings.tsx
@@ -0,0 +1,221 @@
+import { loggerService } from '@logger'
+import Selector from '@renderer/components/Selector'
+import { useTheme } from '@renderer/contexts'
+import { Button, Flex, Input, message } from 'antd'
+import { ExternalLink } from 'lucide-react'
+import { FC, useEffect, useState } from 'react'
+import { useTranslation } from 'react-i18next'
+
+import {
+  HelpText,
+  SettingContainer,
+  SettingDescription,
+  SettingDivider,
+  SettingGroup,
+  SettingRow,
+  SettingRowTitle,
+  SettingTitle
+} from '.'
+
+const logger = loggerService.withContext('ASRSettings')
+
+/**
+ * Settings section for ASR (speech-to-text) subtitle generation:
+ * Deepgram API key entry/validation, default recognition language and model.
+ * Values are read from and written to the app config via `window.api.config`.
+ */
+const ASRSettings: FC = () => {
+  const { theme } = useTheme()
+  const { t } = useTranslation()
+
+  // ASR settings state
+  const [deepgramApiKey, setDeepgramApiKey] = useState<string>('')
+  const [asrDefaultLanguage, setAsrDefaultLanguage] = useState<string>('en')
+  const [asrModel, setAsrModel] = useState<string>('nova-3')
+  const [validatingApiKey, setValidatingApiKey] = useState(false)
+  const [apiKeyValid, setApiKeyValid] = useState<boolean | null>(null)
+
+  // ASR language options
+  const asrLanguageOptions = [
+    { value: 'auto', label: t('settings.asr.languages.auto') },
+    { value: 'en', label: t('settings.asr.languages.en') },
+    { value: 'zh', label: t('settings.asr.languages.zh') },
+    { value: 'ja', label: t('settings.asr.languages.ja') },
+    { value: 'es', label: t('settings.asr.languages.es') },
+    { value: 'fr', label: t('settings.asr.languages.fr') },
+    { value: 'de', label: t('settings.asr.languages.de') },
+    { value: 'ko', label: t('settings.asr.languages.ko') },
+    { value: 'ru', label: t('settings.asr.languages.ru') }
+  ]
+
+  // ASR model options
+  const asrModelOptions = [{ value: 'nova-3', label: t('settings.asr.model.nova3') }]
+
+  // Load ASR settings on mount
+  useEffect(() => {
+    const loadSettings = async () => {
+      try {
+        const [apiKey, lang, model] = await Promise.all([
+          window.api.config.get('deepgramApiKey'),
+          window.api.config.get('asrDefaultLanguage'),
+          window.api.config.get('asrModel')
+        ])
+
+        setDeepgramApiKey(apiKey || '')
+        setAsrDefaultLanguage(lang || 'en')
+        setAsrModel(model || 'nova-3')
+      } catch (error) {
+        logger.error('加载 ASR 设置失败', { error })
+      }
+    }
+
+    loadSettings()
+  }, [])
+
+  const handleApiKeyChange = (e: React.ChangeEvent<HTMLInputElement>) => {
+    setDeepgramApiKey(e.target.value)
+    setApiKeyValid(null) // Reset validation state
+  }
+
+  const handleApiKeySave = async () => {
+    try {
+      await window.api.config.set('deepgramApiKey', deepgramApiKey.trim())
+      message.success(t('settings.asr.apiKey.saved') || '保存成功')
+    } catch (error) {
+      logger.error('Failed to save Deepgram API key', { error })
+      message.error(t('settings.asr.apiKey.saveFailed') || '保存失败')
+    }
+  }
+
+  const handleValidateApiKey = async () => {
+    // Pasted keys often carry stray whitespace; validate and store the trimmed value
+    const apiKey = deepgramApiKey.trim()
+    if (!apiKey) {
+      message.warning(t('settings.asr.apiKey.invalid') || 'API Key 无效')
+      return
+    }
+
+    setValidatingApiKey(true)
+    let isValid = false
+    try {
+      isValid = await window.api.asr.validateApiKey(apiKey)
+    } catch (error) {
+      logger.error('Failed to validate Deepgram API key', { error })
+    } finally {
+      setValidatingApiKey(false)
+    }
+
+    setApiKeyValid(isValid)
+    if (!isValid) {
+      message.error(t('settings.asr.apiKey.invalid') || 'API Key 无效')
+      return
+    }
+    message.success(t('settings.asr.apiKey.valid') || 'API Key 有效')
+
+    // Persist separately so a storage failure is not misreported as an invalid key
+    try {
+      await window.api.config.set('deepgramApiKey', apiKey)
+    } catch (error) {
+      logger.error('Failed to save validated Deepgram API key', { error })
+      message.error(t('settings.asr.apiKey.saveFailed') || '保存失败')
+    }
+  }
+
+  const handleAsrLanguageChange = async (value: string) => {
+    setAsrDefaultLanguage(value)
+    try {
+      await window.api.config.set('asrDefaultLanguage', value)
+    } catch (error) {
+      logger.error('保存 ASR 语言失败', { error })
+    }
+  }
+
+  const handleAsrModelChange = async (value: string) => {
+    setAsrModel(value)
+    try {
+      await window.api.config.set('asrModel', value)
+    } catch (error) {
+      logger.error('保存 ASR 模型失败', { error })
+    }
+  }
+
+  const openDeepgramWebsite = () => {
+    window.api.openWebsite('https://console.deepgram.com/signup')
+  }
+
+  return (
+    <SettingContainer theme={theme}>
+      <SettingGroup theme={theme}>
+        <SettingTitle>{t('settings.asr.title')}</SettingTitle>
+        <SettingDescription>{t('settings.asr.description')}</SettingDescription>
+        <SettingDivider />
+
+        <SettingRow>
+          <SettingRowTitle>
+            <Flex vertical style={{ flex: 1 }}>
+              <span>{t('settings.asr.apiKey.label')}</span>
+              <HelpText>{t('settings.asr.apiKey.description')}</HelpText>
+            </Flex>
+          </SettingRowTitle>
+          <Flex vertical gap={8} style={{ flex: 1, maxWidth: '400px' }}>
+            <Flex gap={8}>
+              <Input.Password
+                value={deepgramApiKey}
+                onChange={handleApiKeyChange}
+                placeholder={t('settings.asr.apiKey.placeholder')}
+                onBlur={handleApiKeySave}
+                status={apiKeyValid === false ? 'error' : undefined}
+              />
+              <Button onClick={handleValidateApiKey} loading={validatingApiKey}>
+                {t('settings.asr.apiKey.validate')}
+              </Button>
+            </Flex>
+            <Button
+              type="link"
+              onClick={openDeepgramWebsite}
+              style={{ alignSelf: 'flex-start', padding: 0 }}
+            >
+              {t('settings.asr.apiKey.getKey')} <ExternalLink size={14} style={{ marginLeft: 4 }} />
+            </Button>
+          </Flex>
+        </SettingRow>
+
+        <SettingDivider />
+
+        <SettingRow>
+          <SettingRowTitle>
+            <Flex vertical>
+              <span>{t('settings.asr.defaultLanguage.label')}</span>
+              <HelpText>{t('settings.asr.defaultLanguage.description')}</HelpText>
+            </Flex>
+          </SettingRowTitle>
+          <Selector
+            size={14}
+            value={asrDefaultLanguage}
+            onChange={handleAsrLanguageChange}
+            options={asrLanguageOptions}
+          />
+        </SettingRow>
+
+        <SettingDivider />
+
+        <SettingRow>
+          <SettingRowTitle>
+            <Flex vertical>
+              <span>{t('settings.asr.model.label')}</span>
+              <HelpText>{t('settings.asr.model.description')}</HelpText>
+            </Flex>
+          </SettingRowTitle>
+          <Selector
+            size={14}
+            value={asrModel}
+            onChange={handleAsrModelChange}
+            options={asrModelOptions}
+          />
+        </SettingRow>
+      </SettingGroup>
+    </SettingContainer>
+  )
+}
+
+export default ASRSettings
diff --git a/src/renderer/src/pages/settings/SettingsPage.tsx b/src/renderer/src/pages/settings/SettingsPage.tsx
index 3f910be0..a470ae1f 100644
--- a/src/renderer/src/pages/settings/SettingsPage.tsx
+++ b/src/renderer/src/pages/settings/SettingsPage.tsx
@@ -1,5 +1,5 @@
 import { Navbar, NavbarCenter } from '@renderer/components/app/Navbar'
-import { Command, Eye, Info, Monitor, PlayCircle, Settings2 } from 'lucide-react'
+import { Command, Eye, Info, Mic, Monitor, PlayCircle, Settings2 } from 'lucide-react'
 import React from 'react'
 import { useTranslation } from 'react-i18next'
 import { Link, Route, Routes, useLocation } from 'react-router-dom'
@@ -7,6 +7,7 @@ import styled from 'styled-components'
 
 import AboutSettings from './AboutSettings'
 import { AppearanceSettings } from './AppearanceSettings'
+import ASRSettings from './ASRSettings'
 import GeneralSettings from './GeneralSettings'
 import PlaybackSettings from './PlaybackSettings'
 import PluginsSettings from './PluginsSettings'
@@ -52,6 +53,12 @@ export function SettingsPage(): React.JSX.Element {
               {t('settings.playback.title')}
             </MenuItem>
           </MenuItemLink>
+          <MenuItemLink to="/settings/asr">
+            <MenuItem className={isRoute('/settings/asr')}>
+              <Mic size={18} />
+              {t('settings.asr.title')}
+            </MenuItem>
+          </MenuItemLink>
           <MenuItemLink to="/settings/plugins">
             <MenuItem className={isRoute('/settings/plugins')}>
               <Monitor size={18} />
@@ -71,6 +78,7 @@ export function SettingsPage(): React.JSX.Element {
             <Route path="general" element={<GeneralSettings />} />
             <Route path="shortcut" element={<ShortcutSettings />} />
             <Route path="playback" element={<PlaybackSettings />} />
+            <Route path="asr" element={<ASRSettings />} />
             <Route path="plugins" element={<PluginsSettings />} />
             <Route path="about" element={<AboutSettings />} />
           </Routes>
diff --git a/src/renderer/src/pages/settings/index.tsx b/src/renderer/src/pages/settings/index.tsx
index 3db3a269..75714619 100644
--- a/src/renderer/src/pages/settings/index.tsx
+++ b/src/renderer/src/pages/settings/index.tsx
@@ -11,6 +11,8 @@ import { Divider } from 'antd'
 import Link from 'antd/es/typography/Link'
 import styled, { CSSProp } from 'styled-components'
 
+import { FONT_SIZES, FONT_WEIGHTS, SPACING } from '../../infrastructure/styles/theme'
+
 export const SettingContainer = styled.div<{ theme?: ThemeMode }>`
   display: flex;
   flex-direction: column;
@@ -99,3 +101,9 @@ export const SettingGroup = styled.div<{ theme?: ThemeMode; css?: CSSProp }>`
   padding: 16px;
   background: ${(props) => (props.theme === 'dark' ? '#00000010' : 'var(--color-background)')};
 `
+export const HelpText = styled.span`
+  margin-top: ${SPACING.XXS}px;
+  font-size: ${FONT_SIZES.XS}px;
+  font-weight: ${FONT_WEIGHTS.REGULAR};
+  color: var(--color-text-3);
+`
diff --git a/tests/SubtitleDictionaryLookup.test.tsx b/tests/SubtitleDictionaryLookup.test.tsx
index 88f697b9..86367bbb 100644
--- a/tests/SubtitleDictionaryLookup.test.tsx
+++ b/tests/SubtitleDictionaryLookup.test.tsx
@@ -67,8 +67,8 @@ vi.mock('antd', async () => {
           {children}
           {open && (
             <div role="tooltip" data-testid="dictionary-popover">
-              {title && <div>{title}</div>}
-              {content && <div>{content}</div>}
+              {title && <div data-testid="dictionary-popover-title">{title}</div>}
+              {content && <div data-testid="dictionary-popover-content-wrapper">{content}</div>}
             </div>
           )}
         </div>
@@ -85,6 +85,27 @@ vi.mock('antd', async () => {
 })
 
 describe('SubtitleOverlay dictionary lookup', () => {
+  // Helper: wait for the dictionary popover and its content wrapper to render, then return the popover
+  const waitForPopover = async (timeout = 3000) => {
+    await waitFor(
+      () => {
+        const popover = screen.getByTestId('dictionary-popover')
+        expect(popover).toBeInTheDocument()
+      },
+      { timeout }
+    )
+
+    await waitFor(
+      () => {
+        const contentWrapper = screen.getByTestId('dictionary-popover-content-wrapper')
+        expect(contentWrapper).toBeInTheDocument()
+      },
+      { timeout }
+    )
+
+    return screen.getByTestId('dictionary-popover')
+  }
+
   beforeEach(() => {
     vi.clearAllMocks()
 
@@ -129,7 +150,9 @@ describe('SubtitleOverlay dictionary lookup', () => {
     fireEvent.click(word)
 
     await waitFor(() => expect(mockQuery).toHaveBeenCalledWith('hello'))
-    const popover = await screen.findByTestId('dictionary-popover')
+
+    // 使用辅助函数等待弹窗显示
+    const popover = await waitForPopover()
 
     // 检查单词标题
     expect(popover.textContent).toContain('hello')
@@ -167,12 +190,18 @@ describe('SubtitleOverlay dictionary lookup', () => {
     const word = screen.getByText('hello')
     fireEvent.click(word)
 
+    // 等待弹窗显示
+    await waitForPopover()
+
     // 等待数据加载完成后显示结果
-    await waitFor(() => {
-      const popoverContent = screen.getByTestId('dictionary-popover-content')
-      expect(popoverContent).toBeInTheDocument()
-      expect(popoverContent.textContent).toContain('你好')
-    })
+    await waitFor(
+      () => {
+        const popoverContent = screen.getByTestId('dictionary-popover-content')
+        expect(popoverContent).toBeInTheDocument()
+        expect(popoverContent.textContent).toContain('你好')
+      },
+      { timeout: 3000 }
+    )
   })
 
   it('shows error state when query fails', async () => {
@@ -190,9 +219,17 @@ describe('SubtitleOverlay dictionary lookup', () => {
 
     await waitFor(() => expect(mockQuery).toHaveBeenCalledWith('hello'))
 
-    // 检查错误状态
-    expect(screen.getByText('查询失败')).toBeInTheDocument()
-    expect(screen.getByText('网络错误')).toBeInTheDocument()
+    // 等待弹窗显示
+    await waitForPopover()
+
+    // 等待错误内容渲染完成
+    await waitFor(
+      () => {
+        expect(screen.getByText('查询失败')).toBeInTheDocument()
+        expect(screen.getByText('网络错误')).toBeInTheDocument()
+      },
+      { timeout: 3000 }
+    )
   })
 
   it('displays all definitions without limitation', async () => {
@@ -215,7 +252,9 @@ describe('SubtitleOverlay dictionary lookup', () => {
     fireEvent.click(word)
 
     await waitFor(() => expect(mockQuery).toHaveBeenCalledWith('hello'))
-    const popover = await screen.findByTestId('dictionary-popover')
+
+    // 使用辅助函数等待弹窗显示
+    const popover = await waitForPopover()
 
     // 检查显示所有释义
     expect(popover.textContent).toContain('释义 1')