From 7d6f76f622f7420308f50423979f9e89482df260 Mon Sep 17 00:00:00 2001 From: dberkin1 Date: Tue, 30 Dec 2025 13:15:44 +0300 Subject: [PATCH 1/5] feat: Add custom vocabulary support via AnalysisContext - Fix SpeechTranscriber initialization for macOS 26 release (use transcriptionOptions/reportingOptions/attributeOptions instead of preset) - Add --custom-phrases CLI argument for custom vocabulary boosting - Use AnalysisContext.contextualStrings with vocabulary tag - Use file-based SpeechAnalyzer initializer Reference: https://developer.apple.com/documentation/speech/analysiscontext --- .../SpeechAnalyzerCLI.swift | 91 ++++++++++++++----- 1 file changed, 66 insertions(+), 25 deletions(-) diff --git a/Sources/apple-speechanalyzer-cli/SpeechAnalyzerCLI.swift b/Sources/apple-speechanalyzer-cli/SpeechAnalyzerCLI.swift index 6979682..2fecd5a 100644 --- a/Sources/apple-speechanalyzer-cli/SpeechAnalyzerCLI.swift +++ b/Sources/apple-speechanalyzer-cli/SpeechAnalyzerCLI.swift @@ -4,9 +4,12 @@ // Usage: // .build/release/apple-speechanalyzer-cli \ // --input-audio-path \ -// --output-txt-path [--locale en-US] [--live] +// --output-text-path \ +// [--locale en-US] \ +// [--custom-phrases "word1,word2,phrase one"] // -// Requires: Xcode 26 beta command-line tools and macOS 26.0 runtime. +// Requires: Xcode 26 and macOS 26.0 runtime. +// Reference: https://developer.apple.com/documentation/speech/analysiscontext import Foundation import AVFAudio @@ -19,15 +22,15 @@ struct SpeechAnalyzerCLI { var inputPath: String? var outputPath: String? var localeIdentifier = Locale.current.identifier - var liveMode = false + var customPhrasesString: String? var it = CommandLine.arguments.dropFirst().makeIterator() while let arg = it.next() { switch arg { case "--input-audio-path": inputPath = it.next() - case "--output-txt-path": outputPath = it.next() + case "--output-text-path": outputPath = it.next() case "--locale": localeIdentifier = it.next() ?? localeIdentifier - case "--live": liveMode = true + case "--custom-phrases": customPhrasesString = it.next() default: CLIUsage.exit() } } @@ -41,36 +44,64 @@ struct SpeechAnalyzerCLI { Darwin.exit(EXIT_FAILURE) } - let locale = Locale(identifier: localeIdentifier) + // Parse custom phrases from comma-separated string + let customPhrases: [String]? = customPhrasesString?.split(separator: ",").map { + String($0).trimmingCharacters(in: .whitespaces) + } + + let locale = Locale(identifier: localeIdentifier) + + // Use the correct SpeechTranscriber initializer for macOS 26 let transcriber = SpeechTranscriber( locale: locale, - preset: liveMode ? .progressiveLiveTranscription : .offlineTranscription + transcriptionOptions: [], + reportingOptions: [.volatileResults], + attributeOptions: [.audioTimeRange] ) - if !(await SpeechTranscriber.installedLocales).contains(locale) { + // Check if locale is installed, download if needed + let installedLocales = await SpeechTranscriber.installedLocales + if !installedLocales.contains(where: { $0.identifier == locale.identifier }) { FileHandle.standardError.write(Data("Downloading speech model for \(localeIdentifier)…\n".utf8)) if let request = try await AssetInventory.assetInstallationRequest(supporting: [transcriber]) { try await request.downloadAndInstall() } } - let analyzer = SpeechAnalyzer(modules: [transcriber]) - let inputURL = URL(fileURLWithPath: inPath) - let audioFile = try AVAudioFile(forReading: inputURL) - let outputURL = URL(fileURLWithPath: outPath) + let inputURL = URL(fileURLWithPath: inPath) + let audioFile = try AVAudioFile(forReading: inputURL) + let outputURL = URL(fileURLWithPath: outPath) - async let attrTranscript: AttributedString = transcriber.results.reduce(into: AttributedString("")) { partial, result in - partial.append(result.text) - partial.append(AttributedString(" ")) - } + // Create AnalysisContext with custom phrases if provided + // Reference: https://developer.apple.com/documentation/speech/analysiscontext + let context: AnalysisContext = { + var ctx = AnalysisContext() + if let phrases = customPhrases, !phrases.isEmpty { + // Use the vocabulary tag for custom phrases + let vocabTag = AnalysisContext.ContextualStringsTag(rawValue: "vocabulary") + ctx.contextualStrings = [vocabTag: phrases] + FileHandle.standardError.write(Data("Using custom phrases: \(phrases)\n".utf8)) + } + return ctx + }() - if let last = try await analyzer.analyzeSequence(from: audioFile) { - try await analyzer.finalizeAndFinish(through: last) - } else { - await analyzer.cancelAndFinishNow() + // Use the file-based SpeechAnalyzer initializer + // The analyzer processes the audio file and feeds results to transcriber.results + _ = try await SpeechAnalyzer( + inputAudioFile: audioFile, + modules: [transcriber], + analysisContext: context, + finishAfterFile: true + ) + + // Collect transcription results + var transcript = AttributedString("") + for try await result in transcriber.results { + transcript.append(result.text) + transcript.append(AttributedString(" ")) } - let plainText = String((try await attrTranscript).characters) + let plainText = String(transcript.characters) .trimmingCharacters(in: .whitespacesAndNewlines) try plainText.write(to: outputURL, atomically: true, encoding: .utf8) print("✅ Saved transcript to \(outputURL.path)") @@ -81,12 +112,22 @@ enum CLIUsage { static func exit() -> Never { let prog = (CommandLine.arguments.first as NSString?)?.lastPathComponent ?? "apple-speechanalyzer-cli" fputs(""" -Usage: \(prog) --input-audio-path --output-txt-path [--locale ] [--live] +Usage: \(prog) --input-audio-path --output-text-path [--locale ] [--custom-phrases ] + +Options: + --input-audio-path Path to input audio file (required) + --output-text-path Path to output transcript file (required) + --locale Locale identifier for transcription (default: system locale) + --custom-phrases Comma-separated list of custom vocabulary phrases to boost Example: - .build/release/\(prog) --input-audio-path demo.flac \ - --output-txt-path demo.txt \ - --locale en-US + .build/release/\(prog) --input-audio-path demo.flac \\ + --output-text-path demo.txt \\ + --locale en-US \\ + --custom-phrases "Argmax,WhisperKit,SpeakerKit" + +Custom phrases improve recognition accuracy for domain-specific terms. +Reference: https://developer.apple.com/documentation/speech/analysiscontext """, stderr) Darwin.exit(EXIT_FAILURE) From b5a40f3bc08743a46caec4383f38119ce4f8e087 Mon Sep 17 00:00:00 2001 From: dberkin1 Date: Tue, 30 Dec 2025 18:50:04 +0300 Subject: [PATCH 2/5] Add SFSpeechRecognizer support for contextualStrings - contextualStrings only works with SFSpeechRecognizer + server-based recognition - SpeechTranscriber (macOS 26) ignores contextualStrings - Auto-switch to SFSpeechRecognizer when --custom-phrases is provided - Add --server flag to force server-based recognition - Add --debug flag for troubleshooting --- .../SpeechAnalyzerCLI.swift | 208 ++++++++++-------- 1 file changed, 115 insertions(+), 93 deletions(-) diff --git a/Sources/apple-speechanalyzer-cli/SpeechAnalyzerCLI.swift b/Sources/apple-speechanalyzer-cli/SpeechAnalyzerCLI.swift index 2fecd5a..da423ab 100644 --- a/Sources/apple-speechanalyzer-cli/SpeechAnalyzerCLI.swift +++ b/Sources/apple-speechanalyzer-cli/SpeechAnalyzerCLI.swift @@ -1,16 +1,3 @@ - -// Apple Speech Analyzer CLI (macOS 26.0+) -// Build with: swift build -c release -// Usage: -// .build/release/apple-speechanalyzer-cli \ -// --input-audio-path \ -// --output-text-path \ -// [--locale en-US] \ -// [--custom-phrases "word1,word2,phrase one"] -// -// Requires: Xcode 26 and macOS 26.0 runtime. -// Reference: https://developer.apple.com/documentation/speech/analysiscontext - import Foundation import AVFAudio import Speech @@ -23,6 +10,8 @@ struct SpeechAnalyzerCLI { var outputPath: String? var localeIdentifier = Locale.current.identifier var customPhrasesString: String? + var debug = false + var useServerRecognition = false // Default to on-device with SpeechTranscriber var it = CommandLine.arguments.dropFirst().makeIterator() while let arg = it.next() { @@ -31,105 +20,138 @@ struct SpeechAnalyzerCLI { case "--output-text-path": outputPath = it.next() case "--locale": localeIdentifier = it.next() ?? localeIdentifier case "--custom-phrases": customPhrasesString = it.next() - default: CLIUsage.exit() + case "--debug": debug = true + case "--server": useServerRecognition = true // Use SFSpeechRecognizer with server + default: break } } guard let inPath = inputPath, let outPath = outputPath else { - CLIUsage.exit() + fputs("Usage: cli --input-audio-path --output-text-path [--custom-phrases

] [--server] [--debug]\n", stderr) + Darwin.exit(1) } - guard #available(macOS 26.0, *) else { - fputs("Error: SpeechAnalyzer requires macOS 26.0 or newer.\n", stderr) - Darwin.exit(EXIT_FAILURE) - } - - // Parse custom phrases from comma-separated string + // Parse phrases let customPhrases: [String]? = customPhrasesString?.split(separator: ",").map { String($0).trimmingCharacters(in: .whitespaces) } - let locale = Locale(identifier: localeIdentifier) - - // Use the correct SpeechTranscriber initializer for macOS 26 - let transcriber = SpeechTranscriber( - locale: locale, - transcriptionOptions: [], - reportingOptions: [.volatileResults], - attributeOptions: [.audioTimeRange] - ) - - // Check if locale is installed, download if needed - let installedLocales = await SpeechTranscriber.installedLocales - if !installedLocales.contains(where: { $0.identifier == locale.identifier }) { - FileHandle.standardError.write(Data("Downloading speech model for \(localeIdentifier)…\n".utf8)) - if let request = try await AssetInventory.assetInstallationRequest(supporting: [transcriber]) { - try await request.downloadAndInstall() - } + if debug { + fputs("=== DEBUG: Input Parameters ===\n", stderr) + fputs(" Input: \(inPath)\n", stderr) + fputs(" Output: \(outPath)\n", stderr) + fputs(" Locale: \(localeIdentifier)\n", stderr) + fputs(" Custom phrases: \(customPhrases ?? [])\n", stderr) + fputs(" Use server: \(useServerRecognition)\n", stderr) } + let locale = Locale(identifier: localeIdentifier) let inputURL = URL(fileURLWithPath: inPath) - let audioFile = try AVAudioFile(forReading: inputURL) let outputURL = URL(fileURLWithPath: outPath) - // Create AnalysisContext with custom phrases if provided - // Reference: https://developer.apple.com/documentation/speech/analysiscontext - let context: AnalysisContext = { - var ctx = AnalysisContext() + // If we have custom phrases and want them to work, use server-based recognition + // contextualStrings only works with SFSpeechRecognizer + server (not on-device) + let hasCustomPhrases = customPhrases?.isEmpty == false + let shouldUseServer = useServerRecognition || hasCustomPhrases + + var plainText = "" + + if shouldUseServer { + // Use SFSpeechRecognizer with server-based recognition + // This is the only way contextualStrings actually works + if debug { + fputs("\n=== Using SFSpeechRecognizer (server-based) for contextualStrings support ===\n", stderr) + } + + guard let recognizer = SFSpeechRecognizer(locale: locale) else { + fputs("Error: Could not create SFSpeechRecognizer for locale \(localeIdentifier)\n", stderr) + Darwin.exit(1) + } + + if !recognizer.isAvailable { + fputs("Error: Speech recognizer not available\n", stderr) + Darwin.exit(1) + } + + let request = SFSpeechURLRecognitionRequest(url: inputURL) + request.requiresOnDeviceRecognition = false // CRITICAL: Use server for contextualStrings + request.addsPunctuation = true + if let phrases = customPhrases, !phrases.isEmpty { - // Use the vocabulary tag for custom phrases - let vocabTag = AnalysisContext.ContextualStringsTag(rawValue: "vocabulary") - ctx.contextualStrings = [vocabTag: phrases] - FileHandle.standardError.write(Data("Using custom phrases: \(phrases)\n".utf8)) + request.contextualStrings = phrases + if debug { + fputs(" Set contextualStrings: \(phrases)\n", stderr) + } + } + + // Perform recognition using continuation for async/await compatibility + do { + plainText = try await withCheckedThrowingContinuation { continuation in + recognizer.recognitionTask(with: request) { result, error in + if let error = error { + continuation.resume(throwing: error) + return + } + if let result = result, result.isFinal { + continuation.resume(returning: result.bestTranscription.formattedString) + } + } + } + } catch { + fputs("Error: \(error)\n", stderr) + Darwin.exit(1) + } + + } else { + // Use new SpeechTranscriber (on-device, faster, but contextualStrings don't work) + guard #available(macOS 26.0, *) else { + fputs("Error: SpeechTranscriber requires macOS 26.0+\n", stderr) + Darwin.exit(1) + } + + if debug { + fputs("\n=== Using SpeechTranscriber (on-device) ===\n", stderr) } - return ctx - }() - - // Use the file-based SpeechAnalyzer initializer - // The analyzer processes the audio file and feeds results to transcriber.results - _ = try await SpeechAnalyzer( - inputAudioFile: audioFile, - modules: [transcriber], - analysisContext: context, - finishAfterFile: true - ) - - // Collect transcription results - var transcript = AttributedString("") - for try await result in transcriber.results { - transcript.append(result.text) - transcript.append(AttributedString(" ")) + + let transcriber = SpeechTranscriber( + locale: locale, + transcriptionOptions: [], + reportingOptions: [], + attributeOptions: [] + ) + + let installedLocales = await SpeechTranscriber.installedLocales + if !installedLocales.contains(where: { $0.identifier == locale.identifier }) { + if let request = try await AssetInventory.assetInstallationRequest(supporting: [transcriber]) { + try await request.downloadAndInstall() + } + } + + let audioFile = try AVAudioFile(forReading: inputURL) + + // Note: context is NOT used here because it doesn't work with SpeechTranscriber + let _ = try await SpeechAnalyzer( + inputAudioFile: audioFile, + modules: [transcriber], + finishAfterFile: true + ) + + // Collect results + var transcript = AttributedString("") + for try await result in transcriber.results { + transcript.append(result.text) + transcript.append(AttributedString(" ")) + } + + plainText = String(transcript.characters).trimmingCharacters(in: .whitespacesAndNewlines) } - let plainText = String(transcript.characters) - .trimmingCharacters(in: .whitespacesAndNewlines) - try plainText.write(to: outputURL, atomically: true, encoding: .utf8) - print("✅ Saved transcript to \(outputURL.path)") - } -} + if debug { + fputs("\n=== DEBUG: Final Transcript ===\n", stderr) + fputs(" \(plainText)\n", stderr) + } -enum CLIUsage { - static func exit() -> Never { - let prog = (CommandLine.arguments.first as NSString?)?.lastPathComponent ?? "apple-speechanalyzer-cli" - fputs(""" -Usage: \(prog) --input-audio-path --output-text-path [--locale ] [--custom-phrases ] - -Options: - --input-audio-path Path to input audio file (required) - --output-text-path Path to output transcript file (required) - --locale Locale identifier for transcription (default: system locale) - --custom-phrases Comma-separated list of custom vocabulary phrases to boost - -Example: - .build/release/\(prog) --input-audio-path demo.flac \\ - --output-text-path demo.txt \\ - --locale en-US \\ - --custom-phrases "Argmax,WhisperKit,SpeakerKit" - -Custom phrases improve recognition accuracy for domain-specific terms. -Reference: https://developer.apple.com/documentation/speech/analysiscontext - -""", stderr) - Darwin.exit(EXIT_FAILURE) + try plainText.write(to: outputURL, atomically: true, encoding: .utf8) + print("OK") } } From f09a83396caf996e2704c0cd2d9a38cc2cd5cb53 Mon Sep 17 00:00:00 2001 From: dberkin1 Date: Wed, 31 Dec 2025 15:28:03 +0300 Subject: [PATCH 3/5] Refactor --- .../SpeechAnalyzerCLI.swift | 125 +++++++++--------- 1 file changed, 61 insertions(+), 64 deletions(-) diff --git a/Sources/apple-speechanalyzer-cli/SpeechAnalyzerCLI.swift b/Sources/apple-speechanalyzer-cli/SpeechAnalyzerCLI.swift index da423ab..fd509c2 100644 --- a/Sources/apple-speechanalyzer-cli/SpeechAnalyzerCLI.swift +++ b/Sources/apple-speechanalyzer-cli/SpeechAnalyzerCLI.swift @@ -1,3 +1,13 @@ + +// Apple Speech Analyzer CLI (macOS 26.0+) +// Build with: swift build -c release +// Usage: +// .build/release/apple-speechanalyzer-cli \ +// --input-audio-path \ +// --output-txt-path [--locale en-US] [--live] +// +// Requires: Xcode 26 beta command-line tools and macOS 26.0 runtime. + import Foundation import AVFAudio import Speech @@ -9,26 +19,25 @@ struct SpeechAnalyzerCLI { var inputPath: String? var outputPath: String? var localeIdentifier = Locale.current.identifier + var liveMode = false var customPhrasesString: String? - var debug = false - var useServerRecognition = false // Default to on-device with SpeechTranscriber + var useSFSpeech = false // Use SFSpeechRecognizer instead of SpeechTranscriber var it = CommandLine.arguments.dropFirst().makeIterator() while let arg = it.next() { switch arg { case "--input-audio-path": inputPath = it.next() - case "--output-text-path": outputPath = it.next() + case "--output-txt-path": outputPath = it.next() case "--locale": localeIdentifier = it.next() ?? localeIdentifier + case "--live": liveMode = true case "--custom-phrases": customPhrasesString = it.next() - case "--debug": debug = true - case "--server": useServerRecognition = true // Use SFSpeechRecognizer with server - default: break + case "--sfspeech": useSFSpeech = true + default: CLIUsage.exit() } } guard let inPath = inputPath, let outPath = outputPath else { - fputs("Usage: cli --input-audio-path --output-text-path [--custom-phrases

] [--server] [--debug]\n", stderr) - Darwin.exit(1) + CLIUsage.exit() } // Parse phrases @@ -36,52 +45,36 @@ struct SpeechAnalyzerCLI { String($0).trimmingCharacters(in: .whitespaces) } - if debug { - fputs("=== DEBUG: Input Parameters ===\n", stderr) - fputs(" Input: \(inPath)\n", stderr) - fputs(" Output: \(outPath)\n", stderr) - fputs(" Locale: \(localeIdentifier)\n", stderr) - fputs(" Custom phrases: \(customPhrases ?? [])\n", stderr) - fputs(" Use server: \(useServerRecognition)\n", stderr) - } - let locale = Locale(identifier: localeIdentifier) let inputURL = URL(fileURLWithPath: inPath) let outputURL = URL(fileURLWithPath: outPath) - // If we have custom phrases and want them to work, use server-based recognition - // contextualStrings only works with SFSpeechRecognizer + server (not on-device) + // If we have custom phrases and want them to work, use SFSpeechRecognizer + // contextualStrings only works with SFSpeechRecognizer (not SpeechTranscriber) let hasCustomPhrases = customPhrases?.isEmpty == false - let shouldUseServer = useServerRecognition || hasCustomPhrases + let shouldUseSFSpeech = useSFSpeech || hasCustomPhrases var plainText = "" - if shouldUseServer { - // Use SFSpeechRecognizer with server-based recognition + if shouldUseSFSpeech { + // Use SFSpeechRecognizer // This is the only way contextualStrings actually works - if debug { - fputs("\n=== Using SFSpeechRecognizer (server-based) for contextualStrings support ===\n", stderr) - } - guard let recognizer = SFSpeechRecognizer(locale: locale) else { fputs("Error: Could not create SFSpeechRecognizer for locale \(localeIdentifier)\n", stderr) - Darwin.exit(1) + Darwin.exit(EXIT_FAILURE) } if !recognizer.isAvailable { fputs("Error: Speech recognizer not available\n", stderr) - Darwin.exit(1) + Darwin.exit(EXIT_FAILURE) } let request = SFSpeechURLRecognitionRequest(url: inputURL) - request.requiresOnDeviceRecognition = false // CRITICAL: Use server for contextualStrings + request.requiresOnDeviceRecognition = false request.addsPunctuation = true if let phrases = customPhrases, !phrases.isEmpty { request.contextualStrings = phrases - if debug { - fputs(" Set contextualStrings: \(phrases)\n", stderr) - } } // Perform recognition using continuation for async/await compatibility @@ -99,59 +92,63 @@ struct SpeechAnalyzerCLI { } } catch { fputs("Error: \(error)\n", stderr) - Darwin.exit(1) + Darwin.exit(EXIT_FAILURE) } } else { - // Use new SpeechTranscriber (on-device, faster, but contextualStrings don't work) + // Use new SpeechTranscriber (on-device, faster) guard #available(macOS 26.0, *) else { - fputs("Error: SpeechTranscriber requires macOS 26.0+\n", stderr) - Darwin.exit(1) - } - - if debug { - fputs("\n=== Using SpeechTranscriber (on-device) ===\n", stderr) + fputs("Error: SpeechTranscriber requires macOS 26.0 or newer.\n", stderr) + Darwin.exit(EXIT_FAILURE) } let transcriber = SpeechTranscriber( locale: locale, - transcriptionOptions: [], - reportingOptions: [], - attributeOptions: [] + preset: liveMode ? .progressiveLiveTranscription : .offlineTranscription ) - let installedLocales = await SpeechTranscriber.installedLocales - if !installedLocales.contains(where: { $0.identifier == locale.identifier }) { + if !(await SpeechTranscriber.installedLocales).contains(locale) { + FileHandle.standardError.write(Data("Downloading speech model for \(localeIdentifier)…\n".utf8)) if let request = try await AssetInventory.assetInstallationRequest(supporting: [transcriber]) { try await request.downloadAndInstall() } } - let audioFile = try AVAudioFile(forReading: inputURL) + let analyzer = SpeechAnalyzer(modules: [transcriber]) + let audioFile = try AVAudioFile(forReading: inputURL) - // Note: context is NOT used here because it doesn't work with SpeechTranscriber - let _ = try await SpeechAnalyzer( - inputAudioFile: audioFile, - modules: [transcriber], - finishAfterFile: true - ) - - // Collect results - var transcript = AttributedString("") - for try await result in transcriber.results { - transcript.append(result.text) - transcript.append(AttributedString(" ")) + async let attrTranscript: AttributedString = transcriber.results.reduce(into: AttributedString("")) { partial, result in + partial.append(result.text) + partial.append(AttributedString(" ")) } - plainText = String(transcript.characters).trimmingCharacters(in: .whitespacesAndNewlines) - } + if let last = try await analyzer.analyzeSequence(from: audioFile) { + try await analyzer.finalizeAndFinish(through: last) + } else { + await analyzer.cancelAndFinishNow() + } - if debug { - fputs("\n=== DEBUG: Final Transcript ===\n", stderr) - fputs(" \(plainText)\n", stderr) + plainText = String((try await attrTranscript).characters) + .trimmingCharacters(in: .whitespacesAndNewlines) } try plainText.write(to: outputURL, atomically: true, encoding: .utf8) - print("OK") + print("✅ Saved transcript to \(outputURL.path)") + } +} + +enum CLIUsage { + static func exit() -> Never { + let prog = (CommandLine.arguments.first as NSString?)?.lastPathComponent ?? "apple-speechanalyzer-cli" + fputs(""" +Usage: \(prog) --input-audio-path --output-txt-path [--locale ] [--live] [--sfspeech] [--custom-phrases ] + +Example: + .build/release/\(prog) --input-audio-path demo.flac \\ + --output-txt-path demo.txt \\ + --locale en-US + +""", stderr) + Darwin.exit(EXIT_FAILURE) } } From d4b79cd030e66158c63e7dba802bcf07c876a0f2 Mon Sep 17 00:00:00 2001 From: dberkin1 Date: Wed, 31 Dec 2025 17:20:41 +0300 Subject: [PATCH 4/5] remove livemode --- .../apple-speechanalyzer-cli/SpeechAnalyzerCLI.swift | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Sources/apple-speechanalyzer-cli/SpeechAnalyzerCLI.swift b/Sources/apple-speechanalyzer-cli/SpeechAnalyzerCLI.swift index fd509c2..829e9f4 100644 --- a/Sources/apple-speechanalyzer-cli/SpeechAnalyzerCLI.swift +++ b/Sources/apple-speechanalyzer-cli/SpeechAnalyzerCLI.swift @@ -4,7 +4,7 @@ // Usage: // .build/release/apple-speechanalyzer-cli \ // --input-audio-path \ -// --output-txt-path [--locale en-US] [--live] +// --output-txt-path [--locale en-US] // // Requires: Xcode 26 beta command-line tools and macOS 26.0 runtime. @@ -19,7 +19,6 @@ struct SpeechAnalyzerCLI { var inputPath: String? var outputPath: String? var localeIdentifier = Locale.current.identifier - var liveMode = false var customPhrasesString: String? var useSFSpeech = false // Use SFSpeechRecognizer instead of SpeechTranscriber @@ -29,7 +28,6 @@ struct SpeechAnalyzerCLI { case "--input-audio-path": inputPath = it.next() case "--output-txt-path": outputPath = it.next() case "--locale": localeIdentifier = it.next() ?? localeIdentifier - case "--live": liveMode = true case "--custom-phrases": customPhrasesString = it.next() case "--sfspeech": useSFSpeech = true default: CLIUsage.exit() @@ -104,7 +102,9 @@ struct SpeechAnalyzerCLI { let transcriber = SpeechTranscriber( locale: locale, - preset: liveMode ? .progressiveLiveTranscription : .offlineTranscription + transcriptionOptions: [], + reportingOptions: [], + attributeOptions: [] ) if !(await SpeechTranscriber.installedLocales).contains(locale) { @@ -141,7 +141,7 @@ enum CLIUsage { static func exit() -> Never { let prog = (CommandLine.arguments.first as NSString?)?.lastPathComponent ?? "apple-speechanalyzer-cli" fputs(""" -Usage: \(prog) --input-audio-path --output-txt-path [--locale ] [--live] [--sfspeech] [--custom-phrases ] +Usage: \(prog) --input-audio-path --output-txt-path [--locale ] [--sfspeech] [--custom-phrases ] Example: .build/release/\(prog) --input-audio-path demo.flac \\ From f47d46b83e79f5f687075de392f6689c758b67d7 Mon Sep 17 00:00:00 2001 From: dberkin1 Date: Wed, 31 Dec 2025 17:23:32 +0300 Subject: [PATCH 5/5] fix bug --- Sources/apple-speechanalyzer-cli/SpeechAnalyzerCLI.swift | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Sources/apple-speechanalyzer-cli/SpeechAnalyzerCLI.swift b/Sources/apple-speechanalyzer-cli/SpeechAnalyzerCLI.swift index 829e9f4..ad942d9 100644 --- a/Sources/apple-speechanalyzer-cli/SpeechAnalyzerCLI.swift +++ b/Sources/apple-speechanalyzer-cli/SpeechAnalyzerCLI.swift @@ -4,7 +4,7 @@ // Usage: // .build/release/apple-speechanalyzer-cli \ // --input-audio-path \ -// --output-txt-path [--locale en-US] +// --output-text-path [--locale en-US] // // Requires: Xcode 26 beta command-line tools and macOS 26.0 runtime. @@ -26,7 +26,7 @@ struct SpeechAnalyzerCLI { while let arg = it.next() { switch arg { case "--input-audio-path": inputPath = it.next() - case "--output-txt-path": outputPath = it.next() + case "--output-text-path": outputPath = it.next() case "--locale": localeIdentifier = it.next() ?? localeIdentifier case "--custom-phrases": customPhrasesString = it.next() case "--sfspeech": useSFSpeech = true @@ -141,7 +141,7 @@ enum CLIUsage { static func exit() -> Never { let prog = (CommandLine.arguments.first as NSString?)?.lastPathComponent ?? "apple-speechanalyzer-cli" fputs(""" -Usage: \(prog) --input-audio-path --output-txt-path [--locale ] [--sfspeech] [--custom-phrases ] +Usage: \(prog) --input-audio-path --output-text-path [--locale ] [--sfspeech] [--custom-phrases ] Example: .build/release/\(prog) --input-audio-path demo.flac \\