argmaxinc · dbrkn · Dec 30, 2025 · Dec 30, 2025 · Dec 31, 2025 · Dec 31, 2025
diff --git a/Sources/apple-speechanalyzer-cli/SpeechAnalyzerCLI.swift b/Sources/apple-speechanalyzer-cli/SpeechAnalyzerCLI.swift
@@ -4,7 +4,7 @@
 // Usage:
 //   .build/release/apple-speechanalyzer-cli \
 //       --input-audio-path <path-to-audio> \
-//       --output-txt-path <path-to-output> [--locale en-US] [--live]
+//       --output-text-path <path-to-output> [--locale en-US]
 //
 // Requires: Xcode 26 beta command-line tools and macOS 26.0 runtime.
 
@@ -19,15 +19,17 @@ struct SpeechAnalyzerCLI {
         var inputPath: String?
         var outputPath: String?
         var localeIdentifier = Locale.current.identifier
-        var liveMode = false
+        var customPhrasesString: String?
+        var useSFSpeech = false // Use SFSpeechRecognizer instead of SpeechTranscriber
 
         var it = CommandLine.arguments.dropFirst().makeIterator()
         while let arg = it.next() {
             switch arg {
             case "--input-audio-path": inputPath  = it.next()
-            case "--output-txt-path":  outputPath = it.next()
+            case "--output-text-path": outputPath = it.next()
             case "--locale":           localeIdentifier = it.next() ?? localeIdentifier
-            case "--live":            liveMode = true
+            case "--custom-phrases":   customPhrasesString = it.next()
+            case "--sfspeech":         useSFSpeech = true
             default:                   CLIUsage.exit()
             }
         }
@@ -36,42 +38,100 @@ struct SpeechAnalyzerCLI {
             CLIUsage.exit()
         }
 
-        guard #available(macOS 26.0, *) else {
-            fputs("Error: SpeechAnalyzer requires macOS 26.0 or newer.\n", stderr)
-            Darwin.exit(EXIT_FAILURE)
+        // Parse phrases
+        let customPhrases: [String]? = customPhrasesString?.split(separator: ",").map {
+            String($0).trimmingCharacters(in: .whitespaces)
         }
 
-        let locale      = Locale(identifier: localeIdentifier)
-        let transcriber = SpeechTranscriber(
-            locale: locale,
-            preset: liveMode ? .progressiveLiveTranscription : .offlineTranscription
-        )
+        let locale = Locale(identifier: localeIdentifier)
+        let inputURL  = URL(fileURLWithPath: inPath)
+        let outputURL = URL(fileURLWithPath: outPath)
 
-        if !(await SpeechTranscriber.installedLocales).contains(locale) {
-            FileHandle.standardError.write(Data("Downloading speech model for \(localeIdentifier)…\n".utf8))
-            if let request = try await AssetInventory.assetInstallationRequest(supporting: [transcriber]) {
-                try await request.downloadAndInstall()
+        // If we have custom phrases and want them to work, use SFSpeechRecognizer
+        // contextualStrings only works with SFSpeechRecognizer (not SpeechTranscriber)
+        let hasCustomPhrases = customPhrases?.isEmpty == false
+        let shouldUseSFSpeech = useSFSpeech || hasCustomPhrases
+
+        var plainText = ""
+
+        if shouldUseSFSpeech {
+            // Use SFSpeechRecognizer
+            // This is the only way contextualStrings actually works
+            guard let recognizer = SFSpeechRecognizer(locale: locale) else {
+                fputs("Error: Could not create SFSpeechRecognizer for locale \(localeIdentifier)\n", stderr)
+                Darwin.exit(EXIT_FAILURE)
             }
-        }
 
-        let analyzer    = SpeechAnalyzer(modules: [transcriber])
-        let inputURL    = URL(fileURLWithPath: inPath)
-        let audioFile   = try AVAudioFile(forReading: inputURL)
-        let outputURL   = URL(fileURLWithPath: outPath)
+            if !recognizer.isAvailable {
+                fputs("Error: Speech recognizer not available\n", stderr)
+                Darwin.exit(EXIT_FAILURE)
+            }
 
-        async let attrTranscript: AttributedString = transcriber.results.reduce(into: AttributedString("")) { partial, result in
-            partial.append(result.text)
-            partial.append(AttributedString(" "))
-        }
+            let request = SFSpeechURLRecognitionRequest(url: inputURL)
+            request.requiresOnDeviceRecognition = false
+            request.addsPunctuation = true
+
+            if let phrases = customPhrases, !phrases.isEmpty {
+                request.contextualStrings = phrases
+            }
+
+            // Perform recognition using continuation for async/await compatibility
+            do {
+                plainText = try await withCheckedThrowingContinuation { continuation in
+                    recognizer.recognitionTask(with: request) { result, error in
+                        if let error = error {
+                            continuation.resume(throwing: error)
+                            return
+                        }
+                        if let result = result, result.isFinal {
+                            continuation.resume(returning: result.bestTranscription.formattedString)
+                        }
+                    }
+                }
+            } catch {
+                fputs("Error: \(error)\n", stderr)
+                Darwin.exit(EXIT_FAILURE)
+            }
 
-        if let last = try await analyzer.analyzeSequence(from: audioFile) {
-            try await analyzer.finalizeAndFinish(through: last)
         } else {
-            await analyzer.cancelAndFinishNow()
+            // Use new SpeechTranscriber (on-device, faster)
+            guard #available(macOS 26.0, *) else {
+                fputs("Error: SpeechTranscriber requires macOS 26.0 or newer.\n", stderr)
+                Darwin.exit(EXIT_FAILURE)
+            }
+
+            let transcriber = SpeechTranscriber(
+                locale: locale,
+                transcriptionOptions: [],
+                reportingOptions: [],
+                attributeOptions: []
+            )
+
+            if !(await SpeechTranscriber.installedLocales).contains(locale) {
+                FileHandle.standardError.write(Data("Downloading speech model for \(localeIdentifier)…\n".utf8))
+                if let request = try await AssetInventory.assetInstallationRequest(supporting: [transcriber]) {
+                    try await request.downloadAndInstall()
+                }
+            }
+
+            let analyzer    = SpeechAnalyzer(modules: [transcriber])
+            let audioFile   = try AVAudioFile(forReading: inputURL)
+
+            async let attrTranscript: AttributedString = transcriber.results.reduce(into: AttributedString("")) { partial, result in
+                partial.append(result.text)
+                partial.append(AttributedString(" "))
+            }
+
+            if let last = try await analyzer.analyzeSequence(from: audioFile) {
+                try await analyzer.finalizeAndFinish(through: last)
+            } else {
+                await analyzer.cancelAndFinishNow()
+            }
+
+            plainText = String((try await attrTranscript).characters)
+                .trimmingCharacters(in: .whitespacesAndNewlines)
         }
 
-        let plainText = String((try await attrTranscript).characters)
-            .trimmingCharacters(in: .whitespacesAndNewlines)
         try plainText.write(to: outputURL, atomically: true, encoding: .utf8)
         print("✅ Saved transcript to \(outputURL.path)")
     }
@@ -81,11 +141,11 @@ enum CLIUsage {
     static func exit() -> Never {
         let prog = (CommandLine.arguments.first as NSString?)?.lastPathComponent ?? "apple-speechanalyzer-cli"
         fputs("""
-Usage: \(prog) --input-audio-path <file> --output-txt-path <file> [--locale <id>] [--live]
+Usage: \(prog) --input-audio-path <file> --output-text-path <file> [--locale <id>] [--sfspeech] [--custom-phrases <phrases>]
 
 Example:
-  .build/release/\(prog) --input-audio-path demo.flac \
-                         --output-txt-path demo.txt \
+  .build/release/\(prog) --input-audio-path demo.flac \\
+                         --output-txt-path demo.txt \\
                          --locale en-US
 
 """, stderr)