From 3c7876690779e114635ded8d40dd8b198dbea5ca Mon Sep 17 00:00:00 2001 From: stego-z Date: Sat, 4 Apr 2026 09:35:36 +0800 Subject: [PATCH 1/2] Fix Chinese input and Apple ASR locale selection --- Sources/Fluid/Persistence/SettingsStore.swift | 31 ++++++-- .../AppleSpeechAnalyzerProvider.swift | 71 ++++++++++++++----- .../Fluid/Services/AppleSpeechProvider.swift | 19 +++-- .../Fluid/Services/SpeechLocaleResolver.swift | 64 +++++++++++++++++ Sources/Fluid/Services/TypingService.swift | 35 ++++++++- .../UI/AISettingsView+SpeechRecognition.swift | 17 ++++- Sources/Fluid/UI/WelcomeView.swift | 9 +++ 7 files changed, 215 insertions(+), 31 deletions(-) create mode 100644 Sources/Fluid/Services/SpeechLocaleResolver.swift diff --git a/Sources/Fluid/Persistence/SettingsStore.swift b/Sources/Fluid/Persistence/SettingsStore.swift index cfafda6b..a50149e7 100644 --- a/Sources/Fluid/Persistence/SettingsStore.swift +++ b/Sources/Fluid/Persistence/SettingsStore.swift @@ -2493,7 +2493,10 @@ final class SettingsStore: ObservableObject { /// Default model for the current architecture static var defaultModel: SpeechModel { - CPUArchitecture.isAppleSilicon ? .parakeetTDT : .whisperBase + if SpeechLocaleResolver.prefersChineseRecognition { + return .whisperBase + } + return CPUArchitecture.isAppleSilicon ? .parakeetTDT : .whisperBase } // MARK: - UI Card Metadata @@ -3168,7 +3171,8 @@ extension SettingsStore { case polish = "pl" case portuguese = "pt" case vietnamese = "vi" - case mandarinChinese = "zh" + case simplifiedChinese = "zh-Hans" + case traditionalChinese = "zh-Hant" var id: String { self.rawValue } @@ -3187,11 +3191,19 @@ extension SettingsStore { case .polish: return "Polish" case .portuguese: return "Portuguese" case .vietnamese: return "Vietnamese" - case .mandarinChinese: return "Mandarin Chinese" + case .simplifiedChinese: return "Simplified Chinese" + case .traditionalChinese: return "Traditional Chinese" } } - var tokenString: String { "<|\(self.rawValue)|>" } + var tokenString: String { + switch self { + case .simplifiedChinese, .traditionalChinese: + return "<|zh|>" + default: + return "<|\(self.rawValue)|>" + } + } } // MARK: - Unified Speech Model Selection @@ -3218,6 +3230,14 @@ extension SettingsStore { if model.requiresMacOS26, #unavailable(macOS 26.0) { return .whisperBase } + if SpeechLocaleResolver.prefersChineseRecognition { + switch model { + case .parakeetTDT, .parakeetTDTv2, .parakeetRealtime: + return .whisperBase + default: + break + } + } return model } @@ -3237,6 +3257,9 @@ extension SettingsStore { { return language } + if self.defaults.string(forKey: Keys.selectedCohereLanguage) == "zh" { + return .traditionalChinese + } return .english } set { diff --git a/Sources/Fluid/Services/AppleSpeechAnalyzerProvider.swift b/Sources/Fluid/Services/AppleSpeechAnalyzerProvider.swift index 0835d902..3f3c4b73 100644 --- a/Sources/Fluid/Services/AppleSpeechAnalyzerProvider.swift +++ b/Sources/Fluid/Services/AppleSpeechAnalyzerProvider.swift @@ -34,31 +34,63 @@ final class AppleSpeechAnalyzerProvider: TranscriptionProvider { init() {} + private func resolvedRecognitionLocale() async throws -> Locale { + let preferredLocale = SpeechLocaleResolver.preferredRecognitionLocale() + + if let supportedLocale = await SpeechTranscriber.supportedLocale(equivalentTo: preferredLocale) { + let preferredID = preferredLocale.identifier(.bcp47) + let resolvedID = supportedLocale.identifier(.bcp47) + if preferredID != resolvedID { + DebugLogger.shared.info( + "AppleSpeechAnalyzerProvider: Falling back from locale \(preferredID) to supported locale \(resolvedID)", + source: "AppleSpeechAnalyzerProvider" + ) + } + return supportedLocale + } + + let supportedLocales = await SpeechTranscriber.supportedLocales + if let englishLocale = supportedLocales.first(where: { + $0.language.languageCode?.identifier == "en" + }) { + DebugLogger.shared.warning( + "AppleSpeechAnalyzerProvider: Preferred locale \(preferredLocale.identifier(.bcp47)) unsupported, using English fallback \(englishLocale.identifier(.bcp47))", + source: "AppleSpeechAnalyzerProvider" + ) + return englishLocale + } + + if let firstSupportedLocale = supportedLocales.first { + DebugLogger.shared.warning( + "AppleSpeechAnalyzerProvider: Preferred locale \(preferredLocale.identifier(.bcp47)) unsupported, using first supported locale \(firstSupportedLocale.identifier(.bcp47))", + source: "AppleSpeechAnalyzerProvider" + ) + return firstSupportedLocale + } + + throw NSError( + domain: "AppleSpeechAnalyzerProvider", + code: 1, + userInfo: [NSLocalizedDescriptionKey: "SpeechAnalyzer is unavailable on this device"] + ) + } + // MARK: - Lifecycle func prepare(progressHandler: ((Double) -> Void)?) async throws { + let recognitionLocale = try await self.resolvedRecognitionLocale() + // 1. Create a transcriber to check locale support and download if needed let transcriber = SpeechTranscriber( - locale: Locale.current, + locale: recognitionLocale, transcriptionOptions: [], reportingOptions: [], attributeOptions: [] ) - // 2. Check if locale is supported - let supportedLocales = await SpeechTranscriber.supportedLocales - let currentLocaleID = Locale.current.identifier(.bcp47) - let isSupported = supportedLocales.map { $0.identifier(.bcp47) }.contains(currentLocaleID) - - guard isSupported else { - throw NSError( - domain: "AppleSpeechAnalyzerProvider", - code: 1, - userInfo: [NSLocalizedDescriptionKey: "Current locale is not supported by SpeechAnalyzer"] - ) - } + let currentLocaleID = recognitionLocale.identifier(.bcp47) - // 3. Check if model is installed, download if needed + // 2. Check if model is installed, download if needed let installedLocales = await SpeechTranscriber.installedLocales let isInstalled = installedLocales.map { $0.identifier(.bcp47) }.contains(currentLocaleID) @@ -77,7 +109,7 @@ final class AppleSpeechAnalyzerProvider: TranscriptionProvider { } } - // 4. Get the best available audio format for conversion + // 3. Get the best available audio format for conversion self.analyzerFormat = await SpeechAnalyzer.bestAvailableAudioFormat(compatibleWith: [transcriber]) self.converter = BufferConverter() @@ -121,8 +153,12 @@ final class AppleSpeechAnalyzerProvider: TranscriptionProvider { /// /// - Returns: `true` if the current locale's speech model is installed on disk, `false` otherwise. func refreshModelsExistOnDiskAsync() async -> Bool { + guard let recognitionLocale = try? await self.resolvedRecognitionLocale() else { + self._cacheQueue.sync { self._modelsInstalledCache = false } + return false + } let installedLocales = await SpeechTranscriber.installedLocales - let currentLocaleID = Locale.current.identifier(.bcp47) + let currentLocaleID = recognitionLocale.identifier(.bcp47) let isInstalled = installedLocales.map { $0.identifier(.bcp47) }.contains(currentLocaleID) self._cacheQueue.sync { self._modelsInstalledCache = isInstalled } @@ -147,10 +183,11 @@ final class AppleSpeechAnalyzerProvider: TranscriptionProvider { } DebugLogger.shared.debug("AppleSpeechAnalyzer: Starting transcription with \(samples.count) samples", source: "AppleSpeechAnalyzerProvider") + let recognitionLocale = try await self.resolvedRecognitionLocale() // 1. Create a FRESH transcriber for this transcription let freshTranscriber = SpeechTranscriber( - locale: Locale.current, + locale: recognitionLocale, transcriptionOptions: [], reportingOptions: [], attributeOptions: [] diff --git a/Sources/Fluid/Services/AppleSpeechProvider.swift b/Sources/Fluid/Services/AppleSpeechProvider.swift index af32948a..c23b8a42 100644 --- a/Sources/Fluid/Services/AppleSpeechProvider.swift +++ b/Sources/Fluid/Services/AppleSpeechProvider.swift @@ -23,13 +23,24 @@ final class AppleSpeechProvider: TranscriptionProvider { private var recognizer: SFSpeechRecognizer? init() { - // Initialize with user's current locale - self.recognizer = SFSpeechRecognizer(locale: Locale.current) + // Initialize with the user's preferred speech locale when possible. + self.recognizer = SFSpeechRecognizer(locale: SpeechLocaleResolver.preferredRecognitionLocale()) + } + + private func refreshRecognizerIfNeeded() { + let preferredLocale = SpeechLocaleResolver.preferredRecognitionLocale() + if self.recognizer?.locale.identifier != preferredLocale.identifier { + self.recognizer = SFSpeechRecognizer(locale: preferredLocale) + } else if self.recognizer == nil { + self.recognizer = SFSpeechRecognizer(locale: preferredLocale) + } } // MARK: - Lifecycle func prepare(progressHandler: ((Double) -> Void)?) async throws { + self.refreshRecognizerIfNeeded() + // 1. Request Authorization let status = await self.requestAuthorization() @@ -70,9 +81,7 @@ final class AppleSpeechProvider: TranscriptionProvider { } // 2. Ensure recognizer exists - if self.recognizer == nil { - self.recognizer = SFSpeechRecognizer(locale: Locale.current) - } + self.refreshRecognizerIfNeeded() guard let recognizer = self.recognizer else { throw NSError(domain: "AppleSpeechProvider", code: 5, userInfo: [NSLocalizedDescriptionKey: "Failed to initialize SFSpeechRecognizer"]) } diff --git a/Sources/Fluid/Services/SpeechLocaleResolver.swift b/Sources/Fluid/Services/SpeechLocaleResolver.swift new file mode 100644 index 00000000..1c32aeeb --- /dev/null +++ b/Sources/Fluid/Services/SpeechLocaleResolver.swift @@ -0,0 +1,64 @@ +import Foundation + +enum SpeechLocaleResolver { + static var prefersChineseRecognition: Bool { + Locale.preferredLanguages.contains { Self.languageCode(from: $0) == "zh" } + } + + static func preferredRecognitionLocale() -> Locale { + let selectedModel = SettingsStore.shared.selectedSpeechModel + switch selectedModel { + case .appleSpeech, .appleSpeechAnalyzer, .cohereTranscribeSixBit: + return Self.locale(for: SettingsStore.shared.selectedCohereLanguage) + default: + break + } + + if let preferredChinese = Locale.preferredLanguages.first(where: { Self.languageCode(from: $0) == "zh" }) { + return Locale(identifier: preferredChinese) + } + return Locale.autoupdatingCurrent + } + + private static func locale(for language: SettingsStore.CohereLanguage) -> Locale { + switch language { + case .arabic: + return Locale(identifier: "ar-SA") + case .german: + return Locale(identifier: "de-DE") + case .greek: + return Locale(identifier: "el-GR") + case .english: + return Locale(identifier: "en-US") + case .spanish: + return Locale(identifier: "es-ES") + case .french: + return Locale(identifier: "fr-FR") + case .italian: + return Locale(identifier: "it-IT") + case .japanese: + return Locale(identifier: "ja-JP") + case .korean: + return Locale(identifier: "ko-KR") + case .dutch: + return Locale(identifier: "nl-NL") + case .polish: + return Locale(identifier: "pl-PL") + case .portuguese: + return Locale(identifier: "pt-BR") + case .vietnamese: + return Locale(identifier: "vi-VN") + case .simplifiedChinese: + return Locale(identifier: "zh-CN") + case .traditionalChinese: + return Locale(identifier: "zh-TW") + } + } + + private static func languageCode(from identifier: String) -> String? { + let normalized = identifier.lowercased() + let separator = normalized.firstIndex(where: { $0 == "-" || $0 == "_" }) ?? normalized.endIndex + let code = String(normalized[.. Bool { + text.unicodeScalars.contains { scalar in + switch scalar.value { + case 0x2E80...0x2EFF, // CJK Radicals Supplement + 0x2F00...0x2FDF, // Kangxi Radicals + 0x3040...0x309F, // Hiragana + 0x30A0...0x30FF, // Katakana + 0x3100...0x312F, // Bopomofo + 0x3130...0x318F, // Hangul Compatibility Jamo + 0x31A0...0x31BF, // Bopomofo Extended + 0x3400...0x4DBF, // CJK Unified Ideographs Extension A + 0x4E00...0x9FFF, // CJK Unified Ideographs + 0xA960...0xA97F, // Hangul Jamo Extended-A + 0xAC00...0xD7AF, // Hangul Syllables + 0xD7B0...0xD7FF, // Hangul Jamo Extended-B + 0xF900...0xFAFF, // CJK Compatibility Ideographs + 0xFE30...0xFE4F, // CJK Compatibility Forms + 0xFF00...0xFFEF: // Fullwidth forms + return true + default: + return false + } + } + } + // MARK: - Layout-aware key code lookup /// Returns the virtual key code that produces `character` under the current keyboard layout. @@ -262,7 +287,7 @@ final class TypingService { } self.log("[TypingService] Starting async text insertion process") - if self.textInsertionMode == .reliablePaste { + if self.shouldForceReliablePaste(for: text) || self.textInsertionMode == .reliablePaste { // Reliable Paste still needs a short settle window after focus restoration. usleep(80_000) } else { @@ -279,8 +304,14 @@ final class TypingService { private func insertTextInstantly(_ text: String, preferredTargetPID: pid_t?) { self.log("[TypingService] insertTextInstantly called with \(text.count) characters") self.log("[TypingService] Attempting to type text: \"\(text.prefix(50))\(text.count > 50 ? "..." : "")\"") + let forceReliablePaste = self.shouldForceReliablePaste(for: text) + let effectiveInsertionMode: SettingsStore.TextInsertionMode = forceReliablePaste ? .reliablePaste : self.textInsertionMode + + if forceReliablePaste { + self.log("[TypingService] Detected CJK text; forcing Reliable Paste for IME-safe insertion") + } - if self.textInsertionMode == .reliablePaste { + if effectiveInsertionMode == .reliablePaste { self.log("[TypingService] Reliable Paste mode enabled") if self.tryReliablePasteInsertion(text, preferredTargetPID: preferredTargetPID) { self.log("[TypingService] SUCCESS: Reliable Paste mode completed") diff --git a/Sources/Fluid/UI/AISettingsView+SpeechRecognition.swift b/Sources/Fluid/UI/AISettingsView+SpeechRecognition.swift index e7322d76..8079ee73 100644 --- a/Sources/Fluid/UI/AISettingsView+SpeechRecognition.swift +++ b/Sources/Fluid/UI/AISettingsView+SpeechRecognition.swift @@ -270,7 +270,7 @@ extension VoiceEngineSettingsView { .animation(.spring(response: 0.5, dampingFraction: 0.7), value: model.id) } - if model == .cohereTranscribeSixBit { + if model == .cohereTranscribeSixBit || model == .appleSpeech || model == .appleSpeechAnalyzer { VStack(alignment: .leading, spacing: 8) { HStack(alignment: .center, spacing: 10) { Image(systemName: "globe") @@ -281,7 +281,7 @@ extension VoiceEngineSettingsView { Text("Select Language Manually") .font(.caption) .fontWeight(.semibold) - Text("Choose the language token injected into Cohere's transcription prompt.") + Text(self.manualLanguageDescription(for: model)) .font(.caption2) .foregroundStyle(.secondary) .lineLimit(2) @@ -289,7 +289,7 @@ extension VoiceEngineSettingsView { Spacer(minLength: 8) - Picker("Cohere Language", selection: Binding( + Picker("Recognition Language", selection: Binding( get: { self.settings.selectedCohereLanguage }, set: { newValue in guard newValue != self.settings.selectedCohereLanguage else { return } @@ -352,6 +352,17 @@ extension VoiceEngineSettingsView { .padding(.vertical, 6) } + private func manualLanguageDescription(for model: SettingsStore.SpeechModel) -> String { + switch model { + case .cohereTranscribeSixBit: + return "Choose the language token injected into Cohere's transcription prompt." + case .appleSpeech, .appleSpeechAnalyzer: + return "Choose the locale used by Apple's on-device speech recognition." + default: + return "Choose the recognition language." + } + } + func speechModelCard(for model: SettingsStore.SpeechModel) -> some View { let isSelected = self.viewModel.previewSpeechModel == model let isConfiguredActive = self.viewModel.isActiveSpeechModel(model) diff --git a/Sources/Fluid/UI/WelcomeView.swift b/Sources/Fluid/UI/WelcomeView.swift index 207191fa..9f74f6be 100644 --- a/Sources/Fluid/UI/WelcomeView.swift +++ b/Sources/Fluid/UI/WelcomeView.swift @@ -648,6 +648,9 @@ struct OnboardingFlowView: View { } private var recommendedOnboardingModel: SettingsStore.SpeechModel { + if SpeechLocaleResolver.prefersChineseRecognition { + return .whisperBase + } if CPUArchitecture.isAppleSilicon { switch self.preferredLanguageChoice { case .englishOnly: @@ -664,6 +667,9 @@ struct OnboardingFlowView: View { } private var recommendedOnboardingModels: [SettingsStore.SpeechModel] { + if SpeechLocaleResolver.prefersChineseRecognition { + return [.whisperBase, .whisperSmall].filter { SettingsStore.SpeechModel.availableModels.contains($0) } + } if CPUArchitecture.isAppleSilicon { switch self.preferredLanguageChoice { case .englishOnly: @@ -678,6 +684,9 @@ struct OnboardingFlowView: View { } private var recommendedModelReasonText: String { + if SpeechLocaleResolver.prefersChineseRecognition { + return "Best if you mainly speak Chinese. Whisper Base gives broad multilingual coverage and works across Apple Silicon and Intel Macs." + } if CPUArchitecture.isAppleSilicon { switch self.preferredLanguageChoice { case .englishOnly: From be764f239d878324fe1279ed9617d60751bb10a6 Mon Sep 17 00:00:00 2001 From: stego-z Date: Sun, 5 Apr 2026 13:27:26 +0800 Subject: [PATCH 2/2] Refine Chinese onboarding defaults and speech permissions --- Info.plist | 34 +++++++++---------- Sources/Fluid/Persistence/SettingsStore.swift | 8 ----- Sources/Fluid/UI/WelcomeView.swift | 12 ++----- 3 files changed, 20 insertions(+), 34 deletions(-) diff --git a/Info.plist b/Info.plist index 91484849..06de88d0 100644 --- a/Info.plist +++ b/Info.plist @@ -2,37 +2,37 @@ - CFBundleExecutable - $(EXECUTABLE_NAME) - CFBundlePackageType - APPL CFBundleDisplayName $(PRODUCT_NAME) - CFBundleName - $(PRODUCT_NAME) + CFBundleExecutable + $(EXECUTABLE_NAME) CFBundleIdentifier $(PRODUCT_BUNDLE_IDENTIFIER) - CFBundleVersion - 10 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + APPL CFBundleShortVersionString - 1.5.11-beta.3 - LSMinimumSystemVersion - $(MACOSX_DEPLOYMENT_TARGET) + 1.5.11-beta.3 + CFBundleVersion + 10 LSApplicationCategoryType public.app-category.productivity - NSAccessibilityUsageDescription - FluidVoice needs Accessibility permission to listen for global hotkeys and type text into other apps when you stop recording. - NSMicrophoneUsageDescription - FluidVoice needs microphone access to record your voice for speech-to-text transcription. Your audio is processed locally and never sent to external servers. + LSMinimumSystemVersion + $(MACOSX_DEPLOYMENT_TARGET) LSUIElement + NSAccessibilityUsageDescription + FluidVoice needs Accessibility permission to listen for global hotkeys and type text into other apps when you stop recording. NSAppleEventsUsageDescription FluidVoice needs permission to control system events for managing launch at startup settings. + NSMicrophoneUsageDescription + FluidVoice needs microphone access to record your voice for speech-to-text transcription. Your audio is processed locally and never sent to external servers. + NSSpeechRecognitionUsageDescription + FluidVoice uses on-device speech recognition to transcribe your voice commands and text quickly and privately. POSTHOG_API_KEY phc_coNu37345O2bgaAeQMD3tezkg1rDCbDS9Y3pOVvp2VM POSTHOG_HOST https://eu.i.posthog.com - NSSpeechRecognitionUsageDescription - FluidVoice uses on-device speech recognition to transcribe your voice commands and text quickly and privately. diff --git a/Sources/Fluid/Persistence/SettingsStore.swift b/Sources/Fluid/Persistence/SettingsStore.swift index a50149e7..33c91969 100644 --- a/Sources/Fluid/Persistence/SettingsStore.swift +++ b/Sources/Fluid/Persistence/SettingsStore.swift @@ -3230,14 +3230,6 @@ extension SettingsStore { if model.requiresMacOS26, #unavailable(macOS 26.0) { return .whisperBase } - if SpeechLocaleResolver.prefersChineseRecognition { - switch model { - case .parakeetTDT, .parakeetTDTv2, .parakeetRealtime: - return .whisperBase - default: - break - } - } return model } diff --git a/Sources/Fluid/UI/WelcomeView.swift b/Sources/Fluid/UI/WelcomeView.swift index 9f74f6be..8ba78f37 100644 --- a/Sources/Fluid/UI/WelcomeView.swift +++ b/Sources/Fluid/UI/WelcomeView.swift @@ -648,9 +648,6 @@ struct OnboardingFlowView: View { } private var recommendedOnboardingModel: SettingsStore.SpeechModel { - if SpeechLocaleResolver.prefersChineseRecognition { - return .whisperBase - } if CPUArchitecture.isAppleSilicon { switch self.preferredLanguageChoice { case .englishOnly: @@ -667,9 +664,6 @@ struct OnboardingFlowView: View { } private var recommendedOnboardingModels: [SettingsStore.SpeechModel] { - if SpeechLocaleResolver.prefersChineseRecognition { - return [.whisperBase, .whisperSmall].filter { SettingsStore.SpeechModel.availableModels.contains($0) } - } if CPUArchitecture.isAppleSilicon { switch self.preferredLanguageChoice { case .englishOnly: @@ -684,9 +678,6 @@ struct OnboardingFlowView: View { } private var recommendedModelReasonText: String { - if SpeechLocaleResolver.prefersChineseRecognition { - return "Best if you mainly speak Chinese. Whisper Base gives broad multilingual coverage and works across Apple Silicon and Intel Macs." - } if CPUArchitecture.isAppleSilicon { switch self.preferredLanguageChoice { case .englishOnly: @@ -694,6 +685,9 @@ struct OnboardingFlowView: View { case .multipleLanguages: return "Best if you switch languages. Parakeet TDT v3 is the lighter default, and Cohere is the higher-accuracy option." case .other: + if SpeechLocaleResolver.prefersChineseRecognition { + return "Best if you mainly speak Chinese. Whisper Base gives broad multilingual coverage and works across Apple Silicon and Intel Macs." + } return "Choose a different model below if neither of the default language paths fits." } }