diff --git a/Sources/VimAssistant/SpeechRecognizer/SpeechRecognizer.swift b/Sources/VimAssistant/SpeechRecognizer/SpeechRecognizer.swift
index 5d2870d..4cca357 100644
--- a/Sources/VimAssistant/SpeechRecognizer/SpeechRecognizer.swift
+++ b/Sources/VimAssistant/SpeechRecognizer/SpeechRecognizer.swift
@@ -10,6 +10,9 @@ import Foundation
 import Speech
 import SwiftUI
 
+private let bus: AVAudioNodeBus = 0
+private let bufferSize: AVAudioFrameCount = 1024
+
 public actor SpeechRecognizer: ObservableObject {
 
     enum RecognizerError: Error {
@@ -30,9 +33,23 @@ public actor SpeechRecognizer: ObservableObject {
         }
     }
 
+    /// The speech recognition transcript result.
    @MainActor
     public var transcript: String = .empty
 
+    /// Starts transcription when set to `true` and stops it when set to `false`.
+    @MainActor
+    public var run: Bool = false {
+        didSet {
+            if run {
+                resetTranscript()
+                startTranscribing()
+            } else {
+                stopTranscribing()
+            }
+        }
+    }
+
     private var audioEngine: AVAudioEngine?
     private var request: SFSpeechAudioBufferRecognitionRequest?
     private var task: SFSpeechRecognitionTask?
@@ -92,13 +109,18 @@ public actor SpeechRecognizer: ObservableObject {
         }
     }
 
+    /// Handles speech recognition results.
+    /// - Parameters:
+    ///   - audioEngine: the audio engine supplying audio to the recognition task
+    ///   - result: the partial or final speech recognition result, if any
+    ///   - error: any error that occurred during recognition
     nonisolated private func recognitionHandler(audioEngine: AVAudioEngine, result: SFSpeechRecognitionResult?, error: Error?) {
         let receivedFinalResult = result?.isFinal ?? false
         let receivedError = error != nil
 
         if receivedFinalResult || receivedError {
             audioEngine.stop()
-            audioEngine.inputNode.removeTap(onBus: 0)
+            audioEngine.inputNode.removeTap(onBus: bus)
         }
 
         if let result {
@@ -130,12 +152,13 @@ public actor SpeechRecognizer: ObservableObject {
         #endif
 
         let inputNode = audioEngine.inputNode
 
-        let recordingFormat = inputNode.outputFormat(forBus: 0)
-        inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
+        let recordingFormat = inputNode.outputFormat(forBus: bus)
+        inputNode.installTap(onBus: bus, bufferSize: bufferSize, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
             request.append(buffer)
         }
         audioEngine.prepare()
+
         try audioEngine.start()
 
         return (audioEngine, request)
diff --git a/Sources/VimAssistant/Views/VimAssistantView.swift b/Sources/VimAssistant/Views/VimAssistantView.swift
index 27459a1..3b28ad1 100644
--- a/Sources/VimAssistant/Views/VimAssistantView.swift
+++ b/Sources/VimAssistant/Views/VimAssistantView.swift
@@ -18,17 +18,40 @@ public struct VimAssistantView: View {
 
     @State
     var inputText: String = .empty
 
+    @State
+    private var animateGradient = false
+
+    private var displayResponse: Bool {
+        speechRecognizer.transcript.isNotEmpty
+    }
+
+    private var animation: Animation {
+        if animateGradient {
+            .easeOut(duration: 2).repeatForever()
+        } else {
+            .easeOut(duration: 2)
+        }
+    }
+
     /// Initializer.
     /// - Parameter enabled: flag indicating if the assistant should be enabled or not
-    init?(vim: Vim, _ enabled: Bool = false) {
+    public init?(vim: Vim, _ enabled: Bool = false) {
         if !enabled { return nil }
         self.vim = vim
     }
 
     public var body: some View {
+        VStack {
+            inputView
+            responseView
+        }
+    }
+
+    var inputView: some View {
         HStack {
             Image(systemName: "apple.intelligence")
+                .padding()
                 .symbolRenderingMode(.palette)
                 .foregroundStyle(
                     .angularGradient(
@@ -36,25 +59,95 @@ public struct VimAssistantView: View {
                         center: .center, startAngle: .zero, endAngle: .degrees(360)
                     )
                 )
+                .font(.title)
 
-            TextField(text: $inputText, prompt: Text("Type here to use the assistant.")) {
+            TextField(text: $inputText, prompt: Text("Type or tap the microphone to use the AI assistant.")) {
                 Image(systemName: "microphone")
             }
             .textFieldStyle(.plain)
 
-            microPhoneButton
+            microphoneButton
+                .padding()
         }
-        .padding()
+        .background(Color.black.opacity(0.65))
+        .cornerRadius(8)
+        .overlay {
+            overlayView
+        }
+        .padding([.leading, .top, .trailing])
     }
 
-    var microPhoneButton: some View {
-        Button(action: {
+    // The stroke gradient
+    private var gradient: Gradient {
+        .init(colors: animateGradient ? [.red, .orange] : [.teal, .purple])
+    }
+
+    // The gradient style
+    private var gradientStyle: some ShapeStyle {
+        LinearGradient(
+            gradient: gradient,
+            startPoint: .leading,
+            endPoint: .trailing
+        )
+    }
+
+    // The overlay view of the text box that animates the stroke
+    private var overlayView: some View {
+        RoundedRectangle(cornerRadius: 8)
+            .stroke(gradientStyle, lineWidth: 4)
+            .hueRotation(.degrees(animateGradient ? 90 : 0))
+            .animation(animation, value: animateGradient)
+    }
+
+    private var microphoneButton: some View {
+        Button(action: {
+            animateGradient.toggle()
+            speechRecognizer.run.toggle()
         }) {
             Image(systemName: "microphone")
+                .font(.title)
         }
         .buttonStyle(.plain)
+    }
+
+    var responseView: some View {
+        VStack(spacing: 4) {
+            if displayResponse {
+                Text(speechRecognizer.transcript)
+                    .frame(maxWidth: .infinity, alignment: .leading)
+                    .font(.title2)
+                    .padding()
+                HStack {
+                    Spacer()
+                    goodResponseButton
+                    badResponseButton
+                }
+                .padding([.bottom, .trailing])
+            }
+        }
+        .background(Color.black.opacity(0.65))
+        .cornerRadius(8)
+        .padding([.leading, .bottom, .trailing])
+    }
+
+    var goodResponseButton: some View {
+        Button(action: {
+            // TODO: Report a good response
+        }) {
+            Image(systemName: "hand.thumbsup")
+        }
+        .buttonStyle(.plain)
+    }
+
+    var badResponseButton: some View {
+        Button(action: {
+            // TODO: Report a bad response
+        }) {
+            Image(systemName: "hand.thumbsdown")
+        }
+        .buttonStyle(.plain)
     }
 }
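For context, a minimal usage sketch of the API surface this patch exposes: the now-public failable initializer and the recognizer's new `run` toggle. The `AssistantHostView` wrapper and the way the `Vim` instance is obtained are illustrative assumptions, not part of the patch, and the sketch assumes `VimAssistantView` resolves its `SpeechRecognizer` internally, as the diff implies.

import SwiftUI
import VimAssistant

// Hypothetical host view, shown only to illustrate the changed API surface.
struct AssistantHostView: View {
    // Assumed to be supplied by the surrounding app.
    let vim: Vim

    var body: some View {
        VStack {
            // The failable initializer returns nil when `enabled` is false,
            // so the assistant simply drops out of the view hierarchy.
            VimAssistantView(vim: vim, true)
        }
    }
}

Because `run` is isolated to the main actor, UI code such as a button action can toggle it synchronously, which is exactly what `microphoneButton` does in the diff above.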