29 changes: 26 additions & 3 deletions Sources/VimAssistant/SpeechRecognizer/SpeechRecognizer.swift
@@ -10,6 +10,9 @@ import Foundation
import Speech
import SwiftUI

private let bus: AVAudioNodeBus = 0
private let bufferSize: AVAudioFrameCount = 1024

public actor SpeechRecognizer: ObservableObject {

enum RecognizerError: Error {
@@ -30,9 +33,22 @@ public actor SpeechRecognizer: ObservableObject {
}
}

/// The speech recognition transcript result.
@MainActor
public var transcript: String = .empty

@MainActor
public var run: Bool = false {
didSet {
if run {
resetTranscript()
startTranscribing()
} else {
stopTranscribing()
}
}
}

private var audioEngine: AVAudioEngine?
private var request: SFSpeechAudioBufferRecognitionRequest?
private var task: SFSpeechRecognitionTask?
@@ -92,13 +108,18 @@ }
}
}

/// Handles speech recognition results.
/// - Parameters:
/// - audioEngine: the audio engine that processed the task
/// - result: the speech recognition result
/// - error: errors that could have occurred during recognition
nonisolated private func recognitionHandler(audioEngine: AVAudioEngine, result: SFSpeechRecognitionResult?, error: Error?) {
let receivedFinalResult = result?.isFinal ?? false
let receivedError = error != nil

if receivedFinalResult || receivedError {
audioEngine.stop()
audioEngine.inputNode.removeTap(onBus: 0)
audioEngine.inputNode.removeTap(onBus: bus)
}

if let result {
@@ -130,12 +151,14 @@
#endif

let inputNode = audioEngine.inputNode
let recordingFormat = inputNode.outputFormat(forBus: bus)
let inputFormat = inputNode.inputFormat(forBus: bus)

let recordingFormat = inputNode.outputFormat(forBus: 0)
inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
inputNode.installTap(onBus: bus, bufferSize: bufferSize, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
request.append(buffer)
}
audioEngine.prepare()

try audioEngine.start()

return (audioEngine, request)
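The new `run` flag turns transcription into a single observable switch: flipping it to true resets the transcript and starts transcribing, flipping it to false stops. Below is a minimal sketch of how a SwiftUI caller might drive it; the parameterless SpeechRecognizer initializer and the `VimAssistant` module name are assumptions, as neither appears in this diff.

import SwiftUI
import VimAssistant

// A minimal sketch, assuming SpeechRecognizer can be created with a
// parameterless initializer (its init is not shown in this diff).
struct DictationToggle: View {
    @StateObject private var recognizer = SpeechRecognizer()

    var body: some View {
        VStack {
            // `transcript` is the @MainActor result string from the diff.
            Text(recognizer.transcript)
            Button(recognizer.run ? "Stop" : "Start") {
                // Toggling `run` fires its didSet, which resets the
                // transcript and starts or stops transcribing.
                recognizer.run.toggle()
            }
        }
    }
}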
109 changes: 101 additions & 8 deletions Sources/VimAssistant/Views/VimAssistantView.swift
@@ -18,43 +18,136 @@ public struct VimAssistantView: View {
@State
var inputText: String = .empty

@State
private var animateGradient = false

private var displayResponse: Bool {
speechRecognizer.transcript.isNotEmpty
}

private var animation: Animation {
if animateGradient {
.easeOut(duration: 2).repeatForever()
} else {
.easeOut(duration: 2)
}
}

/// Initializer.
/// - Parameters:
///   - vim: the Vim instance the assistant operates on
///   - enabled: flag indicating if the assistant should be enabled or not
init?(vim: Vim, _ enabled: Bool = false) {
public init?(vim: Vim, _ enabled: Bool = false) {
if !enabled { return nil }
self.vim = vim
}

public var body: some View {
VStack {
inputView
responseView
}
}

var inputView: some View {

HStack {
Image(systemName: "apple.intelligence")
.padding()
.symbolRenderingMode(.palette)
.foregroundStyle(
.angularGradient(
colors: [.red, .yellow, .green, .blue, .purple, .red],
center: .center, startAngle: .zero, endAngle: .degrees(360)
)
)
.font(.title)

TextField(text: $inputText, prompt: Text("Type here to use the assistant.")) {
TextField(text: $inputText, prompt: Text("Type or tap microphone to use the AI assistant.")) {
Image(systemName: "microphone")

}
.textFieldStyle(.plain)
microPhoneButton
.textFieldStyle(.plain)

microphoneButton
.padding()
}
.padding()
.background(Color.black.opacity(0.65))
.cornerRadius(8)
.overlay {
overlayView
}
.padding([.leading, .top, .trailing])
}

var microPhoneButton: some View {
Button(action: {

/// The stroke gradient.
private var gradient: Gradient {
.init(colors: animateGradient ? [.red, .orange] : [.teal, .purple])
}

/// The gradient style.
private var gradientStyle: some ShapeStyle {
LinearGradient(
gradient: gradient,
startPoint: .leading,
endPoint: .trailing
)
}

/// The overlay view of the text box that animates the stroke.
private var overlayView: some View {
RoundedRectangle(cornerRadius: 8)
.stroke(gradientStyle, lineWidth: 4)
.hueRotation(.degrees(animateGradient ? 90 : 0))
.animation(animation, value: animateGradient)
}

private var microphoneButton: some View {
Button(action: {
animateGradient.toggle()
speechRecognizer.run.toggle()
}) {
Image(systemName: "microphone")
.font(.title)
}
.buttonStyle(.plain)
}

var responseView: some View {

VStack(spacing: 4) {
if displayResponse {
Text(speechRecognizer.transcript)
.frame(maxWidth: .infinity, alignment: .leading)
.font(.title2)
.padding()
HStack {
Spacer()
goodResponseButton
badResponseButton
}
.padding([.bottom, .trailing])
}
}
.background(Color.black.opacity(0.65))
.cornerRadius(8)
.padding([.leading, .bottom, .trailing])

}

var goodResponseButton: some View {
Button(action: {
// TODO: Report a good response
}) {
Image(systemName: "hand.thumbsup")
}
.buttonStyle(.plain)
}

var badResponseButton: some View {
Button(action: {
// TODO: Report a bad response
}) {
Image(systemName: "hand.thumbsdown")
}
.buttonStyle(.plain)
}
}

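With the failable initializer now public, host apps can conditionally mount the assistant. A minimal embedding sketch follows; the `HostView` wrapper and the assumption that the host already owns a `Vim` instance are illustrative, not part of this diff. When `enabled` is false the init returns nil, and since Optional conforms to View, SwiftUI simply renders nothing.

import SwiftUI
import VimAssistant

// A minimal sketch, assuming the host app already owns a `Vim` instance.
struct HostView: View {
    let vim: Vim

    var body: some View {
        ZStack(alignment: .bottom) {
            Color.clear // stand-in for the host app's main content
            // Returns nil when disabled; Optional<VimAssistantView>
            // is itself a View, so nothing is rendered in that case.
            VimAssistantView(vim: vim, true)
        }
    }
}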