Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions execuwhisper/macos/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Xcode generated files (run `xcodegen generate` to recreate the project)
*.xcodeproj/
xcuserdata/
*.xcuserstate
*.xcuserdatad/

# Build outputs
build/
DerivedData/
*.dmg

# Python
__pycache__/
*.pyc
.venv/

# macOS
.DS_Store

# Local-only dictation samples and prompt-quality corpus.
test_audio/
evaluation/

# Local notes / scratch
docs/superpowers/
21 changes: 21 additions & 0 deletions execuwhisper/macos/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# ExecuWhisper Changelog

## v0.1.0 — Initial open-source release

- Initial open-source publication of the ExecuWhisper macOS dictation app.
- Apple Silicon-only (M1+); requires macOS 14.0 or newer.
- ASR: NVIDIA Parakeet-TDT via the Metal backend (executorch helper from
pytorch/executorch#18861).
- Formatter: fine-tuned LFM2.5-350M via the MLX delegate (executorch helper
from pytorch/executorch#19562; export pipeline from #19195).
- Models distributed via Hugging Face Hub:
  - `younghan-meta/Parakeet-TDT-ExecuTorch-Metal`
  - `younghan-meta/LFM2.5-350M-ExecuWhisper-Formatter`
- AMI release-gate eval for the formatter: forbidden 0.030 ≤ 0.10,
coverage 0.874 ≥ 0.85 (RELEASE-READY).
- Build via `xcodegen generate` + `xcodebuild`. Set `DEVELOPMENT_TEAM` to
your Apple Developer team via env var; the project no longer hard-codes
a team identifier.
- Helpers signed with the hardened runtime + `disable-library-validation`
+ `allow-dyld-environment-variables` entitlements so they can load the
user-supplied `libomp.dylib` (install with `brew install libomp`).
12 changes: 12 additions & 0 deletions execuwhisper/macos/ExecuWhisper/ExecuWhisper.entitlements
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>com.apple.security.cs.disable-library-validation</key>
<true/>
<key>com.apple.security.device.audio-input</key>
<true/>
<key>com.apple.security.network.client</key>
<true/>
</dict>
</plist>
159 changes: 159 additions & 0 deletions execuwhisper/macos/ExecuWhisper/ExecuWhisperApp.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

import AppKit
import SwiftUI

/// Application entry point.
///
/// `init()` builds one shared instance of each model object and wires them
/// together; `body` injects them into the main window and the Settings scene
/// through the SwiftUI environment and installs the "Transcription" and
/// "Dictation" command menus.
@main
struct ExecuWhisperApp: App {
    // Declared without default values on purpose: `init()` installs the shared
    // instances explicitly, so a default here would only construct a second,
    // throwaway object that is replaced immediately.
    @State private var preferences: Preferences
    @State private var downloader: ModelDownloader
    @State private var replacementStore: ReplacementStore
    @State private var store: TranscriptStore
    @State private var dictationManager: DictationManager

    /// Creates the object graph so that every consumer shares the same
    /// `Preferences`, `ModelDownloader` and `ReplacementStore` instances.
    init() {
        let prefs = Preferences()
        let downloader = ModelDownloader()
        let replacementStore = ReplacementStore()
        let formatterBridge = FormatterBridge()
        // The trailing closure reads the formatter paths from `prefs` each time
        // it is invoked rather than capturing a snapshot here.
        let textPipeline = TextPipeline(
            replacementStore: replacementStore,
            formatterBridge: formatterBridge
        ) {
            TextPipeline.FormatterPaths(
                runnerPath: prefs.formatterRunnerPath,
                modelPath: prefs.formatterModelPath,
                tokenizerPath: prefs.formatterTokenizerPath,
                tokenizerConfigPath: prefs.formatterTokenizerConfigPath
            )
        }
        let store = TranscriptStore(
            preferences: prefs,
            downloader: downloader,
            textPipeline: textPipeline
        )
        let dictationManager = DictationManager(store: store, preferences: prefs)
        _preferences = State(initialValue: prefs)
        _downloader = State(initialValue: downloader)
        _replacementStore = State(initialValue: replacementStore)
        _store = State(initialValue: store)
        _dictationManager = State(initialValue: dictationManager)
    }

    var body: some Scene {
        WindowGroup {
            ContentView()
                .environment(store)
                .environment(preferences)
                .environment(downloader)
                .environment(replacementStore)
                .environment(dictationManager)
                .frame(minWidth: 700, minHeight: 460)
                // Re-run the health check every time the app becomes active.
                .onReceive(NotificationCenter.default.publisher(for: NSApplication.didBecomeActiveNotification)) { _ in
                    Task { await store.runHealthCheck() }
                }
        }
        .defaultSize(width: 960, height: 640)
        .windowToolbarStyle(.unified)
        .commands {
            // Replace the standard "New" item group with nothing.
            CommandGroup(replacing: .newItem) {}

            CommandMenu("Transcription") {
                // One record/stop/progress item depending on the session state;
                // start and stop share the same ⇧⌘R shortcut.
                switch store.sessionState {
                case .idle:
                    Button("Start Recording") {
                        Task { await store.startRecording() }
                    }
                    .keyboardShortcut("R", modifiers: [.command, .shift])
                    .disabled(!store.isModelReady)

                case .recording:
                    Button("Stop and Transcribe") {
                        Task { await store.stopRecordingAndTranscribe() }
                    }
                    .keyboardShortcut("R", modifiers: [.command, .shift])

                case .transcribing:
                    Button("Transcribing...") {}
                        .disabled(true)
                }

                Button("Import Audio...") {
                    store.importAudioFileWithPanel()
                }
                .disabled(store.hasActiveSession || downloader.isDownloading)

                // Offer the download only when the health check asks for it and
                // no download is already running.
                if store.healthResult?.shouldOfferModelDownload == true && !downloader.isDownloading {
                    Divider()
                    Button("Download Model") {
                        Task { await store.downloadModel() }
                    }
                }

                // Preload/unload controls are shown only when resources are
                // ready and no session is active.
                if store.resourcesReady && !store.hasActiveSession {
                    Divider()
                    switch store.helperState {
                    case .unloaded:
                        Button("Preload Model") {
                            Task { await store.preloadModel() }
                        }
                        .keyboardShortcut("L", modifiers: [.command, .shift])

                    case .loading:
                        Button("Warming Model...") {}
                            .disabled(true)

                    case .warm:
                        Button("Unload Model") {
                            Task { await store.unloadModel() }
                        }
                        .keyboardShortcut("U", modifiers: [.command, .shift])

                    case .failed:
                        Button("Retry Preload") {
                            Task { await store.preloadModel() }
                        }
                    }
                }

                Divider()

                Button("Copy Transcript") {
                    let text = currentTranscript
                    guard !text.isEmpty else { return }
                    NSPasteboard.general.clearContents()
                    NSPasteboard.general.setString(text, forType: .string)
                }
                .keyboardShortcut("C", modifiers: [.command, .shift])
                .disabled(currentTranscript.isEmpty)
            }

            CommandMenu("Dictation") {
                Button(dictationManager.isListening ? "Stop Dictation" : "Start Dictation") {
                    Task { await dictationManager.toggle() }
                }
                .disabled(store.isTranscribing)
            }
        }

        Settings {
            SettingsView(usesFixedWindowSize: true)
                .environment(preferences)
                .environment(dictationManager)
        }
    }

    /// The text the "Copy Transcript" command acts on: the live transcript
    /// while a session is active, otherwise the transcript of the selected
    /// session, or an empty string when nothing is selected or found.
    private var currentTranscript: String {
        if store.hasActiveSession {
            return store.liveTranscript
        }
        guard let id = store.selectedSessionID else { return "" }
        return store.sessions.first(where: { $0.id == id })?.transcript ?? ""
    }
}
8 changes: 8 additions & 0 deletions execuwhisper/macos/ExecuWhisper/Info.plist
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>NSMicrophoneUsageDescription</key>
<string>ExecuWhisper needs microphone access to record audio for on-device transcription.</string>
</dict>
</plist>
110 changes: 110 additions & 0 deletions execuwhisper/macos/ExecuWhisper/Models/DictationShortcut.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

import AppKit
import Carbon.HIToolbox
import Foundation

/// A keyboard shortcut for dictation: a virtual key code, a Carbon modifier
/// mask, and a human-readable label for the key.
struct DictationShortcut: Codable, Equatable, Sendable {
    /// Virtual key code of the non-modifier key (a `kVK_*` value).
    var keyCode: UInt32
    /// Bitwise OR of the Carbon `controlKey`, `optionKey`, `shiftKey` and
    /// `cmdKey` masks.
    var carbonModifiers: UInt32
    /// Label for the key alone, without modifier symbols (e.g. "Space", "D").
    var keyDisplay: String

    /// The ⌃Space shortcut.
    static let controlSpace = DictationShortcut(
        keyCode: UInt32(kVK_Space),
        carbonModifiers: UInt32(controlKey),
        keyDisplay: "Space"
    )

    init(keyCode: UInt32, carbonModifiers: UInt32, keyDisplay: String) {
        self.keyCode = keyCode
        self.carbonModifiers = carbonModifiers
        self.keyDisplay = keyDisplay
    }

    /// Builds a shortcut from a key event.
    ///
    /// Returns `nil` when the event carries none of Control/Option/Shift/Command
    /// or when no display label can be derived for the key.
    init?(event: NSEvent) {
        let carbonModifiers = Self.carbonModifiers(from: event.modifierFlags)
        guard carbonModifiers != 0 else { return nil }
        guard let keyDisplay = Self.keyDisplay(for: event) else { return nil }
        self.init(
            keyCode: UInt32(event.keyCode),
            carbonModifiers: carbonModifiers,
            keyDisplay: keyDisplay
        )
    }

    /// Modifier symbols in ⌃⌥⇧⌘ order followed by `keyDisplay`, e.g. "⌃Space".
    var displayString: String {
        var value = ""
        if carbonModifiers & UInt32(controlKey) != 0 {
            value += "⌃"
        }
        if carbonModifiers & UInt32(optionKey) != 0 {
            value += "⌥"
        }
        if carbonModifiers & UInt32(shiftKey) != 0 {
            value += "⇧"
        }
        if carbonModifiers & UInt32(cmdKey) != 0 {
            value += "⌘"
        }
        return value + keyDisplay
    }

    /// Converts AppKit modifier flags to the Carbon modifier mask.
    ///
    /// Only Control, Option, Shift and Command are translated; every other flag
    /// is ignored.
    static func carbonModifiers(from flags: NSEvent.ModifierFlags) -> UInt32 {
        let sanitized = flags.intersection(.deviceIndependentFlagsMask)
        var value: UInt32 = 0
        if sanitized.contains(.control) {
            value |= UInt32(controlKey)
        }
        if sanitized.contains(.option) {
            value |= UInt32(optionKey)
        }
        if sanitized.contains(.shift) {
            value |= UInt32(shiftKey)
        }
        if sanitized.contains(.command) {
            value |= UInt32(cmdKey)
        }
        return value
    }

    /// Labels for keys that have no printable character of their own.
    ///
    /// AppKit reports function, navigation and arrow keys as private-use
    /// Unicode scalars, which would otherwise end up in `keyDisplay` as an
    /// unprintable glyph, so they are named here by virtual key code.
    private static let namedKeys: [Int: String] = [
        kVK_Space: "Space",
        kVK_Return: "Return",
        kVK_Tab: "Tab",
        kVK_Delete: "Delete",
        kVK_ForwardDelete: "Fn-Delete",
        kVK_Escape: "Esc",
        kVK_LeftArrow: "Left",
        kVK_RightArrow: "Right",
        kVK_UpArrow: "Up",
        kVK_DownArrow: "Down",
        kVK_Home: "Home",
        kVK_End: "End",
        kVK_PageUp: "Page Up",
        kVK_PageDown: "Page Down",
        kVK_F1: "F1",
        kVK_F2: "F2",
        kVK_F3: "F3",
        kVK_F4: "F4",
        kVK_F5: "F5",
        kVK_F6: "F6",
        kVK_F7: "F7",
        kVK_F8: "F8",
        kVK_F9: "F9",
        kVK_F10: "F10",
        kVK_F11: "F11",
        kVK_F12: "F12",
        kVK_F13: "F13",
        kVK_F14: "F14",
        kVK_F15: "F15",
        kVK_F16: "F16",
        kVK_F17: "F17",
        kVK_F18: "F18",
        kVK_F19: "F19",
        kVK_F20: "F20",
    ]

    /// Returns the label for the event's key: a fixed name for keys listed in
    /// `namedKeys`, otherwise the uppercased `charactersIgnoringModifiers`
    /// (trimmed of whitespace), or `nil` when that is empty or unavailable.
    private static func keyDisplay(for event: NSEvent) -> String? {
        if let name = namedKeys[Int(event.keyCode)] {
            return name
        }

        guard let characters = event.charactersIgnoringModifiers?
            .trimmingCharacters(in: .whitespacesAndNewlines),
            !characters.isEmpty
        else {
            return nil
        }
        return characters.uppercased()
    }
}
Loading
Loading