Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions transcribe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/usr/bin/env python3
"""Transcribe a WAV file using faster-whisper (base model, CPU, int8)."""

import sys
from faster_whisper import WhisperModel


def main() -> None:
    """Transcribe the audio file named on the command line and print the text.

    Expects exactly one command-line argument, the path of the audio file.
    With any other argument count a usage message is written to stderr and
    the process exits with status 1.

    The transcript is printed to stdout as one line, with the text of the
    individual segments separated by single spaces.
    """
    if len(sys.argv) != 2:
        print(f"Usage: {sys.argv[0]} <wav_file>", file=sys.stderr)
        sys.exit(1)

    model = WhisperModel("base", device="cpu", compute_type="int8")
    segments, _ = model.transcribe(sys.argv[1], beam_size=5)

    # Skip segments that are blank after stripping: joining them would leave
    # doubled (or leading/trailing) spaces in text that is typed verbatim.
    stripped = (seg.text.strip() for seg in segments)
    text = " ".join(piece for piece in stripped if piece)
    print(text)


if __name__ == "__main__":
main()
69 changes: 69 additions & 0 deletions voice-to-text
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/usr/bin/env bash
#
# voice-to-text — push-to-talk toggle for Wayland
#   1st press: start recording
#   2nd press: stop, transcribe, copy to clipboard, and type into terminal
#

# Abort on any failing command, on use of an unset variable, and on a
# failure anywhere inside a pipeline.
set -euo pipefail

# Keep state in the per-user runtime directory when the session provides one.
# Fixed file names directly in world-writable /tmp can be pre-created or
# symlinked by another local user, so /tmp is only a fallback.
STATE_DIR="${XDG_RUNTIME_DIR:-/tmp}"

# Holds the PID of the background ffmpeg while a recording is in progress;
# its presence is what distinguishes "stop" from "start".
PID_FILE="$STATE_DIR/voice-to-text.pid"
# Audio captured by ffmpeg and handed to the transcriber.
WAV_FILE="$STATE_DIR/voice-to-text.wav"
# Receives the transcriber's stderr; shown to the user when transcription fails.
LOG_FILE="$STATE_DIR/voice-to-text.log"
# Transcription script and the interpreter used to run it.
TRANSCRIBE="$HOME/scripts/transcribe.py"
VENV_PYTHON="$HOME/venvs/voice-to-text/bin/python"
# Hint attached to every notification sent by notify().
# NOTE(review): presumably makes successive notifications replace one another
# on daemons that honour this hint — confirm for the daemon in use.
HINT="string:x-canonical-private-synchronous:voice-to-text"

# notify [TIMEOUT_MS] [SUMMARY [BODY]]
#
# Send a desktop notification under the "Voice Recorder" application name,
# tagged with $HINT. The first argument is the expire time handed to
# notify-send (-1 when empty or omitted); everything after it is forwarded
# to notify-send unchanged.
notify() {
    local expire_ms="${1:--1}"
    shift
    notify-send \
        --app-name="Voice Recorder" \
        --expire-time="$expire_ms" \
        --hint="$HINT" \
        "$@"
}

# ── Stop recording & transcribe ──────────────────────────────────────
# A PID file means a recording is in progress, so this invocation is the
# "second press": stop ffmpeg, transcribe the WAV, then copy and type the text.
if [[ -f "$PID_FILE" ]]; then
    pid=$(cat "$PID_FILE")
    # Remove the PID file first so the next press starts a fresh recording
    # even if a later step in this branch fails.
    rm -f "$PID_FILE"

    # Only signal a plain positive PID: 0 or a negative number would address
    # a whole process group, and an empty/garbled file must not be trusted.
    if [[ "$pid" =~ ^[1-9][0-9]*$ ]] && kill -0 "$pid" 2>/dev/null; then
        # ffmpeg may exit between the check above and this signal; that must
        # not abort the script under `set -e`.
        kill -INT "$pid" 2>/dev/null || true
        # Poll until ffmpeg exits (can't use wait — different shell).
        # 50 polls x 0.1 s gives it roughly five seconds.
        for _ in $(seq 1 50); do
            kill -0 "$pid" 2>/dev/null || break
            sleep 0.1
        done
        # Still running after the grace period: force it down so the WAV is
        # not being written while it is transcribed.
        if kill -0 "$pid" 2>/dev/null; then
            kill -KILL "$pid" 2>/dev/null || true
        fi
    fi

    # -s: the file must exist and be non-empty.
    if [[ ! -s "$WAV_FILE" ]]; then
        notify 5000 "Error" "Recording is empty — nothing to transcribe."
        exit 1
    fi

    notify -1 "Transcribing..."

    # Capture stdout as the transcript and stderr in the log. Record the exit
    # status instead of letting `set -e` abort, so the user gets a notification.
    status=0
    text=$("$VENV_PYTHON" "$TRANSCRIBE" "$WAV_FILE" 2>"$LOG_FILE") || status=$?

    if [[ -z "$text" ]]; then
        if (( status == 0 )); then
            # The transcriber succeeded but produced no text.
            err="no speech detected"
        else
            # Show only the tail of the log: the end of a traceback carries
            # the actual error, and the full log can be very long.
            err=$(tail -n 5 "$LOG_FILE" 2>/dev/null) || err=""
        fi
        # An empty log must still yield a readable message.
        notify 5000 "Error" "Transcription failed: ${err:-unknown error}"
        exit 1
    fi

    # Put the transcript on the clipboard, then type it.
    printf '%s' "$text" | wl-copy
    # `--` keeps a transcript that starts with "-" from being parsed as options.
    wtype -- "$text"

    notify 5000 "Transcribed" "$text"
    exit 0
fi

# ── Start recording ──────────────────────────────────────────────────
# Reached only when no PID file exists, i.e. on the "first press".

# Give up early when `pactl info` cannot reach a sound server.
pactl info >/dev/null 2>&1 || {
    notify 5000 "Error" "PulseAudio/PipeWire not available."
    exit 1
}

# Discard any previous recording.
rm -f "$WAV_FILE"

# Record from the default pulse input as mono, 16 kHz audio in the background,
# with all ffmpeg output discarded.
ffmpeg -y -f pulse -i default -ac 1 -ar 16000 "$WAV_FILE" >/dev/null 2>&1 &
# Remember ffmpeg's PID for the invocation that stops the recording.
printf '%s\n' "$!" > "$PID_FILE"

notify 0 "Recording..." "Press ALT+R again to stop."