diff --git a/transcribe.py b/transcribe.py
new file mode 100755
index 0000000..556a37e
--- /dev/null
+++ b/transcribe.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python3
+"""Transcribe a WAV file using faster-whisper (base model, CPU, int8)."""
+
+import sys
+from faster_whisper import WhisperModel
+
+
+def main():
+    """Transcribe the audio file given as the sole CLI argument.
+
+    Prints the joined segment texts to stdout. Exits with status 1 and a
+    usage message on stderr unless exactly one argument is supplied.
+    """
+    if len(sys.argv) != 2:
+        print(f"Usage: {sys.argv[0]} <audio-file>", file=sys.stderr)
+        sys.exit(1)
+
+    model = WhisperModel("base", device="cpu", compute_type="int8")
+    segments, _ = model.transcribe(sys.argv[1], beam_size=5)
+    # Segment texts are stripped and joined with single spaces.
+    text = " ".join(seg.text.strip() for seg in segments)
+    print(text)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/voice-to-text b/voice-to-text
new file mode 100755
index 0000000..1aaae76
--- /dev/null
+++ b/voice-to-text
@@ -0,0 +1,99 @@
+#!/usr/bin/env bash
+#
+# voice-to-text — push-to-talk toggle for Wayland
+#   1st press: start recording
+#   2nd press: stop, transcribe, type into terminal
+#
+# Which action runs is decided by whether the PID file exists. State files
+# live in $XDG_RUNTIME_DIR when it is set (falling back to /tmp) so they do
+# not sit at fixed, guessable paths in a world-writable directory.
+#
+
+set -euo pipefail
+
+STATE_DIR="${XDG_RUNTIME_DIR:-/tmp}"
+PID_FILE="$STATE_DIR/voice-to-text.pid"
+WAV_FILE="$STATE_DIR/voice-to-text.wav"
+LOG_FILE="$STATE_DIR/voice-to-text.log"
+TRANSCRIBE="$HOME/scripts/transcribe.py"
+VENV_PYTHON="$HOME/venvs/voice-to-text/bin/python"
+# Passed as a notify-send hint on every notification (see notify below).
+HINT="string:x-canonical-private-synchronous:voice-to-text"
+
+# notify TIMEOUT_MS SUMMARY [BODY]
+# Send a desktop notification under the "Voice Recorder" app name.
+# "--" ends option parsing so text beginning with "-" (for example a
+# transcript) is not mistaken for a notify-send option.
+notify() {
+    local timeout="${1:--1}"; shift
+    notify-send -a "Voice Recorder" -t "$timeout" -h "$HINT" -- "$@"
+}
+
+# ── Stop recording & transcribe ──────────────────────────────────────
+if [[ -f "$PID_FILE" ]]; then
+    pid=$(cat "$PID_FILE")
+    rm -f "$PID_FILE"
+
+    # Only signal a plain positive PID; an empty or malformed PID file must
+    # not turn into "kill 0" (the whole process group) or similar.
+    if [[ "$pid" =~ ^[1-9][0-9]*$ ]] && kill -0 "$pid" 2>/dev/null; then
+        # The process may exit between the check and the signal; do not let
+        # "set -e" abort the script in that case.
+        kill -INT "$pid" 2>/dev/null || true
+        # Poll until ffmpeg exits (can't use wait — different shell)
+        for _ in {1..50}; do
+            kill -0 "$pid" 2>/dev/null || break
+            sleep 0.1
+        done
+        # Still alive after ~5 s: force it down so it stops writing the WAV
+        # file before transcription starts.
+        if kill -0 "$pid" 2>/dev/null; then
+            kill -KILL "$pid" 2>/dev/null || true
+        fi
+    fi
+
+    if [[ ! -s "$WAV_FILE" ]]; then
+        notify 5000 "Error" "Recording is empty — nothing to transcribe."
+        exit 1
+    fi
+
+    notify -1 "Transcribing..."
+
+    # Keep the exit status instead of discarding it, so a crash can be told
+    # apart from a successful run that produced no text.
+    status=0
+    text=$("$VENV_PYTHON" "$TRANSCRIBE" "$WAV_FILE" 2>"$LOG_FILE") || status=$?
+
+    if [[ $status -ne 0 ]]; then
+        # Show only the tail of the log; the full text stays in $LOG_FILE.
+        err=$(tail -n 5 "$LOG_FILE" 2>/dev/null || true)
+        notify 5000 "Error" "Transcription failed: ${err:-unknown error}"
+        exit 1
+    fi
+
+    if [[ -z "$text" ]]; then
+        notify 5000 "Error" "No speech detected."
+        exit 1
+    fi
+
+    printf '%s' "$text" | wl-copy
+    wtype -- "$text"
+
+    notify 5000 "Transcribed" "$text"
+    exit 0
+fi
+
+# ── Start recording ──────────────────────────────────────────────────
+
+if ! pactl info &>/dev/null; then
+    notify 5000 "Error" "PulseAudio/PipeWire not available."
+    exit 1
+fi
+
+rm -f "$WAV_FILE"
+
+# Record mono 16 kHz audio from the "default" pulse source in the background.
+ffmpeg -y -f pulse -i default -ac 1 -ar 16000 "$WAV_FILE" &>/dev/null &
+echo $! > "$PID_FILE"
+
+notify 0 "Recording..." "Press ALT+R again to stop."