diff --git a/CHANGELOG.md b/CHANGELOG.md index 9e0c1a0c9..4803d7f7e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,23 @@ # CHANGELOG - Uses semantic versioning (MAJOR.MINOR.PATCH) +## [4.11.5] - 2025-11-02 +### 🔊 Saturn ElevenLabs narration +**Problem**: Saturn Visual Solver streams rich reasoning text, but users must watch the terminal feed to follow progress. The user asked for a spoken version powered by their ElevenLabs API key so they can listen to the solver thinking in real time. + +**Solution**: Added a secure ElevenLabs proxy on the backend and optional narration controls in the Saturn UI. When enabled, reasoning deltas are buffered client-side, sent to the new `/api/audio/narrate` endpoint, and streamed back as audio clips for continuous playback. + +**Implementation**: +- `server/services/audio/elevenLabsService.ts` wraps ElevenLabs streaming with proper logging and env-driven defaults. +- `server/controllers/audioController.ts` exposes `/api/audio/status` and `/api/audio/narrate` routes, registered in `routes.ts`. +- `client/src/hooks/useSaturnAudioNarration.ts` manages buffering, queueing, playback, and UI state. +- `client/src/pages/SaturnVisualSolver.tsx` surfaces narration controls (toggle, volume slider, status) and forwards reasoning deltas. +- `shared/config/audio.ts` centralises feature-flag resolution so both backend and frontend detect availability. + +**Benefits**: +- No secret leakage: the API key stays on the server while the client receives only audio. +- Narration is optional and non-blocking—users can toggle it per run without affecting solver logic. +- Graceful fallbacks when ElevenLabs credentials are absent (controls hidden, helpful error copy rendered). + ## [4.11.4] - 2025-11-01 ### ✅ Saturn Correctness Display **Problem**: Saturn Visual Solver showed completion status ("COMPLETED") but not correctness status (whether predictions were RIGHT or WRONG). Users couldn't tell at a glance if Saturn solved the puzzle correctly. 
diff --git a/client/src/hooks/useSaturnAudioNarration.ts b/client/src/hooks/useSaturnAudioNarration.ts
new file mode 100644
index 000000000..2c0e18a39
--- /dev/null
+++ b/client/src/hooks/useSaturnAudioNarration.ts
@@ -0,0 +1,371 @@
+/**
+ * Author: gpt-5-codex
+ * Date: 2025-11-02T00:00:00Z
+ * PURPOSE: React hook that converts Saturn reasoning deltas into ElevenLabs audio narration
+ * by proxying through the backend. Manages buffering, playback queueing, volume control,
+ * and graceful fallbacks when audio is unavailable.
+ * SRP/DRY check: Pass — isolates narration concerns away from SaturnVisualSolver.
+ */
+
+import { useCallback, useEffect, useMemo, useRef, useState } from 'react';
+import { resolveSaturnAudioConfig } from '@shared/config/audio';
+
+/** Snapshot of the narration state surfaced to the UI. */
+interface NarrationStatus {
+  enabled: boolean;
+  available: boolean;
+  status: 'idle' | 'buffering' | 'playing' | 'error';
+  error?: string | null;
+  volume: number;
+}
+
+const audioConfig = resolveSaturnAudioConfig();
+
+const STATUS_IDLE: NarrationStatus['status'] = 'idle';
+
+/** Public contract returned by {@link useSaturnAudioNarration}. */
+export interface UseSaturnAudioNarration {
+  enabled: boolean;
+  available: boolean;
+  status: NarrationStatus['status'];
+  error: string | null;
+  volume: number;
+  toggleEnabled: () => void;
+  setEnabled: (value: boolean) => void;
+  setVolume: (value: number) => void;
+  enqueueReasoning: (delta: string) => void;
+  flush: () => void;
+  reset: () => void;
+}
+
+const AUDIO_ENDPOINT = '/api/audio/narrate';
+const STATUS_ENDPOINT = '/api/audio/status';
+
+/** Quiet period (ms) after the last delta before the buffered text is narrated. */
+const FLUSH_DELAY_MS = 400;
+/** Buffer size (chars) at which text is narrated even if deltas keep arriving. */
+const MAX_BUFFER_CHARS = 600;
+
+/**
+ * Returns the index at which `text` can be cut without splitting a sentence
+ * (preferred) or a word (fallback). Falls back to `text.length` when the text
+ * contains no usable boundary.
+ */
+function findSpeakableBoundary(text: string): number {
+  const sentenceEnd = Math.max(
+    text.lastIndexOf('. '),
+    text.lastIndexOf('! '),
+    text.lastIndexOf('? '),
+    text.lastIndexOf('\n'),
+  );
+  if (sentenceEnd > 0) {
+    return sentenceEnd + 1;
+  }
+  const wordEnd = text.lastIndexOf(' ');
+  return wordEnd > 0 ? wordEnd + 1 : text.length;
+}
+
+/**
+ * Buffers Saturn reasoning deltas, narrates them through the backend audio proxy,
+ * and plays the resulting clips one after another.
+ *
+ * @returns Narration state plus the controls used by the Saturn UI.
+ */
+export function useSaturnAudioNarration(): UseSaturnAudioNarration {
+  const [enabled, setEnabled] = useState<boolean>(audioConfig.enabled);
+  const [available, setAvailable] = useState<boolean>(audioConfig.enabled);
+  const [status, setStatus] = useState<NarrationStatus['status']>(STATUS_IDLE);
+  const [error, setError] = useState<string | null>(null);
+  const [volume, setVolumeState] = useState(0.8);
+
+  const volumeRef = useRef(volume);
+  const queueRef = useRef<Promise<void>>(Promise.resolve());
+  const bufferRef = useRef('');
+  const flushTimeoutRef = useRef<number | null>(null);
+  const currentAudioRef = useRef<HTMLAudioElement | null>(null);
+  // Resolves the promise of the clip that is currently playing (used by stopPlayback).
+  const settlePlaybackRef = useRef<(() => void) | null>(null);
+  // Aborts the narration request that is currently in flight.
+  const requestAbortRef = useRef<AbortController | null>(null);
+  // Bumped by stopPlayback so work queued before a stop/reset can tell it is stale.
+  const generationRef = useRef(0);
+  const destroyedRef = useRef(false);
+
+  const clearFlushTimeout = useCallback(() => {
+    if (flushTimeoutRef.current !== null) {
+      window.clearTimeout(flushTimeoutRef.current);
+      flushTimeoutRef.current = null;
+    }
+  }, []);
+
+  const resetBuffer = useCallback(() => {
+    bufferRef.current = '';
+    clearFlushTimeout();
+  }, [clearFlushTimeout]);
+
+  const stopPlayback = useCallback(() => {
+    // Invalidate everything queued so far; stale tasks bail out instead of playing later.
+    generationRef.current += 1;
+    requestAbortRef.current?.abort();
+    requestAbortRef.current = null;
+
+    const audio = currentAudioRef.current;
+    currentAudioRef.current = null;
+    if (audio) {
+      try {
+        audio.pause();
+        audio.currentTime = 0;
+      } catch {
+        // ignore best-effort cleanup errors
+      }
+    }
+
+    // A paused clip never fires `ended`; settle it so playAudio can release its object URL.
+    const settle = settlePlaybackRef.current;
+    settlePlaybackRef.current = null;
+    settle?.();
+
+    queueRef.current = Promise.resolve();
+    setStatus(STATUS_IDLE);
+  }, []);
+
+  const setVolume = useCallback((value: number) => {
+    // HTMLMediaElement.volume throws for non-finite or out-of-range values, so sanitise here.
+    if (!Number.isFinite(value)) {
+      return;
+    }
+    setVolumeState(Math.min(1, Math.max(0, value)));
+  }, []);
+
+  useEffect(() => {
+    volumeRef.current = volume;
+    if (currentAudioRef.current) {
+      currentAudioRef.current.volume = volume;
+    }
+  }, [volume]);
+
+  useEffect(() => {
+    let cancelled = false;
+    async function fetchStatus() {
+      try {
+        const response = await fetch(STATUS_ENDPOINT, { cache: 'no-store' });
+        if (!response.ok) {
+          throw new Error(`Status request failed (${response.status})`);
+        }
+        const json = await response.json();
+        const isEnabled = Boolean(json?.data?.enabled);
+        if (!cancelled) {
+          setAvailable(isEnabled);
+          if (!isEnabled) {
+            setEnabled(false);
+          }
+        }
+      } catch (err) {
+        if (!cancelled) {
+          setAvailable(false);
+          setEnabled(false);
+          setError(err instanceof Error ? err.message : 'Audio status unavailable');
+        }
+      }
+    }
+    fetchStatus();
+    return () => {
+      cancelled = true;
+    };
+  }, []);
+
+  useEffect(() => {
+    // Re-arm on every mount: StrictMode runs cleanup and setup again while refs persist.
+    destroyedRef.current = false;
+    return () => {
+      destroyedRef.current = true;
+      clearFlushTimeout();
+      stopPlayback();
+    };
+  }, [clearFlushTimeout, stopPlayback]);
+
+  const playAudio = useCallback(async (blob: Blob): Promise<void> => {
+    if (destroyedRef.current) {
+      return;
+    }
+    const url = URL.createObjectURL(blob);
+    const audio = new Audio(url);
+    audio.volume = volumeRef.current;
+    currentAudioRef.current = audio;
+    try {
+      await new Promise<void>((resolve, reject) => {
+        settlePlaybackRef.current = resolve;
+        audio.onended = () => resolve();
+        audio.onerror = () => reject(new Error('Audio playback error'));
+        // Legacy browsers return undefined from play(), hence the optional call.
+        audio.play()?.catch((err: unknown) => {
+          reject(err instanceof Error ? err : new Error('Audio play failed'));
+        });
+      });
+    } finally {
+      // Detach handlers first so clearing `src` cannot raise a spurious error event.
+      audio.onended = null;
+      audio.onerror = null;
+      audio.src = '';
+      URL.revokeObjectURL(url);
+      if (currentAudioRef.current === audio) {
+        currentAudioRef.current = null;
+        settlePlaybackRef.current = null;
+      }
+    }
+  }, []);
+
+  const requestAudio = useCallback(
+    async (text: string, generation: number): Promise<void> => {
+      const isStale = () => destroyedRef.current || generationRef.current !== generation;
+      if (isStale()) {
+        return;
+      }
+      setStatus('buffering');
+      setError(null);
+
+      const controller = new AbortController();
+      requestAbortRef.current = controller;
+      try {
+        const response = await fetch(AUDIO_ENDPOINT, {
+          method: 'POST',
+          headers: {
+            'Content-Type': 'application/json',
+          },
+          body: JSON.stringify({ text }),
+          signal: controller.signal,
+        });
+
+        if (!response.ok) {
+          const message = (await response.text().catch(() => '')) || `Narration failed (${response.status})`;
+          throw new Error(message);
+        }
+
+        const arrayBuffer = await response.arrayBuffer();
+        if (isStale()) {
+          return;
+        }
+        const blob = new Blob([arrayBuffer], { type: response.headers.get('Content-Type') || 'audio/mpeg' });
+
+        setStatus('playing');
+        await playAudio(blob);
+      } finally {
+        if (requestAbortRef.current === controller) {
+          requestAbortRef.current = null;
+        }
+      }
+    },
+    [playAudio],
+  );
+
+  const enqueuePlayback = useCallback(
+    (text: string) => {
+      if (!text.trim()) {
+        return;
+      }
+      const generation = generationRef.current;
+      let failed = false;
+      const tail: Promise<void> = queueRef.current
+        .catch(() => undefined)
+        .then(() => requestAudio(text, generation))
+        .catch((err: unknown) => {
+          // Aborted or superseded work is not a user-facing failure.
+          if (destroyedRef.current || generationRef.current !== generation) {
+            return;
+          }
+          failed = true;
+          setStatus('error');
+          setError(err instanceof Error ? err.message : 'Narration failed');
+        })
+        .then(() => {
+          // Only the last queued clip returns the hook to idle, and never over an error.
+          if (!failed && queueRef.current === tail) {
+            setStatus(STATUS_IDLE);
+          }
+        });
+      queueRef.current = tail;
+    },
+    [requestAudio],
+  );
+
+  const flushBuffer = useCallback(() => {
+    clearFlushTimeout();
+    const payload = bufferRef.current.trim();
+    bufferRef.current = '';
+    if (!payload) {
+      return;
+    }
+    enqueuePlayback(payload);
+  }, [clearFlushTimeout, enqueuePlayback]);
+
+  const enqueueReasoning = useCallback(
+    (delta: string) => {
+      if (!enabled || !available) {
+        return;
+      }
+      bufferRef.current += delta;
+      clearFlushTimeout();
+
+      // A steady stream keeps restarting the debounce; narrate full sentences once enough piles up.
+      if (bufferRef.current.length >= MAX_BUFFER_CHARS) {
+        const cut = findSpeakableBoundary(bufferRef.current);
+        const ready = bufferRef.current.slice(0, cut);
+        bufferRef.current = bufferRef.current.slice(cut);
+        enqueuePlayback(ready.trim());
+      }
+
+      flushTimeoutRef.current = window.setTimeout(() => {
+        flushTimeoutRef.current = null;
+        flushBuffer();
+      }, FLUSH_DELAY_MS);
+    },
+    [available, clearFlushTimeout, enabled, enqueuePlayback, flushBuffer],
+  );
+
+  const reset = useCallback(() => {
+    resetBuffer();
+    stopPlayback();
+    setError(null);
+  }, [resetBuffer, stopPlayback]);
+
+  const setEnabledSafe = useCallback(
+    (value: boolean) => {
+      if (!available) {
+        setEnabled(false);
+        return;
+      }
+      setEnabled(value);
+      if (!value) {
+        reset();
+      }
+    },
+    [available, reset],
+  );
+
+  const toggleEnabled = useCallback(() => {
+    setEnabledSafe(!enabled);
+  }, [enabled, setEnabledSafe]);
+
+  useEffect(() => {
+    if (!enabled) {
+      resetBuffer();
+    }
+  }, [enabled, resetBuffer]);
+
+  return useMemo(
+    () => ({
+      enabled,
+      available,
+      status,
+      error,
+      volume,
+      toggleEnabled,
+      setEnabled: setEnabledSafe,
+      setVolume,
+      enqueueReasoning,
+      flush: flushBuffer,
+      reset,
+    }),
+    [available, enabled, error, flushBuffer, enqueueReasoning, reset, setEnabledSafe, setVolume, status, toggleEnabled, volume],
+  );
+}
diff --git a/client/src/pages/SaturnVisualSolver.tsx b/client/src/pages/SaturnVisualSolver.tsx
index b7da84393..564ddebe0 100644
--- a/client/src/pages/SaturnVisualSolver.tsx
+++ b/client/src/pages/SaturnVisualSolver.tsx
@@ -25,11 +25,26 @@ import { getDefaultSaturnModel, getModelProvider, modelSupportsTemperature } fro
 import { 
PuzzleGridDisplay } from '@/components/puzzle/PuzzleGridDisplay'; import { PuzzleGrid } from '@/components/puzzle/PuzzleGrid'; import { DEFAULT_EMOJI_SET } from '@/lib/spaceEmojis'; +import { useSaturnAudioNarration } from '@/hooks/useSaturnAudioNarration'; export default function SaturnVisualSolver() { const { taskId } = useParams<{ taskId: string }>(); const { currentTask: task, isLoadingTask, taskError } = usePuzzle(taskId); const { state, start, cancel } = useSaturnProgress(taskId); + const { + enabled: audioEnabled, + available: audioAvailable, + status: audioStatus, + error: audioError, + volume: audioVolume, + toggleEnabled: toggleAudio, + setVolume: setAudioVolume, + enqueueReasoning, + flush: flushAudio, + reset: resetAudio, + } = useSaturnAudioNarration(); + const reasoningCursorRef = React.useRef(0); + const previousStatusRef = React.useRef(state.status); // Settings state - GPT-5 Nano with balanced (low) reasoning depth and detailed summary by default const defaultModel = getDefaultSaturnModel(); @@ -56,6 +71,50 @@ export default function SaturnVisualSolver() { const showTemperatureControl = React.useMemo(() => isGrokFamily && modelSupportsTemperature(model), [isGrokFamily, model]); const showReasoningControls = React.useMemo(() => !isGrokFamily, [isGrokFamily]); + React.useEffect(() => { + const prev = previousStatusRef.current; + if (state.status === 'running' && prev !== 'running') { + reasoningCursorRef.current = state.streamingReasoning?.length ?? 0; + resetAudio(); + } + if ((state.status === 'completed' || state.status === 'error') && prev === 'running') { + flushAudio(); + } + if (state.status === 'idle') { + reasoningCursorRef.current = 0; + } + previousStatusRef.current = state.status; + }, [flushAudio, resetAudio, state.status, state.streamingReasoning]); + + React.useEffect(() => { + if (!audioEnabled) { + return; + } + const reasoningText = state.streamingReasoning ?? 
''; + if (reasoningText.length < reasoningCursorRef.current) { + reasoningCursorRef.current = reasoningText.length; + return; + } + if (reasoningText.length > reasoningCursorRef.current) { + const delta = reasoningText.slice(reasoningCursorRef.current); + reasoningCursorRef.current = reasoningText.length; + enqueueReasoning(delta); + } + }, [audioEnabled, enqueueReasoning, state.streamingReasoning]); + + const previousAudioEnabledRef = React.useRef(audioEnabled); + + React.useEffect(() => { + const wasEnabled = previousAudioEnabledRef.current; + if (audioEnabled && !wasEnabled) { + reasoningCursorRef.current = state.streamingReasoning?.length ?? 0; + } + if (!audioEnabled && wasEnabled) { + resetAudio(); + } + previousAudioEnabledRef.current = audioEnabled; + }, [audioEnabled, resetAudio, state.streamingReasoning]); + // Error states if (!taskId) { @@ -129,6 +188,28 @@ export default function SaturnVisualSolver() { )} README + {audioAvailable && ( +
+ + setAudioVolume(parseFloat(e.target.value))} + className="range range-xs w-20" + disabled={!audioEnabled} + /> +
+ )} {isRunning && (