Ready to chat!
+Start a conversation by clicking the "Start Audio" button and speaking into your microphone.
+diff --git a/examples/speech-to-speech/Dockerfile b/examples/speech-to-speech/Dockerfile index c27225f..ff0daba 100644 --- a/examples/speech-to-speech/Dockerfile +++ b/examples/speech-to-speech/Dockerfile @@ -26,6 +26,7 @@ COPY pyproject.toml uv.lock \ COPY src/ ./src/ COPY examples/static/ ./examples/static/ COPY examples/speech-to-speech/ ./examples/speech-to-speech/ +COPY examples/static/ /app/static/ # Example app directory WORKDIR /app/examples/speech-to-speech diff --git a/examples/speech-to-speech/bot.py b/examples/speech-to-speech/bot.py index 23b09ea..bbef19b 100644 --- a/examples/speech-to-speech/bot.py +++ b/examples/speech-to-speech/bot.py @@ -56,14 +56,14 @@ async def create_pipeline_task(pipeline_metadata: PipelineMetadata): ) llm = NvidiaLLMService( - api_key=os.getenv("NVIDIA_API_KEY"), + api_key=os.getenv("NVIDIA_API_KEY", "not-needed-for-local-nim"), base_url=os.getenv("NVIDIA_LLM_URL", "https://integrate.api.nvidia.com/v1"), model=os.getenv("NVIDIA_LLM_MODEL", "meta/llama-3.1-8b-instruct"), ) stt = RivaASRService( server=os.getenv("RIVA_ASR_URL", "localhost:50051"), - api_key=os.getenv("NVIDIA_API_KEY"), + api_key=os.getenv("NVIDIA_API_KEY", "not-needed-for-local-nim"), language=os.getenv("RIVA_ASR_LANGUAGE", "en-US"), sample_rate=16000, model=os.getenv("RIVA_ASR_MODEL", "parakeet-1.1b-en-US-asr-streaming-silero-vad-asr-bls-ensemble"), @@ -71,7 +71,7 @@ async def create_pipeline_task(pipeline_metadata: PipelineMetadata): tts = RivaTTSService( server=os.getenv("RIVA_TTS_URL", "localhost:50051"), - api_key=os.getenv("NVIDIA_API_KEY"), + api_key=os.getenv("NVIDIA_API_KEY", "not-needed-for-local-nim"), voice_id=os.getenv("RIVA_TTS_VOICE_ID", "Magpie-Multilingual.EN-US.Sofia"), model=os.getenv("RIVA_TTS_MODEL", "magpie_tts_ensemble-Magpie-Multilingual"), language=os.getenv("RIVA_TTS_LANGUAGE", "en-US"), diff --git a/examples/static/audio-processor.js b/examples/static/audio-processor.js new file mode 100644 index 0000000..ab6a048 --- /dev/null +++ b/examples/static/audio-processor.js @@ -0,0 +1,34 @@ +class AudioProcessor extends AudioWorkletProcessor { + constructor(options) { + super(); + this.sampleRate = options.processorOptions.sampleRate || 16000; + this.numChannels = options.processorOptions.numChannels || 1; + } + + process(inputs, outputs, parameters) { + const input = inputs[0]; + + if (input && input.length > 0 && input[0]) { + const audioData = input[0]; // Get the first channel + + // Convert Float32Array to Int16Array (PCM S16) + const pcmS16Array = new Int16Array(audioData.length); + for (let i = 0; i < audioData.length; i++) { + const sample = Math.max(-1, Math.min(1, audioData[i])); + pcmS16Array[i] = sample < 0 ? sample * 0x8000 : sample * 0x7FFF; + } + + // Send the processed audio data to the main thread + this.port.postMessage({ + type: 'audioData', + data: pcmS16Array.buffer, + sampleRate: this.sampleRate, + numChannels: this.numChannels + }); + } + + return true; // Keep the processor alive + } +} + +registerProcessor('audio-processor', AudioProcessor); \ No newline at end of file diff --git a/examples/static/index.html b/examples/static/index.html index fa8dd4e..745d210 100644 --- a/examples/static/index.html +++ b/examples/static/index.html @@ -7,14 +7,526 @@ -
Powered by AI Speech Technology
+Start a conversation by clicking the "Start Audio" button and speaking into your microphone.
+