From ffcb600d0ff4e82d25a9cbb9974053e31dec9cbc Mon Sep 17 00:00:00 2001
From: Rianna Barett
Date: Tue, 9 Jul 2024 09:37:00 -0400
Subject: [PATCH 1/4] wake word assistant with assistant api

---
# NOTE(review): this export had lost its line breaks and indentation. The
# layout below is reconstructed from the hunk line counts (179 added lines);
# indentation depth is an assumption. In particular the record/transcribe/
# query steps are shown nested under `elif keyword_index == 1:` -- TODO
# confirm against the original commit. Trailing whitespace is not recoverable.
#
# What this patch adds: wake-word-assistant.py. Porcupine is created with the
# keywords "picovoice" and "bumblebee"; the script records microphone audio in
# 1-second chunks until a chunk is silent, transcribes it with whisper-1,
# sends the text to an assistant run on a newly created thread, and writes the
# spoken reply (tts-1-hd) to an MP3 file.
#
# NOTE(review): the assistant ID is read from "ASSSISTANT_API_KEY" (three S's)
# and `message_list.data[-1].content` is passed to TTS; both lines are changed
# by PATCH 2/4. `struct` and `wave` are imported but never used in this file.
 wake-word-assistant.py | 179 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 179 insertions(+)
 create mode 100644 wake-word-assistant.py

diff --git a/wake-word-assistant.py b/wake-word-assistant.py
new file mode 100644
index 0000000..bbb00a3
--- /dev/null
+++ b/wake-word-assistant.py
@@ -0,0 +1,179 @@
+from dotenv import load_dotenv
+import sounddevice as sd
+import struct
+import numpy as np
+import tempfile
+import pvporcupine
+import wave
+import os
+import time
+from pvrecorder import PvRecorder
+import wavio
+from openai import OpenAI
+
+load_dotenv()
+
+# Retrieve the OpenAI API key and Porcupine access key from environment variables
+openai_api_key = os.getenv("OPENAI_API_KEY")
+porcupine_access_key = os.getenv("PORCUPINE_ACCESS_KEY")
+assistant_api_key = os.getenv("ASSSISTANT_API_KEY")
+
+sd.default.device = None #'seeed-2mic-voicecard'
+
+if not openai_api_key:
+    raise ValueError("OpenAI API key is not set in environment variables.")
+if not porcupine_access_key:
+    raise ValueError("Porcupine access key is not set in environment variables.")
+
+# Initialize OpenAI client
+client = OpenAI(api_key=openai_api_key, default_headers={"OpenAI-Beta": "assistants=v2"})
+
+# Initialize Porcupine
+porcupine = pvporcupine.create(
+    access_key=porcupine_access_key,
+    keywords=["picovoice", "bumblebee"]
+)
+# paud = pyaudio.PyAudio()
+# audio_frame = paud.open(rate=porcupine.sample_rate, channels=1, format=pyaudio.paInt16, input=True, frames_per_buffer=porcupine.frame_length)
+
+def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=500):
+    """
+    Record audio from the default microphone until silence is detected.
+    """
+    print("Recording... Press Ctrl+C to stop.")
+    audio_file = []
+
+    try:
+        while True:
+            recording = sd.rec(int(chunk_duration * samplerate), samplerate=samplerate, channels=1, dtype='int16')
+            sd.wait()
+            audio_file.append(recording)
+
+            # Check if the last recorded chunk is silent
+            if is_silent(recording, silence_threshold):
+                print("Silence detected, stopping recording.")
+                break
+
+    except KeyboardInterrupt:
+        print("Recording stopped manually.")
+
+    if audio_file:
+        audio_file = np.concatenate(audio_file, axis=0)
+        return audio_file
+    else:
+        raise ValueError("No audio file recorded.")
+
+def is_silent(file, threshold=500):
+
+    """
+    Returns True if the audio file is below the silent threshold.
+    """
+    return np.abs(file).mean() < threshold
+
+def query_and_record(prompt, mp3_filename):
+    """
+    Send a prompt to the OpenAI assistant and record the response as an MP3 file.
+    """
+    # # Create an assistant instance
+    # assistant = client.beta.assistants.create(
+    #     name="Senior Tech Help",
+    #     instructions="You are a helpful tech teacher specifically for seniors. You will help older adults (ages 50+) with quick questions about smartphones, voice assistants, computers, cameras, the internet, digital shopping, or any other technology-related topic. You will always ask for specifics, like what device or phone they are using, and provide them with step-by-step instructions for their response.",
+    #     model="gpt-4o"
+    # )
+    assistant_id = assistant_api_key
+
+    # Create a thread for communication
+    thread = client.beta.threads.create()
+
+    # Send user's prompt to the AI
+    message = client.beta.threads.messages.create(
+        thread_id=thread.id,
+        role="user",
+        content=prompt
+    )
+
+    # Start the AI to process the user prompt
+    run = client.beta.threads.runs.create(
+        thread_id=thread.id,
+        assistant_id=assistant_id,
+        instructions="Please address the user as Jane Doe. The user has a premium account."
+    )
+
+    # Wait until AI is complete with processing
+    while run.status in ["in_progress", "queued"]:
+        time.sleep(1)
+        run = client.beta.threads.runs.retrieve(
+            thread_id=thread.id,
+            run_id=run.id
+        )
+
+    if run.status == "completed":
+        message_list = client.beta.threads.messages.list(
+            thread_id=thread.id
+        )
+
+        # Extract the text content from the response
+        # text_response = ""
+        # for message in message_list.data:
+        #     if message.role == "assistant" and message.content:
+        #         text_response += message.content + "\n"
+
+        text_response = message_list.data[-1].content
+
+        # Generate an audio response from the text
+        response = client.audio.speech.create(
+            model="tts-1-hd",
+            voice="echo",
+            input=text_response,
+        )
+
+        response.stream_to_file(mp3_filename)
+
+        print("Response recorded to " + mp3_filename)
+
+
+# Main loop for keyword detection and interaction
+recorder = PvRecorder(frame_length=porcupine.frame_length)
+recorder.start()
+wav_file = None
+
+try:
+    while True:
+        pcm = recorder.read()
+        keyword_index = porcupine.process(pcm)
+
+        if keyword_index == 0:
+            print("Detected 'picovoice'")
+        elif keyword_index == 1:
+            print("Detected 'bumblebee'")
+
+            # Record audio from the microphone
+            audio_file = record_audio()
+
+            # Convert audio to text using OpenAI API
+            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile:
+                tmpfilename = tmpfile.name
+                wavio.write(tmpfilename, audio_file, 44100, sampwidth=2)
+
+            transcription = client.audio.transcriptions.create(
+                model="whisper-1",
+                file=open(tmpfilename, "rb"),
+            )
+
+            print("Transcription:", transcription.text)
+
+            # Example usage
+            prompt = transcription.text
+            mp3_filename = "response.mp3"
+            query_and_record(prompt, mp3_filename)
+
+except KeyboardInterrupt:
+    print("Script interrupted.")
+finally:
+# Ensuring proper release of resources
+    if porcupine is not None:
+        porcupine.delete()
+    # if audio_frame is not None:
+    #     audio_frame.close()
+    # if paud is not None:
+    #     paud.terminate()
From 817bcd47ab76315dc5a7e4fd7521904022c84c73 Mon Sep 17 00:00:00 2001
From: RiannaBarrett
Date: Wed, 10 Jul 2024 09:39:30 -0400
Subject: [PATCH 2/4] working with assistant link

---
# NOTE(review): layout reconstructed from a collapsed export; indentation
# depth of context lines is an assumption.
#
# What this patch changes: the environment variable name is corrected from
# "ASSSISTANT_API_KEY" to "ASSISTANT_API_KEY", and the text passed to TTS is
# now read from message_list.data[0].content[0].text.value instead of
# message_list.data[-1].content.
 wake-word-assistant.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/wake-word-assistant.py b/wake-word-assistant.py
index bbb00a3..da94862 100644
--- a/wake-word-assistant.py
+++ b/wake-word-assistant.py
@@ -16,7 +16,7 @@
 # Retrieve the OpenAI API key and Porcupine access key from environment variables
 openai_api_key = os.getenv("OPENAI_API_KEY")
 porcupine_access_key = os.getenv("PORCUPINE_ACCESS_KEY")
-assistant_api_key = os.getenv("ASSSISTANT_API_KEY")
+assistant_api_key = os.getenv("ASSISTANT_API_KEY")
 
 sd.default.device = None #'seeed-2mic-voicecard'
 
@@ -118,7 +118,7 @@ def query_and_record(prompt, mp3_filename):
         #     if message.role == "assistant" and message.content:
         #         text_response += message.content + "\n"
 
-        text_response = message_list.data[-1].content
+        text_response = message_list.data[0].content[0].text.value
 
         # Generate an audio response from the text
         response = client.audio.speech.create(
From 327a0d401b7dcf7d5297dfc6a69b8327f44eab49 Mon Sep 17 00:00:00 2001
From: RiannaBarrett
Date: Wed, 10 Jul 2024 10:46:10 -0400
Subject: [PATCH 3/4] MP3 response plays once generated

---
# NOTE(review): layout reconstructed from a collapsed export; indentation
# depth is an assumption. Several -/+ pairs below (hunks at -19, -64 and
# -111) differ only in whitespace that the export lost, so they appear here
# with identical text -- TODO restore from the original commit before applying.
#
# What this patch changes: imports pygame; query_and_record() drops its
# mp3_filename parameter, writes the speech to a NamedTemporaryFile
# (delete=False) and plays it with pygame.mixer.music, polling get_busy()
# until playback ends; record_audio()'s default silence_threshold goes from
# 500 to 2000 (is_silent()'s own default stays 500); the commented-out pyaudio
# code and the unused wav_file variable are removed; the finally block now
# stops and deletes the PvRecorder.
 wake-word-assistant.py | 42 +++++++++++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 17 deletions(-)

diff --git a/wake-word-assistant.py b/wake-word-assistant.py
index da94862..e1040d8 100644
--- a/wake-word-assistant.py
+++ b/wake-word-assistant.py
@@ -10,6 +10,7 @@
 from pvrecorder import PvRecorder
 import wavio
 from openai import OpenAI
+import pygame
 
 load_dotenv()
 
@@ -19,7 +20,7 @@
 assistant_api_key = os.getenv("ASSISTANT_API_KEY")
 
 sd.default.device = None #'seeed-2mic-voicecard'
-
+
 if not openai_api_key:
     raise ValueError("OpenAI API key is not set in environment variables.")
 if not porcupine_access_key:
@@ -33,10 +34,8 @@
     access_key=porcupine_access_key,
     keywords=["picovoice", "bumblebee"]
 )
-# paud = pyaudio.PyAudio()
-# audio_frame = paud.open(rate=porcupine.sample_rate, channels=1, format=pyaudio.paInt16, input=True, frames_per_buffer=porcupine.frame_length)
 
-def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=500):
+def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=2000):
     """
     Record audio from the default microphone until silence is detected.
     """
@@ -64,17 +63,16 @@ def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=500):
         raise ValueError("No audio file recorded.")
 
 def is_silent(file, threshold=500):
-
     """
     Returns True if the audio file is below the silent threshold.
     """
     return np.abs(file).mean() < threshold
 
-def query_and_record(prompt, mp3_filename):
+def query_and_record(prompt):
     """
     Send a prompt to the OpenAI assistant and record the response as an MP3 file.
     """
-    # # Create an assistant instance
+    # # Create an assistant instance
     # assistant = client.beta.assistants.create(
     #     name="Senior Tech Help",
     #     instructions="You are a helpful tech teacher specifically for seniors. You will help older adults (ages 50+) with quick questions about smartphones, voice assistants, computers, cameras, the internet, digital shopping, or any other technology-related topic. You will always ask for specifics, like what device or phone they are using, and provide them with step-by-step instructions for their response.",
@@ -111,13 +109,13 @@ def query_and_record(prompt, mp3_filename):
         message_list = client.beta.threads.messages.list(
             thread_id=thread.id
         )
-
-        # Extract the text content from the response
+        # Extract the text content from the response
         # text_response = ""
         # for message in message_list.data:
         #     if message.role == "assistant" and message.content:
         #         text_response += message.content + "\n"
 
+
         text_response = message_list.data[0].content[0].text.value
 
         # Generate an audio response from the text
@@ -127,15 +125,28 @@ def query_and_record(prompt, mp3_filename):
             input=text_response,
         )
 
-        response.stream_to_file(mp3_filename)
+        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmpfile:
+            mp3_filename = tmpfile.name
+            response.stream_to_file(mp3_filename)
 
         print("Response recorded to " + mp3_filename)
 
+        # Initialize pygame mixer
+        pygame.mixer.init()
+
+        # Load the mp3 file
+        pygame.mixer.music.load(mp3_filename)
+
+        # Play the mp3 file
+        pygame.mixer.music.play()
+
+        # Wait until the music finishes playing
+        while pygame.mixer.music.get_busy():
+            pygame.time.Clock().tick(10)
 
 # Main loop for keyword detection and interaction
 recorder = PvRecorder(frame_length=porcupine.frame_length)
 recorder.start()
-wav_file = None
 
 try:
     while True:
@@ -164,8 +175,7 @@ def query_and_record(prompt, mp3_filename):
 
             # Example usage
             prompt = transcription.text
-            mp3_filename = "response.mp3"
-            query_and_record(prompt, mp3_filename)
+            query_and_record(prompt)
 
 except KeyboardInterrupt:
     print("Script interrupted.")
@@ -173,7 +183,5 @@ def query_and_record(prompt, mp3_filename):
 # Ensuring proper release of resources
     if porcupine is not None:
         porcupine.delete()
-    # if audio_frame is not None:
-    #     audio_frame.close()
-    # if paud is not None:
-    #     paud.terminate()
+    recorder.stop()
+    recorder.delete()
From 0e4ce4235ca6a472fdb885d5ed596ed51566d4e2 Mon
Sep 17 00:00:00 2001
From: RiannaBarrett
Date: Thu, 11 Jul 2024 12:44:23 -0400
Subject: [PATCH 4/4] Create New Thread when voice AI is started

---
# Review notes (this section is ignored by `git am`):
# - Layout restored from a collapsed export; the blob id on the `index` line
#   below is the original one and no longer matches the revised content.
# - Any detected wake word now starts an interaction; frames without a wake
#   word are skipped.
# - The per-run `instructions` string was removed because it overrides the
#   assistant's configured instructions.
# - A run that ends in any state other than "completed" is reported instead
#   of being silently ignored.
# - Temporary WAV/MP3 files are closed and deleted; the WAV handle passed to
#   the transcription call is closed.
# - Interactions are serialized with a lock so worker threads cannot race on
#   thread_id or overlap runs and playback on the same conversation thread.
# - ASSISTANT_API_KEY is validated at startup like the other settings.
# - is_silent() widens samples before abs() and treats an empty buffer as
#   silent.
 wake-word-thread.py | 265 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 265 insertions(+)
 create mode 100644 wake-word-thread.py

diff --git a/wake-word-thread.py b/wake-word-thread.py
new file mode 100644
index 0000000..7e91f51
--- /dev/null
+++ b/wake-word-thread.py
@@ -0,0 +1,265 @@
+"""Wake-word voice assistant.
+
+Listens for a Porcupine wake word, records the request, transcribes it with
+Whisper, sends it to an OpenAI assistant on one persistent conversation
+thread, and plays the synthesized reply.
+"""
+import os
+import struct
+import tempfile
+import threading
+import time
+import wave
+
+import numpy as np
+import pvporcupine
+import pygame
+import sounddevice as sd
+import wavio
+from dotenv import load_dotenv
+from openai import OpenAI
+from pvrecorder import PvRecorder
+
+load_dotenv()
+
+# Retrieve the OpenAI API key and Porcupine access key from environment variables
+openai_api_key = os.getenv("OPENAI_API_KEY")
+porcupine_access_key = os.getenv("PORCUPINE_ACCESS_KEY")
+# NOTE: despite its name, this is used as the assistant ID when starting a run.
+assistant_api_key = os.getenv("ASSISTANT_API_KEY")
+
+sd.default.device = None  # 'seeed-2mic-voicecard'
+
+if not openai_api_key:
+    raise ValueError("OpenAI API key is not set in environment variables.")
+if not porcupine_access_key:
+    raise ValueError("Porcupine access key is not set in environment variables.")
+if not assistant_api_key:
+    raise ValueError("Assistant ID (ASSISTANT_API_KEY) is not set in environment variables.")
+
+WAKE_WORDS = ["picovoice", "bumblebee"]
+SAMPLE_RATE = 44100  # Hz, used for recording and for the WAV sent to Whisper
+SILENCE_THRESHOLD = 2000  # mean absolute int16 amplitude treated as silence
+# Run states in which the assistant is still working, so polling continues.
+PENDING_RUN_STATES = ("queued", "in_progress", "cancelling")
+
+# Initialize OpenAI client
+client = OpenAI(api_key=openai_api_key, default_headers={"OpenAI-Beta": "assistants=v2"})
+
+thread_id = None  # ID of the conversation thread, created on first use
+# Serializes interactions: protects thread_id and keeps replies from overlapping.
+interaction_lock = threading.Lock()
+
+
+def record_audio(samplerate=SAMPLE_RATE, chunk_duration=1, silence_threshold=SILENCE_THRESHOLD):
+    """
+    Record audio from the default microphone until silence is detected.
+
+    Audio is captured in chunks of chunk_duration seconds. Recording stops
+    after the first chunk that is_silent() reports as silent, or on Ctrl+C.
+
+    Returns one int16 array holding every captured chunk, including the
+    final silent one. Raises ValueError if no chunk was captured.
+    """
+    print("Recording... Press Ctrl+C to stop.")
+    chunks = []
+    frames_per_chunk = int(chunk_duration * samplerate)
+
+    try:
+        while True:
+            recording = sd.rec(frames_per_chunk, samplerate=samplerate, channels=1, dtype='int16')
+            sd.wait()
+            chunks.append(recording)
+
+            # Check if the last recorded chunk is silent
+            if is_silent(recording, silence_threshold):
+                print("Silence detected, stopping recording.")
+                break
+    except KeyboardInterrupt:
+        print("Recording stopped manually.")
+
+    if not chunks:
+        raise ValueError("No audio file recorded.")
+    return np.concatenate(chunks, axis=0)
+
+
+def is_silent(file, threshold=500):
+    """
+    Returns True if the mean absolute amplitude of the audio is below threshold.
+
+    An empty buffer counts as silent.
+    """
+    # Widen first: abs(-32768) overflows when computed in int16.
+    samples = np.asarray(file, dtype=np.float64)
+    if samples.size == 0:
+        return True
+    return np.abs(samples).mean() < threshold
+
+
+def get_thread_id():
+    """Return the conversation thread ID, creating the thread on first use."""
+    global thread_id
+    if thread_id is None:
+        thread_id = client.beta.threads.create().id
+        print(f"New thread created with ID: {thread_id}")
+    else:
+        print(f"Using existing thread with ID: {thread_id}")
+    return thread_id
+
+
+def play_audio(filename):
+    """Play an audio file through pygame and block until playback ends."""
+    if not pygame.mixer.get_init():
+        pygame.mixer.init()
+    pygame.mixer.music.load(filename)
+    pygame.mixer.music.play()
+
+    # Wait until the response finishes playing
+    clock = pygame.time.Clock()
+    while pygame.mixer.music.get_busy():
+        clock.tick(10)
+
+    # Release the file so the caller can delete it (unload needs pygame 2+).
+    if hasattr(pygame.mixer.music, "unload"):
+        pygame.mixer.music.unload()
+
+
+def query_and_record(prompt):
+    """
+    Send a prompt to the OpenAI assistant and play the spoken response.
+
+    The reply is synthesized to a temporary MP3 file, played, and then
+    deleted. If the run does not complete, or the reply has no text, a
+    message is printed and nothing is played.
+    """
+    current_thread_id = get_thread_id()
+
+    # Send user's prompt to the AI
+    client.beta.threads.messages.create(
+        thread_id=current_thread_id,
+        role="user",
+        content=prompt
+    )
+
+    # Start the AI to process the user prompt. No per-run `instructions` are
+    # passed: that parameter overrides the assistant's own instructions.
+    run = client.beta.threads.runs.create(
+        thread_id=current_thread_id,
+        assistant_id=assistant_api_key
+    )
+
+    # Wait until AI is complete with processing
+    while run.status in PENDING_RUN_STATES:
+        time.sleep(1)
+        run = client.beta.threads.runs.retrieve(
+            thread_id=current_thread_id,
+            run_id=run.id
+        )
+
+    if run.status != "completed":
+        print(f"Assistant run ended with status: {run.status}")
+        return
+
+    # The list is newest-first by default, so data[0] is the latest reply.
+    message_list = client.beta.threads.messages.list(thread_id=current_thread_id)
+    text_response = "\n".join(
+        block.text.value
+        for block in message_list.data[0].content
+        if block.type == "text"
+    )
+    if not text_response:
+        print("Assistant returned no text to speak.")
+        return
+
+    # Generate an audio response from the text
+    response = client.audio.speech.create(
+        model="tts-1-hd",
+        voice="echo",
+        input=text_response,
+    )
+
+    # Reserve a file name, then write to it after the handle is closed.
+    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmpfile:
+        mp3_filename = tmpfile.name
+    try:
+        response.stream_to_file(mp3_filename)
+        print("Response recorded to " + mp3_filename)
+        play_audio(mp3_filename)
+    finally:
+        os.remove(mp3_filename)
+
+
+def transcribe(audio, samplerate=SAMPLE_RATE):
+    """Write audio to a temporary WAV file and return its Whisper transcription."""
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile:
+        tmpfilename = tmpfile.name
+    try:
+        wavio.write(tmpfilename, audio, samplerate, sampwidth=2)
+        with open(tmpfilename, "rb") as wav_file:
+            transcription = client.audio.transcriptions.create(
+                model="whisper-1",
+                file=wav_file,
+            )
+    finally:
+        os.remove(tmpfilename)
+    return transcription.text
+
+
+def handle_interaction(prompt):
+    """
+    Handle the interaction with the AI in a separate thread.
+
+    Interactions run one at a time (interaction_lock); a failure is printed
+    rather than left as an unhandled exception in the worker thread.
+    """
+    with interaction_lock:
+        try:
+            query_and_record(prompt)
+        except Exception as error:  # worker-thread boundary: report and go on
+            print(f"Interaction failed: {error}")
+
+
+def main():
+    """Listen for a wake word and answer each spoken request until Ctrl+C."""
+    # Initialize Porcupine
+    porcupine = pvporcupine.create(
+        access_key=porcupine_access_key,
+        keywords=WAKE_WORDS
+    )
+    recorder = None
+
+    try:
+        recorder = PvRecorder(frame_length=porcupine.frame_length)
+        recorder.start()
+
+        while True:
+            pcm = recorder.read()
+            keyword_index = porcupine.process(pcm)
+            if keyword_index < 0:
+                continue  # no wake word in this frame
+            print(f"Detected '{WAKE_WORDS[keyword_index]}'")
+
+            # Record audio from the microphone
+            audio_file = record_audio()
+
+            # Convert audio to text using OpenAI API
+            prompt = transcribe(audio_file)
+            print("Transcription:", prompt)
+            if not prompt.strip():
+                continue  # nothing intelligible was said
+
+            # Start a new thread for handling the interaction
+            interaction_thread = threading.Thread(target=handle_interaction, args=(prompt,))
+            interaction_thread.start()
+    except KeyboardInterrupt:
+        print("Script interrupted.")
+    finally:
+        # Ensuring proper release of resources
+        if recorder is not None:
+            recorder.stop()
+            recorder.delete()
+        porcupine.delete()
+
+
+if __name__ == "__main__":
+    main()