From ffcb600d0ff4e82d25a9cbb9974053e31dec9cbc Mon Sep 17 00:00:00 2001
From: Rianna Barett <riannabarrett11@gmail.com>
Date: Tue, 9 Jul 2024 09:37:00 -0400
Subject: [PATCH 1/4] wake word assistant with assistant api

---
 wake-word-assistant.py | 179 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 179 insertions(+)
 create mode 100644 wake-word-assistant.py

diff --git a/wake-word-assistant.py b/wake-word-assistant.py
new file mode 100644
index 0000000..bbb00a3
--- /dev/null
+++ b/wake-word-assistant.py
@@ -0,0 +1,179 @@
+from dotenv import load_dotenv
+import sounddevice as sd
+import struct
+import numpy as np
+import tempfile
+import pvporcupine
+import wave
+import os
+import time
+from pvrecorder import PvRecorder
+import wavio
+from openai import OpenAI
+
+load_dotenv()
+
+# Retrieve the OpenAI API key and Porcupine access key from environment variables
+openai_api_key = os.getenv("OPENAI_API_KEY")
+porcupine_access_key = os.getenv("PORCUPINE_ACCESS_KEY")
+assistant_api_key = os.getenv("ASSSISTANT_API_KEY")
+
+sd.default.device = None #'seeed-2mic-voicecard'
+ 
+if not openai_api_key:
+    raise ValueError("OpenAI API key is not set in environment variables.")
+if not porcupine_access_key:
+    raise ValueError("Porcupine access key is not set in environment variables.")
+
+# Initialize OpenAI client
+client = OpenAI(api_key=openai_api_key, default_headers={"OpenAI-Beta": "assistants=v2"})
+
+# Initialize Porcupine
+porcupine = pvporcupine.create(
+    access_key=porcupine_access_key,
+    keywords=["picovoice", "bumblebee"]
+)
+# paud = pyaudio.PyAudio()
+# audio_frame = paud.open(rate=porcupine.sample_rate, channels=1, format=pyaudio.paInt16, input=True, frames_per_buffer=porcupine.frame_length)
+
+def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=500):
+    """
+    Record audio from the default microphone until silence is detected.
+    """
+    print("Recording... Press Ctrl+C to stop.")
+    audio_file = []
+
+    try:
+        while True:
+            recording = sd.rec(int(chunk_duration * samplerate), samplerate=samplerate, channels=1, dtype='int16')
+            sd.wait()
+            audio_file.append(recording)
+
+            # Check if the last recorded chunk is silent
+            if is_silent(recording, silence_threshold):
+                print("Silence detected, stopping recording.")
+                break
+
+    except KeyboardInterrupt:
+        print("Recording stopped manually.")
+
+    if audio_file:
+        audio_file = np.concatenate(audio_file, axis=0)
+        return audio_file
+    else:
+        raise ValueError("No audio file recorded.")
+    
+def is_silent(file, threshold=500):
+
+    """
+    Returns True if the audio file is below the silent threshold.
+    """
+    return np.abs(file).mean() < threshold
+
+def query_and_record(prompt, mp3_filename):
+    """
+    Send a prompt to the OpenAI assistant and record the response as an MP3 file.
+    """
+    # # Create an assistant instance
+    # assistant = client.beta.assistants.create(
+    #     name="Senior Tech Help",
+    #     instructions="You are a helpful tech teacher specifically for seniors. You will help older adults (ages 50+) with quick questions about smartphones, voice assistants, computers, cameras, the internet, digital shopping, or any other technology-related topic. You will always ask for specifics, like what device or phone they are using, and provide them with step-by-step instructions for their response.",
+    #     model="gpt-4o"
+    # )
+    assistant_id = assistant_api_key
+
+    # Create a thread for communication
+    thread = client.beta.threads.create()
+    
+    # Send user's prompt to the AI
+    message = client.beta.threads.messages.create(
+        thread_id=thread.id,
+        role="user",
+        content=prompt
+    )
+    
+    # Start the AI to process the user prompt
+    run = client.beta.threads.runs.create(
+        thread_id=thread.id,
+        assistant_id=assistant_id,
+        instructions="Please address the user as Jane Doe. The user has a premium account."
+    )
+    
+    # Wait until AI is complete with processing
+    while run.status in ["in_progress", "queued"]:
+        time.sleep(1)
+        run = client.beta.threads.runs.retrieve(
+            thread_id=thread.id,
+            run_id=run.id
+        )
+
+    if run.status == "completed":
+        message_list = client.beta.threads.messages.list(
+            thread_id=thread.id
+        )
+
+        # Extract the text content from the response
+        # text_response = ""
+        # for message in message_list.data:
+        #     if message.role == "assistant" and message.content:
+        #         text_response += message.content + "\n"
+
+        text_response = message_list.data[-1].content
+
+        # Generate an audio response from the text
+        response = client.audio.speech.create(
+            model="tts-1-hd",
+            voice="echo",
+            input=text_response,
+        )
+
+        response.stream_to_file(mp3_filename)
+
+        print("Response recorded to " + mp3_filename)
+
+
+# Main loop for keyword detection and interaction
+recorder = PvRecorder(frame_length=porcupine.frame_length)
+recorder.start()
+wav_file = None
+
+try:
+    while True:
+        pcm = recorder.read()
+        keyword_index = porcupine.process(pcm)
+
+        if keyword_index == 0:
+            print("Detected 'picovoice'")
+        elif keyword_index == 1:
+            print("Detected 'bumblebee'")
+
+            # Record audio from the microphone
+            audio_file = record_audio()
+
+            # Convert audio to text using OpenAI API
+            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile:
+                tmpfilename = tmpfile.name
+                wavio.write(tmpfilename, audio_file, 44100, sampwidth=2)
+
+            transcription = client.audio.transcriptions.create(
+                model="whisper-1",
+                file=open(tmpfilename, "rb"),
+            )
+
+            print("Transcription:", transcription.text)
+
+            # Example usage
+            prompt = transcription.text
+            mp3_filename = "response.mp3"
+            query_and_record(prompt, mp3_filename)
+
+except KeyboardInterrupt:
+    print("Script interrupted.")
+finally:
+# Ensuring proper release of resources
+    if porcupine is not None:
+        porcupine.delete()
+    # if audio_frame is not None:
+    #     audio_frame.close()
+    # if paud is not None:
+    #     paud.terminate()

From 817bcd47ab76315dc5a7e4fd7521904022c84c73 Mon Sep 17 00:00:00 2001
From: RiannaBarrett <riannabarrett11@gmail.com>
Date: Wed, 10 Jul 2024 09:39:30 -0400
Subject: [PATCH 2/4] working with assistant link

---
 wake-word-assistant.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/wake-word-assistant.py b/wake-word-assistant.py
index bbb00a3..da94862 100644
--- a/wake-word-assistant.py
+++ b/wake-word-assistant.py
@@ -16,7 +16,7 @@
 # Retrieve the OpenAI API key and Porcupine access key from environment variables
 openai_api_key = os.getenv("OPENAI_API_KEY")
 porcupine_access_key = os.getenv("PORCUPINE_ACCESS_KEY")
-assistant_api_key = os.getenv("ASSSISTANT_API_KEY")
+assistant_api_key = os.getenv("ASSISTANT_API_KEY")
 
 sd.default.device = None #'seeed-2mic-voicecard'
  
@@ -118,7 +118,7 @@ def query_and_record(prompt, mp3_filename):
         #     if message.role == "assistant" and message.content:
         #         text_response += message.content + "\n"
 
-        text_response = message_list.data[-1].content
+        text_response = message_list.data[0].content[0].text.value
 
         # Generate an audio response from the text
         response = client.audio.speech.create(

From 327a0d401b7dcf7d5297dfc6a69b8327f44eab49 Mon Sep 17 00:00:00 2001
From: RiannaBarrett <riannabarrett11@gmail.com>
Date: Wed, 10 Jul 2024 10:46:10 -0400
Subject: [PATCH 3/4] MP3 response plays once generated

---
 wake-word-assistant.py | 42 +++++++++++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 17 deletions(-)

diff --git a/wake-word-assistant.py b/wake-word-assistant.py
index da94862..e1040d8 100644
--- a/wake-word-assistant.py
+++ b/wake-word-assistant.py
@@ -10,6 +10,7 @@
 from pvrecorder import PvRecorder
 import wavio
 from openai import OpenAI
+import pygame
 
 load_dotenv()
 
@@ -19,7 +20,7 @@
 assistant_api_key = os.getenv("ASSISTANT_API_KEY")
 
 sd.default.device = None #'seeed-2mic-voicecard'
- 
+
 if not openai_api_key:
     raise ValueError("OpenAI API key is not set in environment variables.")
 if not porcupine_access_key:
@@ -33,10 +34,8 @@
     access_key=porcupine_access_key,
     keywords=["picovoice", "bumblebee"]
 )
-# paud = pyaudio.PyAudio()
-# audio_frame = paud.open(rate=porcupine.sample_rate, channels=1, format=pyaudio.paInt16, input=True, frames_per_buffer=porcupine.frame_length)
 
-def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=500):
+def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=2000):
     """
     Record audio from the default microphone until silence is detected.
     """
@@ -64,17 +63,16 @@ def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=500):
         raise ValueError("No audio file recorded.")
     
 def is_silent(file, threshold=500):
-
     """
     Returns True if the audio file is below the silent threshold.
     """
     return np.abs(file).mean() < threshold
 
-def query_and_record(prompt, mp3_filename):
+def query_and_record(prompt):
     """
     Send a prompt to the OpenAI assistant and record the response as an MP3 file.
     """
-    # # Create an assistant instance
+     # # Create an assistant instance
     # assistant = client.beta.assistants.create(
     #     name="Senior Tech Help",
     #     instructions="You are a helpful tech teacher specifically for seniors. You will help older adults (ages 50+) with quick questions about smartphones, voice assistants, computers, cameras, the internet, digital shopping, or any other technology-related topic. You will always ask for specifics, like what device or phone they are using, and provide them with step-by-step instructions for their response.",
@@ -111,13 +109,13 @@ def query_and_record(prompt, mp3_filename):
         message_list = client.beta.threads.messages.list(
             thread_id=thread.id
         )
-
-        # Extract the text content from the response
+         # Extract the text content from the response
         # text_response = ""
         # for message in message_list.data:
         #     if message.role == "assistant" and message.content:
         #         text_response += message.content + "\n"
 
+
         text_response = message_list.data[0].content[0].text.value
 
         # Generate an audio response from the text
@@ -127,15 +125,28 @@ def query_and_record(prompt, mp3_filename):
             input=text_response,
         )
 
-        response.stream_to_file(mp3_filename)
+        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmpfile:
+            mp3_filename = tmpfile.name
+            response.stream_to_file(mp3_filename)
 
         print("Response recorded to " + mp3_filename)
 
+        # Initialize pygame mixer
+        pygame.mixer.init()
+
+        # Load the mp3 file
+        pygame.mixer.music.load(mp3_filename)
+
+        # Play the mp3 file
+        pygame.mixer.music.play()
+
+        # Wait until the music finishes playing
+        while pygame.mixer.music.get_busy():
+            pygame.time.Clock().tick(10)
 
 # Main loop for keyword detection and interaction
 recorder = PvRecorder(frame_length=porcupine.frame_length)
 recorder.start()
-wav_file = None
 
 try:
     while True:
@@ -164,8 +175,7 @@ def query_and_record(prompt, mp3_filename):
 
             # Example usage
             prompt = transcription.text
-            mp3_filename = "response.mp3"
-            query_and_record(prompt, mp3_filename)
+            query_and_record(prompt)
 
 except KeyboardInterrupt:
     print("Script interrupted.")
@@ -173,7 +183,5 @@ def query_and_record(prompt, mp3_filename):
 # Ensuring proper release of resources
     if porcupine is not None:
         porcupine.delete()
-    # if audio_frame is not None:
-    #     audio_frame.close()
-    # if paud is not None:
-    #     paud.terminate()
+    recorder.stop()
+    recorder.delete()

From 0e4ce4235ca6a472fdb885d5ed596ed51566d4e2 Mon Sep 17 00:00:00 2001
From: RiannaBarrett <riannabarrett11@gmail.com>
Date: Thu, 11 Jul 2024 12:44:23 -0400
Subject: [PATCH 4/4] Create New Thread when voice AI is started

---
 wake-word-thread.py | 202 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 202 insertions(+)
 create mode 100644 wake-word-thread.py

diff --git a/wake-word-thread.py b/wake-word-thread.py
new file mode 100644
index 0000000..7e91f51
--- /dev/null
+++ b/wake-word-thread.py
@@ -0,0 +1,202 @@
+from dotenv import load_dotenv
+import sounddevice as sd
+import struct
+import numpy as np
+import tempfile
+import pvporcupine
+import wave
+import os
+import time
+from pvrecorder import PvRecorder
+import wavio
+from openai import OpenAI
+import pygame
+import threading
+
+load_dotenv()
+
+# Credentials come from the environment; startup fails fast if any is missing.
+openai_api_key = os.getenv("OPENAI_API_KEY")
+porcupine_access_key = os.getenv("PORCUPINE_ACCESS_KEY")
+assistant_api_key = os.getenv("ASSISTANT_API_KEY")  # used as the assistant_id for runs
+
+sd.default.device = None  # 'seeed-2mic-voicecard'
+
+if not openai_api_key:
+    raise ValueError("OpenAI API key is not set in environment variables.")
+if not porcupine_access_key:
+    raise ValueError("Porcupine access key is not set in environment variables.")
+if not assistant_api_key:
+    raise ValueError("Assistant ID (ASSISTANT_API_KEY) is not set in environment variables.")
+
+# OpenAI client (sends the Assistants v2 beta header) and Porcupine wake-word engine
+client = OpenAI(api_key=openai_api_key, default_headers={"OpenAI-Beta": "assistants=v2"})
+porcupine = pvporcupine.create(
+    access_key=porcupine_access_key,
+    keywords=["picovoice", "bumblebee"]
+)
+
+thread_id = None  # ID of the assistant conversation thread; set on the first query
+
+def record_audio(samplerate=44100, chunk_duration=1, silence_threshold=2000):
+    """
+    Capture microphone audio in fixed-length chunks until a chunk is silent.
+
+    Each chunk is chunk_duration seconds of mono int16 samples at samplerate.
+    The chunk that triggers the silence check is kept in the result. Ctrl+C
+    also ends the capture. Returns all chunks concatenated along axis 0 and
+    raises ValueError if no chunk was captured.
+    """
+    print("Recording... Press Ctrl+C to stop.")
+    frames_per_chunk = int(chunk_duration * samplerate)
+    chunks = []
+    try:
+        heard_silence = False
+        while not heard_silence:
+            chunk = sd.rec(frames_per_chunk, samplerate=samplerate, channels=1, dtype='int16')
+            sd.wait()
+            chunks.append(chunk)
+            heard_silence = is_silent(chunk, silence_threshold)
+        print("Silence detected, stopping recording.")
+    except KeyboardInterrupt:
+        print("Recording stopped manually.")
+
+    if not chunks:
+        raise ValueError("No audio file recorded.")
+    return np.concatenate(chunks, axis=0)
+    
+def is_silent(file, threshold=500):
+    """Return True if the mean absolute sample value of file is below threshold."""
+    # Widen first: abs() of int16 -32768 wraps back to -32768 and drags the mean negative.
+    samples = np.asarray(file, dtype=np.float64)
+    return np.abs(samples).mean() < threshold
+
+_interaction_lock = threading.Lock()  # allows one assistant exchange at a time
+
+
+def query_and_record(prompt):
+    """
+    Send a prompt to the OpenAI assistant and play the reply as speech.
+
+    The first call creates a conversation thread and stores its ID in the
+    module-level thread_id; later calls post to that same thread. The reply
+    text is converted to speech, written to a temporary MP3 file, played
+    through pygame, and the file is deleted afterwards.
+
+    Calls are serialized with a lock because the main loop starts a worker
+    thread per interaction and all of them share thread_id.
+
+    Returns None. If the run does not finish as "completed", its final
+    status is printed and nothing is played.
+    """
+    global thread_id
+
+    with _interaction_lock:
+        if thread_id is None:
+            thread_id = client.beta.threads.create().id
+            print(f"New thread created with ID: {thread_id}")
+        else:
+            print(f"Using existing thread with ID: {thread_id}")
+
+        # Send user's prompt to the AI
+        client.beta.threads.messages.create(thread_id=thread_id, role="user", content=prompt)
+
+        # Start the AI to process the user prompt
+        run = client.beta.threads.runs.create(
+            thread_id=thread_id,
+            assistant_id=assistant_api_key,
+            instructions="Please address the user as Jane Doe. The user has a premium account."
+        )
+
+        # Poll once per second until the run leaves its pending states
+        while run.status in ["in_progress", "queued"]:
+            time.sleep(1)
+            run = client.beta.threads.runs.retrieve(
+                thread_id=thread_id,
+                run_id=run.id
+            )
+
+        if run.status != "completed":
+            print(f"Assistant run ended with status: {run.status}")
+            return
+
+        message_list = client.beta.threads.messages.list(thread_id=thread_id)
+        text_response = message_list.data[0].content[0].text.value
+
+        # Generate an audio response from the text
+        response = client.audio.speech.create(
+            model="tts-1-hd",
+            voice="echo",
+            input=text_response,
+        )
+
+        # Reserve a temp path; write to it only after the handle is closed
+        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmpfile:
+            mp3_filename = tmpfile.name
+        try:
+            response.stream_to_file(mp3_filename)
+            print("Response recorded to " + mp3_filename)
+
+            pygame.mixer.init()
+            pygame.mixer.music.load(mp3_filename)
+            pygame.mixer.music.play()
+
+            # One shared Clock: tick(10) caps this wait loop at 10 polls per second
+            clock = pygame.time.Clock()
+            while pygame.mixer.music.get_busy():
+                clock.tick(10)
+        finally:
+            if pygame.mixer.get_init():
+                pygame.mixer.music.unload()  # release the file before deleting it
+            os.remove(mp3_filename)
+
+def handle_interaction(prompt):
+    """
+    Thread target: run one assistant exchange for prompt via query_and_record.
+    """
+    query_and_record(prompt)
+
+# Main loop: on a wake word, record a question, transcribe it, and query the assistant
+RECORD_SAMPLERATE = 44100  # shared by record_audio and wavio.write so they cannot drift apart
+recorder = PvRecorder(frame_length=porcupine.frame_length)
+recorder.start()
+
+try:
+    while True:
+        pcm = recorder.read()
+        keyword_index = porcupine.process(pcm)
+
+        if keyword_index == 0:
+            print("Detected 'picovoice'")
+        elif keyword_index == 1:
+            print("Detected 'bumblebee'")
+
+            # Record audio from the microphone
+            audio_file = record_audio(samplerate=RECORD_SAMPLERATE)
+
+            # Reserve a temp path; write to it only after the handle is closed
+            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile:
+                tmpfilename = tmpfile.name
+            try:
+                wavio.write(tmpfilename, audio_file, RECORD_SAMPLERATE, sampwidth=2)
+                # Convert audio to text using OpenAI API
+                with open(tmpfilename, "rb") as wav_handle:
+                    transcription = client.audio.transcriptions.create(model="whisper-1", file=wav_handle)
+            finally:
+                os.remove(tmpfilename)
+
+            print("Transcription:", transcription.text)
+
+            # Start a new thread for handling the interaction
+            interaction_thread = threading.Thread(target=handle_interaction, args=(transcription.text,))
+            interaction_thread.start()
+
+except KeyboardInterrupt:
+    print("Script interrupted.")
+finally:
+    # Release both the wake-word engine and the recorder even if the first cleanup call raises
+    try:
+        porcupine.delete()
+    finally:
+        recorder.stop()
+        recorder.delete()