-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfinal.py
More file actions
159 lines (129 loc) · 6.71 KB
/
final.py
File metadata and controls
159 lines (129 loc) · 6.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import asyncio
from viam.robot.client import RobotClient
from viam.rpc.dial import Credentials, DialOptions
from viam.components.board import Board
from viam.components.camera import Camera
from viam.services.vision import VisionClient
from speech_service_api import SpeechService
import google.generativeai as genai
import speech_recognition as sr
async def connect():
    """Open a connection to the robot and return the connected client.

    Authenticates with an API key / API key ID pair and dials the machine's
    cloud address.
    """
    options = RobotClient.Options.with_api_key(
        api_key_id='YOUR_API_KEY_ID',
        api_key='YOUR_API_KEY',
    )
    client = await RobotClient.at_address(
        'samraspi4-main.7gp2t3f5ie.viam.cloud',
        options,
    )
    return client
async def recognize_speech():
    """Record one utterance from the microphone and return its transcription.

    Audio is captured from the microphone at device index 0 and transcribed
    with the recognizer's Whisper API call.

    Both the capture and the transcription are blocking calls, so they are
    run in the default executor. Calling them directly from this coroutine
    would freeze the event loop (and every other task on it) for as long as
    the user takes to speak and the transcription request takes to return.

    Returns:
        The text returned by ``recognize_whisper_api``.

    Raises:
        Whatever ``sr.Microphone``, ``Recognizer.listen`` or
        ``Recognizer.recognize_whisper_api`` raise; errors are not swallowed.
    """

    def _listen_and_transcribe():
        # Plain blocking helper; executed on a worker thread.
        mic = sr.Microphone(device_index=0)
        recognizer = sr.Recognizer()
        with mic as source:
            print('Say Sommething')
            audio = recognizer.listen(source)
        # The microphone is released before the network round-trip starts.
        print("Audio: ", audio)
        return recognizer.recognize_whisper_api(audio, api_key="YOUR_API_KEY")

    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, _listen_and_transcribe)
async def main():
    """Greet detected people and answer their spoken requests with Gemini.

    Connects to the robot, then loops forever: grab a camera frame, run the
    people detector on it and, if any detection is a "person" with
    confidence above 0.5, start a voice interaction. During the interaction
    the recognized speech selects one of three paths:

    * contains "tell me": the question is sent to the text chat model and the
      answer is spoken, followed by a follow-up loop on the same chat;
    * contains "picture": a fresh camera image is described by the vision
      model, followed by a follow-up loop that feeds the previous answer
      back in as context;
    * anything else: say goodbye and go back to looking for people.

    The detection loop has no exit condition of its own, so the robot
    connection is released in a ``finally`` block. That way it is closed
    when the task is cancelled (e.g. Ctrl+C under ``asyncio.run``) or when
    any call raises, instead of being leaked.
    """
    robot = await connect()
    try:
        # Resource names below must match the names configured for the machine.
        people_detector = VisionClient.from_robot(robot, "myPeopleDetector")
        my_camera = Camera.from_robot(robot, name="cam")
        # Speech output service
        speech = SpeechService.from_robot(robot, name="speech")

        genai.configure(api_key="YOUR_API_KEY")
        # Text chat model and its conversation
        model = genai.GenerativeModel('gemini-1.0-pro-latest')
        chat = model.start_chat()
        # Vision model used to describe camera images
        vision_model = genai.GenerativeModel('gemini-pro-vision')

        # Instruction prepended to every prompt to keep spoken answers short
        answer_criteria = "Answer the question in no more than two lines."

        # Person detection loop
        while True:
            print("Looking for People...")
            img = await my_camera.get_image(mime_type="image/jpeg")
            detections = await people_detector.get_detections(img)

            # A person counts as found when the confidence exceeds the
            # custom threshold.
            found = False
            for d in detections:
                if d.confidence > 0.5 and d.class_name.lower() == "person":
                    print("This is a person!")
                    found = True

            # Interaction mode: runs until the user gives an unrecognized
            # request, which ends the interaction via ``break``.
            while found:
                await speech.say('Hello There, How May I helpyou today?', True)
                print('Ready To Listen to know...........')

                # Get the user's request
                user_input = await recognize_speech()
                print('Recognized Speech: ', user_input)
                request = user_input.lower()

                if 'tell me' in request:
                    # Text chat path: build the prompt for the chat model
                    prompt = answer_criteria + "question: " + user_input
                    print("Prompt: ", prompt)
                    response = chat.send_message(prompt)
                    print(response.text)
                    # Speak the answer once Gemini has responded
                    if response.text:
                        await speech.say(response.text, True)

                    # Follow-up loop (same chat session, so history is kept)
                    while True:
                        await speech.say("Is there anything else I can help you with?", True)
                        print("Ready to Listen...")
                        follow_up_input = await recognize_speech()
                        print('follow_up_input: ', follow_up_input)
                        follow_up = follow_up_input.lower()
                        if "yes" in follow_up:
                            follow_up_prompt = answer_criteria + "question: " + follow_up_input
                            print("Follow_up Prompt: ", follow_up_prompt)
                            response = chat.send_message(follow_up_prompt)
                            if response.text:
                                # Stay in the follow-up loop
                                await speech.say(response.text, True)
                        elif "no" in follow_up:
                            await speech.say("Cool. Will go back to our main Menu.", True)
                            break
                        else:
                            await speech.say("Sorry, I didn't understand. Going back to main menu", True)
                            break

                elif 'picture' in request:
                    # Vision path: describe what the camera currently sees
                    cam_return_value = await my_camera.get_image()
                    print(f"cam get_image return value: {cam_return_value}")
                    response = vision_model.generate_content(
                        [cam_return_value, "Explain what is this image?"]
                    )
                    print(response.text)
                    if response.text:
                        await speech.say(response.text, True)

                    # Follow-up loop for questions about the image
                    while True:
                        await speech.say("Is there anything else I can help you with?", True)
                        print("Ready to Listen...")
                        follow_up_input = await recognize_speech()
                        print('follow_up_input: ', follow_up_input)
                        follow_up = follow_up_input.lower()
                        if "yes" in follow_up:
                            # A fresh chat per follow-up; the text model is
                            # reused rather than rebuilt each time.
                            chat_image_instance = model.start_chat()
                            # The previous answer is passed along as context
                            # so the text model can answer questions about
                            # the image it never saw.
                            image_answer_criteria = (
                                "previous response: " + response.text
                                + "Use the context to answer the question. If answer can not found from the context you can use your knowledge to answer the question."
                                + answer_criteria
                            )
                            follow_up_prompt = image_answer_criteria + "question: " + follow_up_input
                            print("Follow_up Prompt: ", follow_up_prompt)
                            response = chat_image_instance.send_message(follow_up_prompt)
                            if response.text:
                                # Stay in the follow-up loop
                                await speech.say(response.text, True)
                        elif "no" in follow_up:
                            await speech.say("Cool. Will go back to our main Menu.", True)
                            break
                        else:
                            await speech.say("Sorry, I didn't understand. Going back to main menu", True)
                            break

                else:
                    # End of interaction: resume looking for people
                    await speech.say("Cool. Hope you have a great rest of the day", True)
                    break
    finally:
        # Always close the machine connection, including on error or cancel.
        await robot.close()
# Script entry point: run the assistant only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    asyncio.run(main())