# main.py — voice-assistant entry point
# Libraries required for basic operation
import speech_recognition
import openai
import dotenv
import threading
# Libraries required for spawning listening and GUI subprocesses
import os
import signal
from subprocess import Popen
from enum import Enum
from multiprocessing.connection import Client, Listener
# Trigger phrases matched against transcribed speech.
ACTIVATE_KEY = "hey there"
SHUTDOWN_KEY = "hey shut down"


class State(Enum):
    """State machine for the speaker.

    PASSIVE_LISTENING - listening for ACTIVATE_KEY in transcribed speech
    ACTIVE_LISTENING  - listening for the next prompt to send to the OpenAI API
    SPEAKING          - a text-to-speech subprocess is playing audio
    """
    # Values 1..3 match what Enum's functional API would have assigned.
    PASSIVE_LISTENING = 1
    ACTIVE_LISTENING = 2
    SPEAKING = 3
def handle_updates():
    """Handle incoming updates from the UI window. Not yet implemented; no-op."""
def activate_speaker():
    """Activate the speaker. Not yet implemented; no-op."""
def main():
    """Run the voice assistant's main event loop.

    Spawns the UI window subprocess, establishes bi-directional IPC with it,
    then cycles through a three-state machine:
      PASSIVE_LISTENING -> ACTIVE_LISTENING  when ACTIVATE_KEY is heard
      ACTIVE_LISTENING  -> SPEAKING          after querying the OpenAI API
      SPEAKING          -> PASSIVE_LISTENING after spawning the TTS subprocess
    Returns (shutting the assistant down) when SHUTDOWN_KEY is heard.
    """
    speech_recognizer = speech_recognition.Recognizer()  # Initialize speech recognizer
    # Read the .env file once instead of re-parsing it for every key lookup.
    env = dotenv.dotenv_values()
    openai.api_key = env['OPEN_AI_API_KEY']  # Configure OpenAI API key
    # Initial state starts at passive listening
    current_state = State.PASSIVE_LISTENING
    current_speaking_text = ""  # Text sent to text-to-speech process
    speaking_PID = None  # Text-to-speech process ID
    # Addresses for bi-directional IPC
    window_address = ('localhost', 6000)
    main_address = ('localhost', 6001)
    # Set up listener on main application (authkey must be bytes;
    # the old b"%s" % ... formatting was a redundant no-op around .encode()).
    main_listener = Listener(main_address, authkey=env['MAIN_LISTENER_KEY'].encode())
    # Spawn UI window (venv/Scripts path — assumes a Windows venv layout; TODO confirm)
    window_PID = Popen(["venv/Scripts/python", "window.py", str(os.getpid())]).pid
    # Receive connection from window application
    window_to_main_conn = main_listener.accept()
    # Connect to listener on window application
    main_to_window_conn = Client(window_address, authkey=env['WINDOW_LISTENER_KEY'].encode())
    while True:
        print("Current state:", current_state.name)
        # Obtain microphone audio data if state is passive or active listening
        if current_state in (State.PASSIVE_LISTENING, State.ACTIVE_LISTENING):
            try:
                with speech_recognition.Microphone() as mic:
                    speech_recognizer.adjust_for_ambient_noise(mic)  # Establishes ambient noise level
                    audio = speech_recognizer.listen(mic, phrase_time_limit=5)
                user_prompt_text = speech_recognizer.recognize_google(audio)
                print("text:", user_prompt_text)
                # Below only runs if transcription succeeded (no exception above).
                if current_state == State.PASSIVE_LISTENING:
                    # If audio is heard, kill the current speaking process.
                    # OSError replaces WindowsError: WindowsError is undefined on
                    # non-Windows Python 3 (it would raise NameError there), and
                    # OSError also covers an already-dead process on Windows.
                    if speaking_PID:
                        try:
                            os.kill(speaking_PID, signal.SIGTERM)
                        except OSError:
                            pass
                        speaking_PID = None
                    # Only switch to ACTIVE_LISTENING state if ACTIVATE_KEY is in the user prompt
                    if ACTIVATE_KEY in user_prompt_text:
                        current_state = State.ACTIVE_LISTENING
                        main_to_window_conn.send("active")
                    elif SHUTDOWN_KEY in user_prompt_text:
                        # Shut down even if the window process is already gone.
                        try:
                            os.kill(window_PID, signal.SIGTERM)
                        except OSError:
                            pass
                        return
                # If state is ACTIVE_LISTENING, send next prompt to OpenAI API,
                # then switch to speaking state.
                elif current_state == State.ACTIVE_LISTENING:
                    completion = openai.ChatCompletion.create(
                        model="gpt-4",
                        messages=[{"role": "user", "content": user_prompt_text}],
                    )
                    current_speaking_text = completion.choices[0].message.content
                    print(current_speaking_text)
                    current_state = State.SPEAKING
            except Exception as e:
                # Broad by design: a mic failure, unrecognized speech, or an
                # OpenAI API error all reset us to passive listening — but
                # surface the actual error instead of silently discarding it.
                print("I couldn't recognize that, speak again")
                print("(error:", e, ")")
                current_state = State.PASSIVE_LISTENING
        # If state is SPEAKING, spawn speaking subprocess, then resume listening.
        elif current_state == State.SPEAKING:
            print("creating tts subprocess...")
            speaking_PID = Popen(["venv/Scripts/python", "speak.py", current_speaking_text]).pid
            current_speaking_text = ""
            current_state = State.PASSIVE_LISTENING
# Run the assistant only when executed as a script, not on import.
if __name__ == "__main__":
    main()