-
Notifications
You must be signed in to change notification settings - Fork 1
/
listen-and-reply-using-ChatGPT.py
155 lines (115 loc) · 3.58 KB
/
listen-and-reply-using-ChatGPT.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#Developed by Naseem Amjad urdujini@gmail.com [Software Engineer]
import openai
import os
import pyaudio
import wave
import warnings
warnings.filterwarnings("ignore")
class bcolors:
HEADER = '\033[95m'
OKBLUE = '\033[94m'
OKCYAN = '\033[96m'
OKGREEN = '\033[92m'
WARNING = '\033[93m'
FAIL = '\033[91m'
ENDC = '\033[0m'
BOLD = '\033[1m'
UNDERLINE = '\033[4m'
# Set parameters for recording
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"
# Initialize PyAudio
audio = pyaudio.PyAudio()
# Open microphone stream
stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
print(f"{bcolors.OKCYAN}Listening...")
# Record audio data
frames = []
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
data = stream.read(CHUNK)
frames.append(data)
print(f"{bcolors.OKCYAN}Finished Listening.")
# Stop the stream and close the audio object
stream.stop_stream()
stream.close()
audio.terminate()
# Save the audio data to a WAV file
wave_file = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
wave_file.setnchannels(CHANNELS)
wave_file.setsampwidth(audio.get_sample_size(FORMAT))
wave_file.setframerate(RATE)
wave_file.writeframes(b''.join(frames))
wave_file.close()
from google.cloud import speech_v1p1beta1 as speech
# Set up authentication
client = speech.SpeechClient.from_service_account_json('key.json')
# Load audio file
with open('output.wav', 'rb') as audio_file:
content = audio_file.read()
# Configure recognition settings
config = speech.RecognitionConfig(
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=44100,
language_code='en-US'
)
# Perform the transcription
response = client.recognize(config=config, audio=speech.RecognitionAudio(content=content))
suna=""
# Print the transcription
for result in response.results:
suna=result.alternatives[0].transcript
if suna is None or len(suna) == 0:
print(f"{bcolors.FAIL}Nothing heard")
exit()
print(f"{bcolors.OKGREEN}{suna}")
# Set your API key as an environment variable
openai.api_key = "sk-USE-YOUR-OWN-KEY"
# Generate a response from ChatGPT
model_engine = "text-davinci-003"
question = suna
prompt = "Describe " + question
print ("You asked about "+question)
# Generate a response
completion = openai.Completion.create(
engine=model_engine,
prompt=prompt,
max_tokens=1024,
n=1,
stop=None,
temperature=0.5,
)
response = completion.choices[0].text
print(f"{bcolors.BOLD}{response}")
from google.cloud import texttospeech
# Set the path to the service account key file
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "key4speech.json"
# Set up Google Cloud Text-to-Speech API client
client = texttospeech.TextToSpeechClient()
# Set the text input to be synthesized
input_text = texttospeech.SynthesisInput(text=response)
# Select the language and voice to be used for synthesis
voice = texttospeech.VoiceSelectionParams(
language_code="en-US",
ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL
)
# Select the audio format
audio_config = texttospeech.AudioConfig(
audio_encoding=texttospeech.AudioEncoding.MP3
)
# Generate the request and get the response
response = client.synthesize_speech(
input=input_text,
voice=voice,
audio_config=audio_config
)
# Write the response audio content to an MP3 file
with open("output.mp3", "wb") as out_file:
out_file.write(response.audio_content)
#print("Audio content written to file 'output.mp3'")
# Play the MP3 file
import playsound
playsound.playsound("output.mp3")