-
Notifications
You must be signed in to change notification settings - Fork 2
/
speech.py
133 lines (115 loc) · 3.64 KB
/
speech.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import base64
import json
import os
import time
import wave

import pyaudio
import requests
import speech_recognition as sr
from pynput import keyboard
# --- Audio capture settings -------------------------------------------------
CHUNK = 8192  # frames_per_buffer handed to the PyAudio input stream
FORMAT = pyaudio.paInt16  # sample format; also determines the WAV sample width
CHANNELS = 1
RATE = 44100  # sampling rate, used for both the stream and the WAV header
RECORD_SECONDS = 5  # not referenced by the code visible in this file
WAVE_OUTPUT_FILENAME = 'output.wav'  # default target of AudioTranscripter.stop
AMBIENT_FILENAME = 'ambient.wav'  # written once during AudioTranscripter.__init__

# SECURITY: credentials do not belong in source control. The key is now taken
# from the GOOGLE_SPEECH_API_KEY environment variable when it is set; the
# literal below is kept only so existing setups keep working.
# TODO(review): revoke this committed key and remove the fallback.
API_KEY = (
    os.environ.get("GOOGLE_SPEECH_API_KEY")
    or "AIzaSyDxPsx9KoRmeK5AnGczYulBC-Qf--RTKLE"
)

# Shared PyAudio instance used to open the input stream and to look up the
# sample width when writing WAV files.
p = pyaudio.PyAudio()

# Raw audio buffers collected by MyListener.callback while recording is
# enabled; cleared by AudioTranscripter.start and flushed to disk by stop.
frames = []
class AudioTranscripter:
    """Record microphone audio into a WAV file and transcribe it with getText.

    Creating an instance starts the keyboard listener and the audio input
    stream, then records roughly 1.5 seconds of audio into AMBIENT_FILENAME.
    """

    def __init__(self):
        self.listener = MyListener()
        self.id = None
        self.listener.start()
        self.listener.stream.start_stream()

        # Capture a short sample right away and store it as the ambient file.
        # NOTE: the sample is only written to disk; nothing in this class
        # currently reads it back (noise calibration is not wired up).
        self.start()
        time.sleep(1.5)
        self.stop(AMBIENT_FILENAME)

        self.text_ready = False

    def start(self):
        """Enable recording and discard any previously buffered audio."""
        self.listener.key_pressed = True
        frames.clear()

    def stop(self, filename=WAVE_OUTPUT_FILENAME):
        """Stop recording, write the buffered audio to *filename*, transcribe it.

        Args:
            filename: Path of the WAV file to write. When it equals
                AMBIENT_FILENAME the audio is only saved, not transcribed.

        Returns:
            The transcript returned by getText, or None when the ambient file
            was written or when transcription failed for any reason.
        """
        self.listener.key_pressed = None

        # The context manager closes the file even if writing the frames fails.
        with wave.open(filename, 'wb') as wf:
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(p.get_sample_size(FORMAT))
            wf.setframerate(RATE)
            wf.writeframes(b''.join(frames))

        if filename == AMBIENT_FILENAME:
            return None

        # Deliberately best-effort: any failure while transcribing (network,
        # HTTP status, unexpected response shape) is reported and mapped to None.
        try:
            google_text = getText(filename)
            print(google_text)
            return google_text
        except Exception:
            print("Couldn't Recognize")
            return None
class MyListener(keyboard.Listener):
    """Keyboard listener that also owns the PyAudio input stream.

    The `key_pressed` flag gates recording: while it is truthy, every buffer
    delivered to `callback` is appended to the module-level `frames` list.
    The space bar sets the flag to True on press and to False on release.
    """

    def __init__(self):
        super().__init__(self.on_press, self.on_release)
        self.key_pressed = None
        stream_options = dict(
            format=FORMAT,
            channels=CHANNELS,
            rate=RATE,
            input=True,
            frames_per_buffer=CHUNK,
            stream_callback=self.callback,
        )
        self.stream = p.open(**stream_options)

    def _set_if_space(self, key, state):
        """Store *state* in `key_pressed` when *key* is the space bar."""
        if key == keyboard.Key.space:
            self.key_pressed = state

    def on_press(self, key):
        """Handle a key press: space turns recording on."""
        self._set_if_space(key, True)

    def on_release(self, key):
        """Handle a key release: space turns recording off."""
        self._set_if_space(key, False)

    def callback(self, in_data, frame_count, time_info, status):
        """Stream callback: buffer *in_data* while recording is enabled.

        Always returns `(in_data, pyaudio.paContinue)`.
        """
        if self.key_pressed:
            frames.append(in_data)
        return (in_data, pyaudio.paContinue)
# Template for the JSON body posted by getText: recognition settings under
# "config", and the base64-encoded audio under "audio" -> "content".
sending_request = {
    "config": dict(
        languageCode="en-US",
        maxAlternatives=1,
        profanityFilter=False,
    ),
    "audio": dict(content=""),
}
def getText(filename, timeout=60):
    """Transcribe the audio file at *filename* via the Google Speech REST API.

    The file is read, base64-encoded and posted to the `speech:recognize`
    endpoint together with the settings from `sending_request["config"]`.
    The encoded audio is also written to "testfile.txt" as a debug dump.

    Args:
        filename: Path of the audio file to send.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The transcript of the first alternative of the first result.

    Raises:
        OSError: If the audio file or the debug dump cannot be opened.
        requests.exceptions.HTTPError: If the service answers with an error
            status (the error is printed first, then re-raised).
        requests.exceptions.RequestException: On connection failures/timeouts.
        KeyError, IndexError: If the response JSON holds no transcript at the
            expected path.
    """
    with open(filename, "rb") as audio_file:
        encoded_audio = base64.urlsafe_b64encode(audio_file.read()).decode("ascii")

    # Debug dump of the exact payload; the context manager guarantees the
    # data is flushed and the handle released.
    with open("testfile.txt", "w") as dump:
        dump.write(encoded_audio)

    # Build the body from a copy so the shared module-level template is not
    # mutated and does not keep the whole recording alive between calls.
    payload = dict(sending_request)
    payload["audio"] = dict(sending_request["audio"], content=encoded_audio)

    try:
        response = requests.post(
            url='https://speech.googleapis.com/v1/speech:recognize',
            params={'key': API_KEY},
            data=json.dumps(payload),
            headers={'Content-Type': 'application/json'},
            timeout=timeout,
        )
        response.raise_for_status()
    except requests.exceptions.HTTPError as err:
        print(err)
        # Parsing an error body as a recognition result would only raise a
        # misleading KeyError; surface the real failure instead.
        raise

    return response.json()['results'][0]['alternatives'][0]['transcript']