forked from hbarnard/mema
-
Notifications
You must be signed in to change notification settings - Fork 0
/
label_video.py
executable file
·101 lines (81 loc) · 3.43 KB
/
label_video.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env python3
import os
import sys
from time import sleep
import board
import subprocess
import requests
import datetime
from pathlib import Path
import logging
import replicate
import sys
from configobj import ConfigObj
import memalib.mema_utility
from configobj import ConfigObj
#FIXME: need to adjust the sleep times and recording length!
# Colour tuples written to the first front-panel LED. The original inline
# comments call the first one "red" and the second one "green".
# NOTE(review): read as plain RGB these would be blue and red -- confirm
# against the bonnet's pixel order before changing either value.
_LED_BUSY = (0, 0, 255)
_LED_READY = (255, 0, 0)


def _init_leds():
    """Create the three-pixel DotStar strip on the front of the voice bonnet."""
    # adafruit_dotstar is only needed when the LEDs are actually in use.
    import board
    import adafruit_dotstar

    dotstar_data = board.D5
    dotstar_clock = board.D6
    # The strip is deliberately NOT deinitialised here: it is written to
    # several times below and released once, at the very end of main().
    return adafruit_dotstar.DotStar(dotstar_clock, dotstar_data, 3, brightness=0.2)


def _set_led(dots, colour):
    """Set the first LED to *colour*; do nothing when no strip is attached."""
    if dots is not None:
        dots[0] = colour


def _transcribe_locally(config, file_path):
    """Run the configured transcription program on *file_path* and return its output."""
    import shlex

    #FIXME: this block can go to the library, sooner or later
    # The redirect needs a shell, so the path is quoted to keep a file name
    # containing spaces or shell metacharacters from altering the command.
    transcribe_command = (
        config['main']['transcribe_program'] + ' ' + shlex.quote(file_path) + ' > /tmp/transcription'
    )
    logging.debug('transcribe command is %s', transcribe_command)
    subprocess.call(transcribe_command, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    with open('/tmp/transcription') as trans:
        return trans.read()


def main():
    """Record a spoken label, transcribe it and print it for the intent server.

    Reads ``etc/mema.ini``, stops the ``mema_rhasspy`` container while the
    configured record command writes a temporary ``.wav`` file, restarts the
    container, transcribes the recording (via replicate's ``openai/whisper``
    model when ``use_external_ai`` is ``yes``, otherwise via the configured
    ``transcribe_program``) and prints ``<first 30 characters>|no_path``.

    ``sys.argv[1]``, when given, is appended to the temporary file name.

    Raises:
        RuntimeError: if the record command exits with a non-zero status.
    """
    # The file-level ``import memalib.mema_utility`` binds only ``memalib``;
    # bind the short alias used throughout this function.
    import memalib.mema_utility as mu

    # Configuration has to be loaded before anything reads from it.
    config = ConfigObj('etc/mema.ini')
    logging.basicConfig(filename=config['main']['logfile_name'], format='%(asctime)s %(message)s', encoding='utf-8', level=logging.DEBUG)

    #FIXME: mema.ini produces strings! also no test on system name now, unreliable
    pi = config['main']['pi'] == 'yes'
    # coloured LEDS on front of voice bonnet, for primitive feedback
    dots = _init_leds() if pi else None

    # Tolerate a missing argument instead of failing with IndexError.
    suffix = sys.argv[1] if len(sys.argv) > 1 else ''

    file_path = None
    try:
        phrase = config['en_prompts']['start_record'].replace(' ', '_')
        mu.curl_speak(phrase)

        # red: feedback before stopping rhasspy
        _set_led(dots, _LED_BUSY)

        # make a file name from the current unix timestamp
        # NOTE(review): docker_control is not defined in this file; it is
        # presumably provided by memalib.mema_utility and presumably returns
        # the timestamp -- confirm.
        unix_time = mu.docker_control('stop', 'mema_rhasspy')
        file_path = config['main']['media_directory'] + "tmp/" + str(unix_time) + suffix + ".wav"
        sleep(2)

        # green: ok to talk
        _set_led(dots, _LED_READY)
        # Append the path as its own argv element so whitespace in it survives.
        record_array = config['main']['record_command'].split() + [file_path]
        try:
            subprocess.run(record_array, check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as e:
            raise RuntimeError("command '{}' return here with error (code {}): {}".format(e.cmd, e.returncode, e.output)) from e
        finally:
            # red: end of speech
            # Restart rhasspy even when recording failed, so the voice
            # assistant is not left stopped.
            _set_led(dots, _LED_BUSY)
            mu.docker_control('start', 'mema_rhasspy')

        # ConfigObj hands back strings; sleep() needs a number.
        sleep(float(config['main']['rhasspy_reload']))
        mu.curl_speak(config['en_prompts']['end_record'])

        result = {"transcription": config['en_literals']['unlabelled_video']}

        #FIXME: 12/12/2022 test with internal whisper, real-soon
        if config['main']['use_external_ai'] == 'yes':
            model = replicate.models.get("openai/whisper")
            audio_file = Path(file_path)
            result = model.predict(audio=audio_file)
            text = result['transcription'][:30]
            mu.curl_speak(config['en_prompts']['end_transcription'])
        else:
            # result is a dict here, so let logging format it.
            logging.debug('result is %s', result)
            text = _transcribe_locally(config, file_path)[:30]

        # NOTE(review): placed after the branch so it is spoken on both
        # paths -- confirm this matches the intended flow.
        mu.curl_speak(config['en_prompts']['done'])

        # green: its done
        _set_led(dots, _LED_READY)
        sleep(3)
    finally:
        if dots is not None:
            dots.deinit()
        # delete temporary files
        if file_path is not None:
            try:
                os.remove(file_path)
            except FileNotFoundError:
                # Nothing was recorded; there is nothing to clean up.
                pass

    # return result and file path to intent server
    # the format of reply is maintained although this doesn't keep a file
    print(text + "|" + 'no_path')
# Run the recording workflow only when executed as a script, not on import.
if __name__ == "__main__":
    main()