Skip to content

Commit

Permalink
pvspeaker for Python llm-voice-assistant
Browse files Browse the repository at this point in the history
  • Loading branch information
albho authored Aug 13, 2024
1 parent 4956de3 commit 384b3ba
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 37 deletions.
67 changes: 33 additions & 34 deletions recipes/llm-voice-assistant/python/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import signal
import time
from argparse import ArgumentParser
from collections import deque
from itertools import chain
from multiprocessing import (
Pipe,
Process,
Expand All @@ -15,6 +17,7 @@
import pvorca
import pvporcupine
from pvrecorder import PvRecorder
from pvspeaker import PvSpeaker


class RTFProfiler:
Expand Down Expand Up @@ -57,74 +60,70 @@ def tps(self) -> float:


def orca_worker(access_key: str, connection, warmup_sec: float, stream_frame_sec: int = 0.03) -> None:
# noinspection PyUnresolvedReferences
import numpy as np
from sounddevice import OutputStream

orca = pvorca.create(access_key=access_key)
orca_stream = orca.stream_open()

texts = list()
pcm_buffer = list()
pcm_deque = deque()
warmup = [False]
synthesize = False
flush = False
close = False
utterance_end_sec = 0.
delay_sec = [-1.]

# Output-stream callback (passed as `callback=` to `OutputStream`): fills `data`
# with the next block of buffered PCM. The three trailing arguments are ignored.
# NOTE(review): this looks like the pre-change sounddevice path of the diff — confirm.
def callback(data, _, __, ___) -> None:
# While warming up, emit zeros until `pcm_buffer` holds at least
# `warmup_sec * orca.sample_rate` samples, then leave warmup mode.
if warmup[0]:
if len(pcm_buffer) < int(warmup_sec * orca.sample_rate):
data[:, 0] = 0
return
else:
warmup[0] = False

# Zero-pad the buffer when it holds fewer samples than this block needs.
if len(pcm_buffer) < data.shape[0]:
pcm_buffer.extend([0] * (data.shape[0] - len(pcm_buffer)))

# Copy one block into column 0 of `data`, then drop those samples from the buffer.
data[:, 0] = pcm_buffer[:data.shape[0]]
del pcm_buffer[:data.shape[0]]

stream = OutputStream(
samplerate=orca.sample_rate,
blocksize=int(stream_frame_sec * orca.sample_rate),
channels=1,
dtype='int16',
callback=callback)
speaker = PvSpeaker(sample_rate=orca.sample_rate, bits_per_sample=16, buffer_size_secs=20)

connection.send({'version': orca.version})

orca_profiler = RTFProfiler(orca.sample_rate)

def buffer_pcm(x: Optional[Sequence[int]]) -> None:
if x is not None:
pcm_buffer.extend(x)
def buffer_pcm(pcm_chunk: Optional[Sequence[int]]) -> None:
    """Queue a synthesized PCM chunk for playback; ``None`` is ignored.

    If ``delay_sec[0]`` is still the ``-1`` sentinel when a chunk arrives, it
    is set to the time elapsed since ``utterance_end_sec``.
    """
    if pcm_chunk is None:
        return

    delay_not_recorded = delay_sec[0] == -1
    if delay_not_recorded:
        delay_sec[0] = time.perf_counter() - utterance_end_sec

    pcm_deque.append(pcm_chunk)

def play_buffered_pcm() -> None:
    """Write the queued PCM to the speaker, keeping whatever it does not accept.

    While ``warmup[0]`` is set, nothing is written until the queue holds at
    least ``warmup_sec * orca.sample_rate`` samples; once that threshold is
    reached the warmup flag is cleared and playback proceeds.

    All queued chunks are flattened into one list and passed to
    ``speaker.write``. If fewer samples were written than supplied, the
    remainder is pushed back onto the front of the queue.
    """
    if warmup[0]:
        # Count samples chunk by chunk rather than flattening the whole queue
        # into a temporary list on every call just to take its length.
        buffered_samples = sum(len(chunk) for chunk in pcm_deque)
        if buffered_samples < int(warmup_sec * orca.sample_rate):
            return
        warmup[0] = False

    if not pcm_deque:
        return

    pcm_chunk = list(chain.from_iterable(pcm_deque))
    pcm_deque.clear()

    written = speaker.write(pcm_chunk)
    if written < len(pcm_chunk):
        # Re-queue the unwritten tail at the front so it is played first next time.
        pcm_deque.appendleft(pcm_chunk[written:])

while True:
if synthesize and len(texts) > 0:
orca_profiler.tick()
pcm = orca_stream.synthesize(texts.pop(0))
orca_profiler.tock(pcm)
buffer_pcm(pcm)
play_buffered_pcm()
elif flush:
while len(texts) > 0:
orca_profiler.tick()
pcm = orca_stream.synthesize(texts.pop(0))
orca_profiler.tock(pcm)
buffer_pcm(pcm)
play_buffered_pcm()
orca_profiler.tick()
pcm = orca_stream.flush()
orca_profiler.tock(pcm)
buffer_pcm(pcm)
play_buffered_pcm()
connection.send({'rtf': orca_profiler.rtf(), 'delay': delay_sec[0]})
flush = False
while len(pcm_buffer) > 0:
time.sleep(stream_frame_sec)
stream.stop()
speaker.flush(list(chain.from_iterable(pcm_deque)))
pcm_deque.clear()
speaker.stop()
delay_sec[0] = -1
connection.send({'done': True})
elif close:
Expand All @@ -136,8 +135,8 @@ def buffer_pcm(x: Optional[Sequence[int]]) -> None:
message = connection.recv()
if message['command'] == 'synthesize':
texts.append(message['text'])
if not stream.active:
stream.start()
if not speaker.is_started:
speaker.start()
warmup[0] = True
utterance_end_sec = message['utterance_end_sec']
synthesize = True
Expand All @@ -147,7 +146,7 @@ def buffer_pcm(x: Optional[Sequence[int]]) -> None:
elif message['command'] == 'close':
close = True

stream.close()
speaker.delete()
orca_stream.close()
orca.delete()

Expand Down
3 changes: 1 addition & 2 deletions recipes/llm-voice-assistant/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
numpy
picollm==1.0.0
pvcheetah==2.0.1
pvorca==0.2.3
pvporcupine==3.0.2
pvrecorder==1.2.2
sounddevice
pvspeaker==1.0.2
3 changes: 2 additions & 1 deletion res/.lint/spell-check/dict.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
appendleft
Colour
Compat
dotdotdot
Expand All @@ -17,9 +18,9 @@ pvcheetah
pvorca
pvporcupine
pvrecorder
pvspeaker
samplerate
sdcard
sounddevice
Spannable
tock
tock
Expand Down

0 comments on commit 384b3ba

Please sign in to comment.