Handling videos with no speech
MaleicAcid committed Dec 11, 2024
Parent: 51dd6e4 · Commit: f14eadf
Showing 4 changed files with 23 additions and 8 deletions.
openlrc/openlrc.py (15 changes: 11 additions & 4 deletions)
@@ -424,12 +424,19 @@ def to_json(segments: List[Segment], name, lang):
             'segments': []
         }
 
-        for segment in segments:
+        if not segments:
             result['segments'].append({
-                'start': segment.start,
-                'end': segment.end,
-                'text': segment.text
+                'start': 0.0,
+                'end': 5.0,
+                'text': "no speech found"
             })
+        else:
+            for segment in segments:
+                result['segments'].append({
+                    'start': segment.start,
+                    'end': segment.end,
+                    'text': segment.text
+                })
 
         with open(name, 'w', encoding='utf-8') as f:
             json.dump(result, f, ensure_ascii=False, indent=4)
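
The practical effect of this change is that the JSON written for a silent input always carries at least one segment instead of an empty list. A rough illustration of the resulting structure (other top-level keys of the result dict are omitted; this snippet is not part of the commit):

# Hedged illustration: what the 'segments' portion of the written JSON would
# look like for an input with no detected speech, per the branch added above.
placeholder_result = {
    'segments': [
        {'start': 0.0, 'end': 5.0, 'text': 'no speech found'}
    ]
}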
openlrc/transcribe.py (10 changes: 6 additions & 4 deletions)
@@ -98,10 +98,12 @@ def transcribe(self, audio_path: Union[str, Path], language: Optional[str] = None
             if timestamps < info.duration:  # silence at the end of the audio
                 pbar.update(info.duration - timestamps)
 
-        assert segments, f'No voice found for {audio_path}'
-
-        with Timer('Sentence Segmentation'):
-            result = self.sentence_split(segments, info.language)
+        if not segments:
+            logger.warning(f'No speech found for {audio_path}')
+            result = []
+        else:
+            with Timer('Sentence Segmentation'):
+                result = self.sentence_split(segments, info.language)
 
         info = TranscriptionInfo(language=info.language, duration=get_audio_duration(audio_path),
                                  duration_after_vad=info.duration_after_vad)
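
With the assertion gone, silent audio now logs a warning and yields an empty sentence list rather than aborting the run. A hedged caller-side sketch, assuming transcribe() returns the (segments, info) pair suggested by the surrounding code; the transcriber variable and file name are hypothetical:

# Hypothetical caller-side check; the return shape and attribute names are
# assumptions based on this hunk, not confirmed elsewhere in the diff.
segments, info = transcriber.transcribe('clip_without_speech.wav', language='en')
if not segments:
    print(f'No speech detected in {info.duration:.1f}s of audio; output will be a placeholder.')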
tests/data/test_nospeech_video.mp4 (binary file added; not shown)
tests/test_openlrc.py (6 changes: 6 additions & 0 deletions)
@@ -31,6 +31,7 @@ class TestLRCer(unittest.TestCase):
     def setUp(self) -> None:
         self.audio_path = Path('data/test_audio.wav')
         self.video_path = Path('data/test_video.mp4')
+        self.nospeech_video_path = Path('data/test_nospeech_video.mp4')
 
     def tearDown(self) -> None:
         def clear_paths(input_path):
@@ -78,6 +78,11 @@ def test_video_file_transcription_translation(self):
         result = lrcer.run('data/test_video.mp4')
         self.assertTrue(result)
 
+    def test_nospeech_video_file_transcription_translation(self):
+        lrcer = LRCer(whisper_model='tiny', device='cpu', compute_type='default')
+        result = lrcer.run('data/test_nospeech_video.mp4')
+        self.assertTrue(result)
+
     @patch('openlrc.translate.LLMTranslator.translate', MagicMock(side_effect=Exception('test exception')))
     def test_translation_error(self):
         lrcer = LRCer(whisper_model='tiny', device='cpu', compute_type='default')
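
The new test exercises the same path an end user would take. A minimal usage sketch along those lines, assuming the package-level LRCer import shown in openlrc's examples; the input file name is hypothetical:

# Minimal end-to-end sketch mirroring the new test: a video with no speech
# should now complete and produce placeholder output instead of raising.
from openlrc import LRCer

lrcer = LRCer(whisper_model='tiny', device='cpu', compute_type='default')
result = lrcer.run('my_silent_video.mp4')
print(result)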
