Skip to content

Commit

Permalink
Merge pull request #216 from aramlang/blank_id-fix
Browse files Browse the repository at this point in the history
Enable Hebrew support
  • Loading branch information
m-bain authored May 4, 2023
2 parents 9fb5141 + cb53661 commit 1b62c61
Showing 1 changed file with 8 additions and 2 deletions.
10 changes: 8 additions & 2 deletions whisperx/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
"fa": "jonatasgrosman/wav2vec2-large-xlsr-53-persian",
"el": "jonatasgrosman/wav2vec2-large-xlsr-53-greek",
"tr": "mpoyraz/wav2vec2-xls-r-300m-cv7-turkish",
+ "he": "imvladikon/wav2vec2-xls-r-300m-hebrew",
}


Expand Down Expand Up @@ -231,8 +232,13 @@ def align(

emission = emissions[0].cpu().detach()

- trellis = get_trellis(emission, tokens)
- path = backtrack(trellis, emission, tokens)
+ blank_id = 0
+ for char, code in model_dictionary.items():
+ if char == '[pad]' or char == '<pad>':
+ blank_id = code
+
+ trellis = get_trellis(emission, tokens, blank_id)
+ path = backtrack(trellis, emission, tokens, blank_id)
if path is None:
print(f'Failed to align segment ("{segment["text"]}"): backtrack failed, resorting to original...')
break
Expand Down

0 comments on commit 1b62c61

Please sign in to comment.