Skip to content

Commit

Permalink
support whisper's turbo model for transcription
Browse files Browse the repository at this point in the history
  • Loading branch information
baxtree committed Oct 9, 2024
1 parent f305017 commit e652e2c
Show file tree
Hide file tree
Showing 8 changed files with 11 additions and 8 deletions.
3 changes: 2 additions & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ mccabe = "==0.6.1"
numba = ">=0.50.0"
numpy = "<1.24.0"
oauthlib = "==3.1.0"
openai-whisper = "==20231117"
openai-whisper = "==20240930"
pbr = "==4.0.2"
pkgconfig = "~=1.5.5"
pluggy = "==0.13.1"
Expand All @@ -78,6 +78,7 @@ rsa = "==4.7"
scipy = "<1.11.0"
scikit-learn = ">=0.19.1"
sentencepiece = "~=0.1.95"
setuptools = "<65.0.0"
six = "~=1.15.0"
tensorflow = ">=1.15.5,<2.12"
termcolor = "==1.1.0"
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[build-system]
requires = ["setuptools", "wheel", "Cython"]
requires = ["setuptools<65.0.0", "wheel", "Cython"]

[tool.pydoclint]
style = 'sphinx'
Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ radish-bdd~=0.13.3
pex<=2.1.80
mypy==1.3.0
types-requests==2.27.9
types-setuptools==57.4.9
types-setuptools==64.0.1
typing-extensions==4.5.0
parameterized==0.8.1
pylint~=2.17.2
Expand Down
2 changes: 1 addition & 1 deletion requirements-llm.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
sentencepiece~=0.1.95
torch<2.3.0
transformers<4.37.0
openai-whisper==20231117
openai-whisper==20240930
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ PyYAML>=4.2b1
rsa==4.7
scipy<1.12.0
scikit-learn<1.2.0
setuptools<65.0.0
six~=1.15.0
tensorflow>=1.15.5,<2.13
termcolor==1.1.0
Expand Down
4 changes: 2 additions & 2 deletions subaligner/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
[-sil {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}]
[-fos] [-tod TRAINING_OUTPUT_DIRECTORY] [-o OUTPUT] [-t TRANSLATE] [-os OFFSET_SECONDS]
[-ml {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}]
[-mr {whisper}] [-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large}] [-tr {helsinki-nlp,whisper,facebook-mbart}] [-tf TRANSLATION_FLAVOUR]
[-mr {whisper}] [-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large,turbo}] [-tr {helsinki-nlp,whisper,facebook-mbart}] [-tf TRANSLATION_FLAVOUR]
[-mpt MEDIA_PROCESS_TIMEOUT] [-sat SEGMENT_ALIGNMENT_TIMEOUT] [-lgs] [-d] [-q] [-ver]
Subaligner command line interface (v0.3.7)
Expand Down Expand Up @@ -32,7 +32,7 @@
Target video's main language as an ISO 639-3 language code [https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes]
-mr {whisper}, --transcription_recipe {whisper}
LLM recipe used for transcribing video files
-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large}, --transcription_flavour {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large}
-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large,turbo}, --transcription_flavour {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large,turbo}
Flavour variation for a specific LLM recipe supporting transcription
-tr {helsinki-nlp,whisper,facebook-mbart}, --translation_recipe {helsinki-nlp,whisper,facebook-mbart}
LLM recipe used for translating subtitles
Expand Down
1 change: 1 addition & 0 deletions subaligner/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class WhisperFlavour(Enum):
LARGE_V2 = "large-v2"
LARGE_V3 = "large-v3"
LARGE = "large"
TURBO = "turbo"


class HelsinkiNLPFlavour(Enum):
Expand Down
4 changes: 2 additions & 2 deletions subaligner/subaligner_batch/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
[-sil {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}]
[-fos] [-tod TRAINING_OUTPUT_DIRECTORY] [-od OUTPUT_DIRECTORY] [-of {srt,ytt,ttml,txt,smi,xml,ssa,ass,dfxp,sub,scc,tmp,sami,vtt,stl,sbv}] [-t TRANSLATE]
[-ml {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}]
[-mr {whisper}] [-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large}] [-lgs] [-d] [-q] [-ver]
[-mr {whisper}] [-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large,turbo}] [-lgs] [-d] [-q] [-ver]
Batch align multiple subtitle files and audiovisual files
Expand Down Expand Up @@ -36,7 +36,7 @@
Target video's main language as an ISO 639-3 language code [https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes]
-mr {whisper}, --transcription_recipe {whisper}
LLM recipe used for transcribing video files
-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large}, --transcription_flavour {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large}
-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large,turbo}, --transcription_flavour {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large,turbo}
Flavour variation for a specific LLM recipe supporting transcription
-lgs, --languages Print out language codes used for stretch and translation
-d, --debug Print out debugging information
Expand Down

0 comments on commit e652e2c

Please sign in to comment.