diff --git a/Pipfile b/Pipfile index 82d9a99..636657c 100644 --- a/Pipfile +++ b/Pipfile @@ -55,7 +55,7 @@ mccabe = "==0.6.1" numba = ">=0.50.0" numpy = "<1.24.0" oauthlib = "==3.1.0" -openai-whisper = "==20231117" +openai-whisper = "==20240930" pbr = "==4.0.2" pkgconfig = "~=1.5.5" pluggy = "==0.13.1" diff --git a/requirements-llm.txt b/requirements-llm.txt index ffc3ebe..e5024d2 100644 --- a/requirements-llm.txt +++ b/requirements-llm.txt @@ -1,4 +1,4 @@ sentencepiece~=0.1.95 torch<2.3.0 transformers<4.37.0 -openai-whisper==20231117 +openai-whisper==20240930 diff --git a/subaligner/__main__.py b/subaligner/__main__.py index 95b84dd..cf0a58d 100755 --- a/subaligner/__main__.py +++ b/subaligner/__main__.py @@ -4,7 +4,7 @@ [-sil {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}] [-fos] [-tod TRAINING_OUTPUT_DIRECTORY] [-o OUTPUT] [-t TRANSLATE] [-os OFFSET_SECONDS] [-ml {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}] - [-mr {whisper}] [-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large}] [-tr {helsinki-nlp,whisper,facebook-mbart}] [-tf TRANSLATION_FLAVOUR] + [-mr {whisper}] [-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large,turbo}] [-tr {helsinki-nlp,whisper,facebook-mbart}] [-tf TRANSLATION_FLAVOUR] [-mpt MEDIA_PROCESS_TIMEOUT] [-sat SEGMENT_ALIGNMENT_TIMEOUT] [-lgs] [-d] [-q] [-ver] Subaligner command 
line interface (v0.3.7) @@ -32,7 +32,7 @@ Target video's main language as an ISO 639-3 language code [https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes] -mr {whisper}, --transcription_recipe {whisper} LLM recipe used for transcribing video files - -mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large}, --transcription_flavour {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large} + -mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large,turbo}, --transcription_flavour {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large,turbo} Flavour variation for a specific LLM recipe supporting transcription -tr {helsinki-nlp,whisper,facebook-mbart}, --translation_recipe {helsinki-nlp,whisper,facebook-mbart} LLM recipe used for translating subtitles diff --git a/subaligner/llm.py b/subaligner/llm.py index 190dd7d..36927bd 100644 --- a/subaligner/llm.py +++ b/subaligner/llm.py @@ -23,6 +23,7 @@ class WhisperFlavour(Enum): LARGE_V2 = "large-v2" LARGE_V3 = "large-v3" LARGE = "large" + TURBO = "turbo" class HelsinkiNLPFlavour(Enum): diff --git a/subaligner/subaligner_batch/__main__.py b/subaligner/subaligner_batch/__main__.py index a6f439c..76f43ae 100755 --- a/subaligner/subaligner_batch/__main__.py +++ b/subaligner/subaligner_batch/__main__.py @@ -4,7 +4,7 @@ [-sil {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}] [-fos] [-tod TRAINING_OUTPUT_DIRECTORY] [-od OUTPUT_DIRECTORY] [-of {srt,ytt,ttml,txt,smi,xml,ssa,ass,dfxp,sub,scc,tmp,sami,vtt,stl,sbv}] [-t TRANSLATE] [-ml 
{afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}] - [-mr {whisper}] [-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large}] [-lgs] [-d] [-q] [-ver] + [-mr {whisper}] [-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large,turbo}] [-lgs] [-d] [-q] [-ver] Batch align multiple subtitle files and audiovisual files @@ -36,7 +36,7 @@ Target video's main language as an ISO 639-3 language code [https://en.wikipedia.org/wiki/List_of_ISO_639-3_codes] -mr {whisper}, --transcription_recipe {whisper} LLM recipe used for transcribing video files - -mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large}, --transcription_flavour {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large} + -mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large,turbo}, --transcription_flavour {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large,turbo} Flavour variation for a specific LLM recipe supporting transcription -lgs, --languages Print out language codes used for stretch and translation -d, --debug Print out debugging information