Skip to content

Commit

Permalink
make various timeouts configurable via CLIs
Browse files Browse the repository at this point in the history
  • Loading branch information
baxtree committed Jun 17, 2024
1 parent f7f55ba commit 77dd7a9
Show file tree
Hide file tree
Showing 21 changed files with 414 additions and 325 deletions.
30 changes: 24 additions & 6 deletions subaligner/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@
[-sil {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}]
[-fos] [-tod TRAINING_OUTPUT_DIRECTORY] [-o OUTPUT] [-t TRANSLATE] [-os OFFSET_SECONDS]
[-ml {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}]
[-mr {whisper}] [-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large}] [-tr {helsinki-nlp,whisper,facebook-mbart}]
[-tf TRANSLATION_FLAVOUR] [-lgs] [-d] [-q] [-ver]
[-mr {whisper}] [-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large}] [-tr {helsinki-nlp,whisper,facebook-mbart}] [-tf TRANSLATION_FLAVOUR]
[-mpt MEDIA_PROCESS_TIMEOUT] [-sat SEGMENT_ALIGNMENT_TIMEOUT] [-lgs] [-d] [-q] [-ver]
Subaligner command line interface
Subaligner command line interface (v0.3.7)
optional arguments:
options:
-h, --help show this help message and exit
-s SUBTITLE_PATH [SUBTITLE_PATH ...], --subtitle_path SUBTITLE_PATH [SUBTITLE_PATH ...]
File path or URL to the subtitle file (Extensions of supported subtitles: .ttml, .ssa, .stl, .sbv, .dfxp, .srt, .txt, .ytt, .vtt, .sub, .sami, .xml, .scc, .ass, .smi, .tmp) or selector for the embedded subtitle (e.g., embedded:page_num=888 or embedded:stream_index=0)
File path or URL to the subtitle file (Extensions of supported subtitles: .scc, .tmp, .sami, .stl, .ttml, .dfxp, .srt, .ssa, .ass, .sub, .sbv, .xml, .ytt, .smi, .txt, .vtt) or selector for the embedded subtitle (e.g., embedded:page_num=888 or embedded:stream_index=0)
-l MAX_LOGLOSS, --max_logloss MAX_LOGLOSS
Max global log loss for alignment
-so, --stretch_on Switch on stretch on subtitles
Expand All @@ -38,6 +38,10 @@
LLM recipe used for translating subtitles
-tf TRANSLATION_FLAVOUR, --translation_flavour TRANSLATION_FLAVOUR
Flavour variation for a specific LLM recipe supporting translation
-mpt MEDIA_PROCESS_TIMEOUT, --media_process_timeout MEDIA_PROCESS_TIMEOUT
Maximum waiting time in seconds when processing media files
-sat SEGMENT_ALIGNMENT_TIMEOUT, --segment_alignment_timeout SEGMENT_ALIGNMENT_TIMEOUT
Maximum waiting time in seconds when aligning each segment
-lgs, --languages Print out language codes used for stretch and translation
-d, --debug Print out debugging information
-q, --quiet Switch off logging information
Expand Down Expand Up @@ -191,6 +195,20 @@ def main():
default=None,
help="Flavour variation for a specific LLM recipe supporting translation"
)
parser.add_argument(
"-mpt",
"--media_process_timeout",
type=int,
default=180,
help="Maximum waiting time in seconds when processing media files"
)
parser.add_argument(
"-sat",
"--segment_alignment_timeout",
type=int,
default=60,
help="Maximum waiting time in seconds when aligning each segment"
)
parser.add_argument("-lgs", "--languages", action="store_true",
help="Print out language codes used for stretch and translation")
parser.add_argument("-d", "--debug", action="store_true",
Expand Down Expand Up @@ -301,7 +319,7 @@ def main():
sys.exit(21)

voice_probabilities = None
predictor = Predictor()
predictor = Predictor(media_process_timeout=FLAGS.media_process_timeout, segment_alignment_timeout=FLAGS.segment_alignment_timeout)
if FLAGS.mode == "single":
aligned_subs, audio_file_path, voice_probabilities, frame_rate = predictor.predict_single_pass(
video_file_path=local_video_path,
Expand Down
2 changes: 1 addition & 1 deletion subaligner/_version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
"""The semver for the current release."""
__version__ = "0.3.6"
__version__ = "0.3.7"
51 changes: 24 additions & 27 deletions subaligner/embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@

class FeatureEmbedder(object):
"""Audio and subtitle feature embedding.
Keyword Arguments:
n_mfcc {int} -- The number of MFCC components (default: {13}).
frequency {float} -- The sample rate (default: {16000}).
hop_len {int} -- The number of samples per frame (default: {512}).
step_sample {float} -- The space (in seconds) between the beginning of each sample (default: 1s / 25 FPS = 0.04s).
len_sample {float} -- The length in seconds for the input samples (default: {0.075}).
"""

def __init__(
Expand All @@ -21,16 +28,6 @@ def __init__(
step_sample: float = 0.04,
len_sample: float = 0.075,
) -> None:
"""Feature embedder initialiser.
Keyword Arguments:
n_mfcc {int} -- The number of MFCC components (default: {13}).
frequency {float} -- The sample rate (default: {16000}).
hop_len {int} -- The number of samples per frame (default: {512}).
step_sample {float} -- The space (in seconds) between the beginning of each sample (default: 1s / 25 FPS = 0.04s).
len_sample {float} -- The length in seconds for the input samples (default: {0.075}).
"""

self.__n_mfcc = n_mfcc # number of MFCC components
self.__frequency = frequency # sample rate
self.__hop_len = hop_len # number of samples per frame
Expand All @@ -50,7 +47,7 @@ def n_mfcc(self) -> int:
"""Get the number of MFCC components.
Returns:
int -- The number of MFCC components.
int: The number of MFCC components.
"""

return self.__n_mfcc
Expand All @@ -60,7 +57,7 @@ def frequency(self) -> int:
"""Get the sample rate.
Returns:
int -- The sample rate.
int: The sample rate.
"""

return self.__frequency
Expand All @@ -70,23 +67,23 @@ def hop_len(self) -> int:
"""Get the number of samples per frame.
Returns:
int -- The number of samples per frame.
int: The number of samples per frame.
"""

return self.__hop_len

@property
def step_sample(self) -> float:
"""The space (in seconds) between the begining of each sample.
"""The space (in seconds) between the beginning of each sample.
Returns:
float -- The space (in seconds) between the begining of each sample.
float: The space (in seconds) between the beginning of each sample.
"""

return self.__step_sample

@step_sample.setter
def step_sample(self, step_sample: int) -> None:
def step_sample(self, step_sample: float) -> None:
"""Configure the step sample
Arguments:
Expand All @@ -100,7 +97,7 @@ def len_sample(self) -> float:
"""Get the length in seconds for the input samples.
Returns:
float -- The length in seconds for the input samples.
float: The length in seconds for the input samples.
"""

return self.__item_time
Expand All @@ -113,7 +110,7 @@ def time_to_sec(cls, pysrt_time: SubRipTime) -> float:
pysrt_time {pysrt.SubRipTime} -- SubRipTime or coercible.
Returns:
float -- The number of seconds.
float: The number of seconds.
"""
# There is a weird bug in pysrt triggered by a programmatically generated
# subtitle with start time "00:00:00,000". When it occurs, .millisecond
Expand All @@ -133,7 +130,7 @@ def get_len_mfcc(self) -> float:
"""Get the number of samples to get LEN_SAMPLE: LEN_SAMPLE/(HOP_LEN/FREQUENCY).
Returns:
float -- The number of samples.
float: The number of samples.
"""

return self.__len_sample / (self.__hop_len / self.__frequency)
Expand All @@ -142,7 +139,7 @@ def get_step_mfcc(self) -> float:
"""Get the number of samples to get STEP_SAMPLE: STEP_SAMPLE/(HOP_LEN/FREQUENCY).
Returns:
float -- The number of samples.
float: The number of samples.
"""

return self.__step_sample / (self.__hop_len / self.__frequency)
Expand All @@ -154,7 +151,7 @@ def time_to_position(self, pysrt_time: SubRipTime) -> int:
pysrt_time {pysrt.SubRipTime} -- SubRipTime or coercible.
Returns:
int -- The cell position.
int: The cell position.
"""

return int(
Expand All @@ -170,7 +167,7 @@ def duration_to_position(self, seconds: float) -> int:
seconds {float} -- The duration in seconds.
Returns:
int -- The cell position.
int: The cell position.
"""

return int(
Expand All @@ -184,7 +181,7 @@ def position_to_duration(self, position: int) -> float:
position {int} -- The cell position.
Returns:
float -- The number of seconds.
float: The number of seconds.
"""

return (
Expand All @@ -198,7 +195,7 @@ def position_to_time_str(self, position: int) -> str:
position {int} -- The cell position.
Returns:
string -- The time string (e.g., 01:23:20,150).
str: The time string (e.g., 01:23:20,150).
"""

td = timedelta(
Expand Down Expand Up @@ -247,11 +244,11 @@ def extract_data_and_label_from_audio(
Keyword Arguments:
subtitles {pysrt.SubRipFile} -- The SubRipFile object (default: {None}).
sound_effect_start_marker: {string} -- A string indicating the start of the ignored sound effect (default: {None}).
sound_effect_end_marker: {string} -- A string indicating the end of the ignored sound effect (default: {None}).
sound_effect_start_marker {string} -- A string indicating the start of the ignored sound effect (default: {None}).
sound_effect_end_marker {string} -- A string indicating the end of the ignored sound effect (default: {None}).
Returns:
tuple -- The training data and the training lables.
tuple: The training data and the training labels.
Raises:
TerminalException: Thrown when the subtitles are missing.
Expand Down
29 changes: 15 additions & 14 deletions subaligner/hparam_tuner.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,21 @@


class HyperParameterTuner(object):
"""Hyperparameter tuning using the Tree of Parzen Estimators algorithm"""
"""Hyperparameter tuning using the Tree of Parzen Estimators algorithm
Arguments:
av_file_paths {list} -- A list of paths to the input audio/video files.
subtitle_file_paths {list} -- A list of paths to the subtitle files.
training_dump_dir {string} -- The directory of the training data dump file.
Keyword Arguments:
av_file_paths {List[str]} -- The list of audiovisual file paths.
subtitle_file_paths {List[str]} -- The list of subtitle files.
training_dump_dir {string} -- The directory path of the training dump.
num_of_trials {int} -- The number of trials for tuning (default: {5}).
tuning_epochs {int} -- The number of training epochs for each trial (default: {5}).
network_type {string} -- The type of the network (default: {"lstm"}, range: ["lstm", "bi_lstm", "conv_1d"]).
"""

SEARCH_SPACE = {
"learning_rate": hp.loguniform("learning_rate", np.log(0.00001), np.log(0.1)),
Expand All @@ -30,19 +44,6 @@ def __init__(self,
tuning_epochs: int = 5,
network_type: str = Network.LSTM,
**kwargs) -> None:
"""Hyperparameter tuner initialiser
Arguments:
av_file_paths {list} -- A list of paths to the input audio/video files.
subtitle_file_paths {list} -- A list of paths to the subtitle files.
training_dump_dir {string} -- The directory of the training data dump file.
Keyword Arguments:
num_of_trials {int} -- The number of trials for tuning (default: {5}).
tuning_epochs {int} -- The number of training epochs for each trial (default: {5}).
network_type {string} -- The type of the network (default: {"lstm"}, range: ["lstm", "bi_lstm", "conv_1d"]).
"""

assert network_type in Network.TYPES, "Supported network type values: %s" % Network.TYPES
hyperparameters = Hyperparameters()
hyperparameters.network_type = network_type
Expand Down
15 changes: 8 additions & 7 deletions subaligner/hyperparameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ class Hyperparameters(object):
OPTIMIZERS = ["adadelta", "adagrad", "adam", "adamax", "ftrl", "nadam", "rmsprop", "sgd"]

def __init__(self) -> None:
"""Hyperparameters initialiser setting default values"""

self.__learning_rate = 0.001
self.__hidden_size = {
"front_layers": [64],
Expand All @@ -33,8 +31,11 @@ def __init__(self) -> None:
def __eq__(self, other: Any) -> bool:
"""Comparator for Hyperparameters objects
Arguments:
other {Any} -- Any comparable object
Returns:
bool -- If True, the compared hyperparameter object is the same
bool: If True, the compared hyperparameter object is the same
"""

if isinstance(other, Hyperparameters):
Expand Down Expand Up @@ -195,7 +196,7 @@ def to_json(self) -> str:
"""Serialise hyperparameters into JSON string
Returns:
string -- The serialised hyperparameters in JSON
str: The serialised hyperparameters in JSON
"""
return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4)

Expand All @@ -212,7 +213,7 @@ def clone(self) -> "Hyperparameters":
"""Make a cloned hyperparameters object
Returns:
Hyperparameters -- The cloned Hyperparameters object.
Hyperparameters: The cloned Hyperparameters object.
"""
return self.from_json(self.to_json())

Expand All @@ -224,7 +225,7 @@ def from_json(cls, json_str: str) -> "Hyperparameters":
json_str {string} -- Hyperparameters in JSON.
Returns:
Hyperparameters -- The deserialised Hyperparameters object.
Hyperparameters: The deserialised Hyperparameters object.
"""
hp = cls()
hp.__dict__ = json.loads(json_str)
Expand All @@ -238,7 +239,7 @@ def from_file(cls, file_path: str) -> "Hyperparameters":
file_path {string} -- The path to the file containing hyperparameters.
Returns:
Hyperparameters -- The deserialised Hyperparameters object.
Hyperparameters: The deserialised Hyperparameters object.
"""
with open(file_path, "r", encoding="utf8") as file:
return cls.from_json(file.read())
Loading

0 comments on commit 77dd7a9

Please sign in to comment.