make various timeouts configurable via CLIs

baxtree · Jun 17, 2024 · 77dd7a9 · 77dd7a9
1 parent f7f55ba
commit 77dd7a9
Show file tree

Hide file tree

Showing 21 changed files with 414 additions and 325 deletions.
diff --git a/subaligner/__main__.py b/subaligner/__main__.py
@@ -4,15 +4,15 @@
                   [-sil {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}]
                   [-fos] [-tod TRAINING_OUTPUT_DIRECTORY] [-o OUTPUT] [-t TRANSLATE] [-os OFFSET_SECONDS]
                   [-ml {afr,amh,ara,arg,asm,aze,ben,bos,bul,cat,ces,cmn,cym,dan,deu,ell,eng,epo,est,eus,fas,fin,fra,gla,gle,glg,grc,grn,guj,heb,hin,hrv,hun,hye,ina,ind,isl,ita,jbo,jpn,kal,kan,kat,kir,kor,kur,lat,lav,lfn,lit,mal,mar,mkd,mlt,msa,mya,nah,nep,nld,nor,ori,orm,pan,pap,pol,por,ron,rus,sin,slk,slv,spa,sqi,srp,swa,swe,tam,tat,tel,tha,tsn,tur,ukr,urd,vie,yue,zho}]
-                  [-mr {whisper}] [-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large}] [-tr {helsinki-nlp,whisper,facebook-mbart}]
-                  [-tf TRANSLATION_FLAVOUR] [-lgs] [-d] [-q] [-ver]
+                  [-mr {whisper}] [-mf {tiny,tiny.en,small,medium,medium.en,base,base.en,large-v1,large-v2,large-v3,large}] [-tr {helsinki-nlp,whisper,facebook-mbart}] [-tf TRANSLATION_FLAVOUR]
+                  [-mpt MEDIA_PROCESS_TIMEOUT] [-sat SEGMENT_ALIGNMENT_TIMEOUT] [-lgs] [-d] [-q] [-ver]
 
-Subaligner command line interface
+Subaligner command line interface (v0.3.7)
 
-optional arguments:
+options:
   -h, --help            show this help message and exit
   -s SUBTITLE_PATH [SUBTITLE_PATH ...], --subtitle_path SUBTITLE_PATH [SUBTITLE_PATH ...]
-                        File path or URL to the subtitle file (Extensions of supported subtitles: .ttml, .ssa, .stl, .sbv, .dfxp, .srt, .txt, .ytt, .vtt, .sub, .sami, .xml, .scc, .ass, .smi, .tmp) or selector for the embedded subtitle (e.g., embedded:page_num=888 or embedded:stream_index=0)
+                        File path or URL to the subtitle file (Extensions of supported subtitles: .scc, .tmp, .sami, .stl, .ttml, .dfxp, .srt, .ssa, .ass, .sub, .sbv, .xml, .ytt, .smi, .txt, .vtt) or selector for the embedded subtitle (e.g., embedded:page_num=888 or embedded:stream_index=0)
   -l MAX_LOGLOSS, --max_logloss MAX_LOGLOSS
                         Max global log loss for alignment
   -so, --stretch_on     Switch on stretch on subtitles)
@@ -38,6 +38,10 @@
                         LLM recipe used for translating subtitles
   -tf TRANSLATION_FLAVOUR, --translation_flavour TRANSLATION_FLAVOUR
                         Flavour variation for a specific LLM recipe supporting translation
+  -mpt MEDIA_PROCESS_TIMEOUT, --media_process_timeout MEDIA_PROCESS_TIMEOUT
+                        Maximum waiting time in seconds when processing media files
+  -sat SEGMENT_ALIGNMENT_TIMEOUT, --segment_alignment_timeout SEGMENT_ALIGNMENT_TIMEOUT
+                        Maximum waiting time in seconds when aligning each segment
   -lgs, --languages     Print out language codes used for stretch and translation
   -d, --debug           Print out debugging information
   -q, --quiet           Switch off logging information
@@ -191,6 +195,20 @@ def main():
         default=None,
         help="Flavour variation for a specific LLM recipe supporting translation"
     )
+    parser.add_argument(
+        "-mpt",
+        "--media_process_timeout",
+        type=int,
+        default=180,
+        help="Maximum waiting time in seconds when processing media files"
+    )
+    parser.add_argument(
+        "-sat",
+        "--segment_alignment_timeout",
+        type=int,
+        default=60,
+        help="Maximum waiting time in seconds when aligning each segment"
+    )
     parser.add_argument("-lgs", "--languages", action="store_true",
                         help="Print out language codes used for stretch and translation")
     parser.add_argument("-d", "--debug", action="store_true",
@@ -301,7 +319,7 @@ def main():
                         sys.exit(21)
 
                 voice_probabilities = None
-                predictor = Predictor()
+                predictor = Predictor(media_process_timeout=FLAGS.media_process_timeout, segment_alignment_timeout=FLAGS.segment_alignment_timeout)
                 if FLAGS.mode == "single":
                     aligned_subs, audio_file_path, voice_probabilities, frame_rate = predictor.predict_single_pass(
                         video_file_path=local_video_path,

diff --git a/subaligner/_version.py b/subaligner/_version.py
@@ -1,2 +1,2 @@
 """The semver for the current release."""
-__version__ = "0.3.6"
+__version__ = "0.3.7"
diff --git a/subaligner/embedder.py b/subaligner/embedder.py
@@ -11,6 +11,13 @@
 
 class FeatureEmbedder(object):
     """Audio and subtitle feature embedding.
+
+    Keyword Arguments:
+        n_mfcc {int} -- The number of MFCC components (default: {13}).
+        frequency {float} -- The sample rate (default: {16000}).
+        hop_len {int} -- The number of samples per frame (default: {512}).
+        step_sample {float} -- The space (in seconds) between the beginning of each sample (default: 1s / 25 FPS = 0.04s).
+        len_sample {float} -- The length in seconds for the input samples (default: {0.075}).
     """
 
     def __init__(
@@ -21,16 +28,6 @@ def __init__(
         step_sample: float = 0.04,
         len_sample: float = 0.075,
     ) -> None:
-        """Feature embedder initialiser.
-
-        Keyword Arguments:
-            n_mfcc {int} -- The number of MFCC components (default: {13}).
-            frequency {float} -- The sample rate  (default: {16000}).
-            hop_len {int} -- The number of samples per frame (default: {512}).
-            step_sample {float} -- The space (in seconds) between the beginning of each sample (default: 1s / 25 FPS = 0.04s).
-            len_sample {float} -- The length in seconds for the input samples (default: {0.075}).
-        """
-
         self.__n_mfcc = n_mfcc  # number of MFCC components
         self.__frequency = frequency  # sample rate
         self.__hop_len = hop_len  # number of samples per frame
@@ -50,7 +47,7 @@ def n_mfcc(self) -> int:
         """Get the number of MFCC components.
 
         Returns:
-            int -- The number of MFCC components.
+            int: The number of MFCC components.
         """
 
         return self.__n_mfcc
@@ -60,7 +57,7 @@ def frequency(self) -> int:
         """Get the sample rate.
 
         Returns:
-            int -- The sample rate.
+            int: The sample rate.
         """
 
         return self.__frequency
@@ -70,23 +67,23 @@ def hop_len(self) -> int:
         """Get the number of samples per frame.
 
         Returns:
-            int -- The number of samples per frame.
+            int: The number of samples per frame.
         """
 
         return self.__hop_len
 
     @property
     def step_sample(self) -> float:
-        """The space (in seconds) between the begining of each sample.
+        """The space (in seconds) between the beginning of each sample.
 
         Returns:
-            float -- The space (in seconds) between the begining of each sample.
+            float: The space (in seconds) between the beginning of each sample.
         """
 
         return self.__step_sample
 
     @step_sample.setter
-    def step_sample(self, step_sample: int) -> None:
+    def step_sample(self, step_sample: float) -> None:
         """Configure the step sample
 
         Arguments:
@@ -100,7 +97,7 @@ def len_sample(self) -> float:
         """Get the length in seconds for the input samples.
 
         Returns:
-            float -- The length in seconds for the input samples.
+            float: The length in seconds for the input samples.
         """
 
         return self.__item_time
@@ -113,7 +110,7 @@ def time_to_sec(cls, pysrt_time: SubRipTime) -> float:
             pysrt_time {pysrt.SubRipTime} -- SubRipTime or coercible.
 
         Returns:
-            float -- The number of seconds.
+            float: The number of seconds.
         """
         # There is a weird bug in pysrt triggered by a programatically generated
         # subtitle with start time "00:00:00,000". When it occurs, .millisecond
@@ -133,7 +130,7 @@ def get_len_mfcc(self) -> float:
         """Get the number of samples to get LEN_SAMPLE: LEN_SAMPLE/(HOP_LEN/FREQUENCY).
 
         Returns:
-            float -- The number of samples.
+            float: The number of samples.
         """
 
         return self.__len_sample / (self.__hop_len / self.__frequency)
@@ -142,7 +139,7 @@ def get_step_mfcc(self) -> float:
         """Get the number of samples to get STEP_SAMPLE: STEP_SAMPLE/(HOP_LEN/FREQUENCY).
 
         Returns:
-            float -- The number of samples.
+            float: The number of samples.
         """
 
         return self.__step_sample / (self.__hop_len / self.__frequency)
@@ -154,7 +151,7 @@ def time_to_position(self, pysrt_time: SubRipTime) -> int:
             pysrt_time {pysrt.SubRipTime} -- SubRipTime or coercible.
 
         Returns:
-            int -- The cell position.
+            int: The cell position.
         """
 
         return int(
@@ -170,7 +167,7 @@ def duration_to_position(self, seconds: float) -> int:
             seconds {float} -- The duration in seconds.
 
         Returns:
-            int -- The cell position.
+            int: The cell position.
         """
 
         return int(
@@ -184,7 +181,7 @@ def position_to_duration(self, position: int) -> float:
             position {int} -- The cell position.
 
         Returns:
-            float -- The number of seconds.
+            float: The number of seconds.
         """
 
         return (
@@ -198,7 +195,7 @@ def position_to_time_str(self, position: int) -> str:
             position {int} -- The cell position.
 
         Returns:
-            string -- The time string (e.g., 01:23:20,150).
+            str: The time string (e.g., 01:23:20,150).
         """
 
         td = timedelta(
@@ -247,11 +244,11 @@ def extract_data_and_label_from_audio(
 
         Keyword Arguments:
             subtitles {pysrt.SubRipFile} -- The SubRipFile object (default: {None}).
-            sound_effect_start_marker: {string} -- A string indicating the start of the ignored sound effect (default: {None}).
-            sound_effect_end_marker: {string} -- A string indicating the end of the ignored sound effect (default: {None}).
+            sound_effect_start_marker {string} -- A string indicating the start of the ignored sound effect (default: {None}).
+            sound_effect_end_marker {string} -- A string indicating the end of the ignored sound effect (default: {None}).
 
         Returns:
-            tuple -- The training data and the training lables.
+            tuple: The training data and the training labels.
 
         Raises:
             TerminalException: Thrown when the subtitles are missing.

diff --git a/subaligner/hparam_tuner.py b/subaligner/hparam_tuner.py
@@ -10,7 +10,21 @@
 
 
 class HyperParameterTuner(object):
-    """Hyperparameter tuning using the Tree of Parzen Estimators algorithm"""
+    """Hyperparameter tuning using the Tree of Parzen Estimators algorithm
+
+    Arguments:
+        av_file_paths {list} -- A list of paths to the input audio/video files.
+        subtitle_file_paths {list} -- A list of paths to the subtitle files.
+        training_dump_dir {string} -- The directory of the training data dump file.
+
+    Keyword Arguments:
+        av_file_paths {List[str]} -- The list of audiovisual file paths.
+        subtitle_file_paths {List[str]} -- The list of subtitle files.
+        training_dump_dir {string} -- The directory path of the training dump.
+        num_of_trials {int} -- The number of trials for tuning (default: {5}).
+        tuning_epochs {int} -- The number of training epochs for each trial (default: {5}).
+        network_type {string} -- The type of the network (default: {"lstm"}, range: ["lstm", "bi_lstm", "conv_1d"]).
+    """
 
     SEARCH_SPACE = {
         "learning_rate": hp.loguniform("learning_rate", np.log(0.00001), np.log(0.1)),
@@ -30,19 +44,6 @@ def __init__(self,
                  tuning_epochs: int = 5,
                  network_type: str = Network.LSTM,
                  **kwargs) -> None:
-        """Hyperparameter tuner initialiser
-
-        Arguments:
-            av_file_paths {list} -- A list of paths to the input audio/video files.
-            subtitle_file_paths {list} -- A list of paths to the subtitle files.
-            training_dump_dir {string} --  The directory of the training data dump file.
-
-        Keyword Arguments:
-            num_of_trials {int} -- The number of trials for tuning (default: {5}).
-            tuning_epochs {int} -- The number of training epochs for each trial (default: {5}).
-            network_type {string} -- The type of the network (default: {"lstm"}, range: ["lstm", "bi_lstm", "conv_1d"]).
-        """
-
         assert network_type in Network.TYPES, "Supported network type values: %s" % Network.TYPES
         hyperparameters = Hyperparameters()
         hyperparameters.network_type = network_type

diff --git a/subaligner/hyperparameters.py b/subaligner/hyperparameters.py
@@ -10,8 +10,6 @@ class Hyperparameters(object):
     OPTIMIZERS = ["adadelta", "adagrad", "adam", "adamax", "ftrl", "nadam", "rmsprop", "sgd"]
 
     def __init__(self) -> None:
-        """Hyperparameters initialiser setting default values"""
-
         self.__learning_rate = 0.001
         self.__hidden_size = {
             "front_layers": [64],
@@ -33,8 +31,11 @@ def __init__(self) -> None:
     def __eq__(self, other: Any) -> bool:
         """Comparator for Hyperparameters objects
 
+        Arguments:
+            other {Any} -- Any comparable object
+
         Returns:
-            bool -- If True, the compared hyperparameter object is the same
+            bool: If True, the compared hyperparameter object is the same
         """
 
         if isinstance(other, Hyperparameters):
@@ -195,7 +196,7 @@ def to_json(self) -> str:
         """Serialise hyperparameters into JSON string
 
         Returns:
-            string -- The serialised hyperparameters in JSON
+            str: The serialised hyperparameters in JSON
         """
         return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4)
 
@@ -212,7 +213,7 @@ def clone(self) -> "Hyperparameters":
         """Make a cloned hyperparameters object
 
         Returns:
-            Hyperparameters -- The cloned Hyperparameters object.
+            Hyperparameters: The cloned Hyperparameters object.
         """
         return self.from_json(self.to_json())
 
@@ -224,7 +225,7 @@ def from_json(cls, json_str: str) -> "Hyperparameters":
             json_str {string} -- Hyperparameters in JSON.
 
         Returns:
-            Hyperparameters -- The deserialised Hyperparameters object.
+            Hyperparameters: The deserialised Hyperparameters object.
         """
         hp = cls()
         hp.__dict__ = json.loads(json_str)
@@ -238,7 +239,7 @@ def from_file(cls, file_path: str) -> "Hyperparameters":
             file_path {string} -- The path to the file containing hyperparameters.
 
         Returns:
-            Hyperparameters -- The deserialised Hyperparameters object.
+            Hyperparameters: The deserialised Hyperparameters object.
         """
         with open(file_path, "r", encoding="utf8") as file:
             return cls.from_json(file.read())