From f7f55ba65a0c1b6ea23ec9398a3f82a1c8220527 Mon Sep 17 00:00:00 2001 From: baxtree Date: Fri, 10 May 2024 18:23:51 +0100 Subject: [PATCH] add docs linting --- .github/workflows/ci-pipeline.yml | 3 ++ requirements-dev.txt | 3 +- requirements.txt | 4 +-- subaligner/embedder.py | 3 ++ subaligner/hyperparameters.py | 6 +++- subaligner/media_helper.py | 17 +++++++++++ subaligner/network.py | 17 +++++++++-- subaligner/predictor.py | 47 ++++++++++++++++++++++--------- subaligner/subtitle.py | 5 +++- subaligner/trainer.py | 9 ++++++ subaligner/transcriber.py | 11 ++++++-- subaligner/translator.py | 8 ++++-- tox.ini | 3 ++ 13 files changed, 111 insertions(+), 25 deletions(-) diff --git a/.github/workflows/ci-pipeline.yml b/.github/workflows/ci-pipeline.yml index 22c081f..aeb4064 100644 --- a/.github/workflows/ci-pipeline.yml +++ b/.github/workflows/ci-pipeline.yml @@ -41,6 +41,9 @@ jobs: - name: Linting run: | pycodestyle subaligner tests examples misc bin/subaligner bin/subaligner_1pass bin/subaligner_2pass bin/subaligner_batch bin/subaligner_convert bin/subaligner_train bin/subaligner_tune setup.py --ignore=E203,E501,W503 --exclude="subaligner/lib" + - name: Linting docstring + run: | + darglint -v 2 subaligner - name: Unit tests and coverage run: | coverage run -m unittest discover diff --git a/requirements-dev.txt b/requirements-dev.txt index f53a0d4..f1c707c 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -14,4 +14,5 @@ types-setuptools==57.4.9 typing-extensions==4.5.0 parameterized==0.8.1 pylint~=2.17.2 -pygments==2.7.4 \ No newline at end of file +pygments==2.7.4 +darglint~=1.8.1 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 5aea88e..9f7ab49 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,7 +29,7 @@ Markdown==2.6.11 mccabe==0.6.1 networkx>=2.5.1 numba>=0.50.0 -numpy<1.24.0 +numpy<1.27.0 oauthlib==3.1.0 pbr==4.0.2 pkgconfig~=1.5.5 @@ -49,7 +49,7 @@ pystack-debugger==0.8.0 pytz==2018.4 PyYAML>=4.2b1 
rsa==4.7 -scipy<1.11.0 +scipy<1.12.0 scikit-learn<1.2.0 six~=1.15.0 tblib==1.3.2 diff --git a/subaligner/embedder.py b/subaligner/embedder.py index 2b6de42..0f47c10 100644 --- a/subaligner/embedder.py +++ b/subaligner/embedder.py @@ -252,6 +252,9 @@ def extract_data_and_label_from_audio( Returns: tuple -- The training data and the training lables. + + Raises: + TerminalException: Thrown when the subtitles are missing. """ len_mfcc = self.get_len_mfcc() diff --git a/subaligner/hyperparameters.py b/subaligner/hyperparameters.py index 1ab1dc5..94aa818 100644 --- a/subaligner/hyperparameters.py +++ b/subaligner/hyperparameters.py @@ -31,7 +31,11 @@ def __init__(self) -> None: self.__network_type = "lstm" def __eq__(self, other: Any) -> bool: - """Comparator for Hyperparameters objects""" + """Comparator for Hyperparameters objects + + Returns: + bool -- If True, the compared hyperparameter object is the same + """ if isinstance(other, Hyperparameters): return all([ diff --git a/subaligner/media_helper.py b/subaligner/media_helper.py index 97f669a..008ecc5 100644 --- a/subaligner/media_helper.py +++ b/subaligner/media_helper.py @@ -52,11 +52,17 @@ def extract_audio(self, video_file_path, decompress: bool = False, freq: int = 1 Arguments: video_file_path {string} -- The input video file path. + Keyword Arguments: decompress {bool} -- Extract WAV if True otherwise extract AAC (default: {False}). freq {int} -- The audio sample frequency (default: {16000}). + Returns: string -- The file path of the extracted audio. + + Raises: + TerminalException: If audio extraction is interrupted by user hitting the interrupt key or timed out. + Exception: Thrown when any other exceptions occur. """ basename = os.path.basename(video_file_path) @@ -173,6 +179,10 @@ def extract_audio_from_start_to_end(self, audio_file_path: str, start: str, end: Returns: tuple -- The file path to the extracted audio and its duration. 
+ + Raises: + TerminalException: If audio extraction is interrupted by user hitting the interrupt key or timed out. + Exception: Thrown when any other exceptions occur. """ segment_duration = self.get_duration_in_seconds(start, end) basename = os.path.basename(audio_file_path) @@ -311,8 +321,14 @@ def get_frame_rate(self, file_path: str) -> float: Arguments: file_path {string} -- The input audiovisual file path. + Returns: float -- The frame rate + + Raises: + TerminalException: If frame rate extraction is interrupted by user hitting the interrupt key or timed out. + NoFrameRateException: If no frame rate is detected on the input audiovisual file. + Exception: Thrown when any other exceptions occur. """ discarded = "NUL:" if os.name == "nt" else "/dev/null" @@ -378,6 +394,7 @@ def refragment_with_min_duration(self, subs: List[SubRipItem], minimum_segment_d Arguments: subs {list} -- A list of SupRip cues. minimum_segment_duration {float} -- The minimum duration in seconds for each output subtitle cue. + Returns: list -- A list of new SupRip cues after fragmentation. """ diff --git a/subaligner/network.py b/subaligner/network.py index f2f6c02..c2e0c9f 100644 --- a/subaligner/network.py +++ b/subaligner/network.py @@ -56,7 +56,7 @@ def __init__( model_path: Optional[str] = None, backend: str = "tensorflow" ) -> None: - """ Network object initialiser used by factory methods. + """Network object initialiser used by factory methods. Arguments: secret {object} -- A hash only known by factory methods. @@ -65,8 +65,9 @@ def __init__( model_path {string} -- The path to the model file. backend {string} -- The tensor manipulation backend (default: {tensorflow}). Only tensorflow is supported by TF 2 and this parameter is here only for a historical reason. + Raises: - NotImplementedError -- Thrown when any network attributes are modified. + ValueError: Thrown when the network type is not supported. 
""" assert ( secret == Network.__secret @@ -124,6 +125,9 @@ def get_from_model(cls, model_path: str, hyperparameters: Hyperparameters) -> "N Arguments: model_path {string} -- The path to the model file. hyperparameters {Hyperparameters} -- A configuration for hyperparameters used for training. + + Returns: + Network: The model network. """ hp = hyperparameters.clone() @@ -236,8 +240,12 @@ def fit_and_get_history( logs_dir {string} -- The TensorBoard log file directory. training_log {string} -- The path to the log file of epoch results. resume {bool} -- True to continue with previous training result or False to start a new one (default: {False}). + Returns: tuple -- A tuple contains validation losses and validation accuracies. + + Raises: + TerminalException: If the predication is interrupted by user hitting the interrupt key """ csv_logger = ( @@ -324,8 +332,12 @@ def fit_with_generator( logs_dir {string} -- The TensorBoard log file directory. training_log {string} -- The path to the log file of epoch results. resume {bool} -- True to continue with previous training result or False to start a new one (default: {False}). + Returns: tuple -- A tuple contains validation losses and validation accuracies. + + Raises: + TerminalException: If the training is interrupted by user hitting the interrupt key """ initial_epoch = 0 @@ -455,6 +467,7 @@ def simple_fit_with_generator( train_data_raw {list} -- The HDF5 raw training data. labels_raw {list} -- The HDF5 raw training labels. hyperparameters {Hyperparameters} -- A configuration for hyperparameters used for training. + Returns: tuple -- A tuple contains validation losses and validation accuracies. """ diff --git a/subaligner/predictor.py b/subaligner/predictor.py index bede1d2..6da2aac 100644 --- a/subaligner/predictor.py +++ b/subaligner/predictor.py @@ -101,15 +101,15 @@ def predict_dual_pass( """Predict time to shift with single pass Arguments: - video_file_path {string} -- The input video file path. 
- subtitle_file_path {string} -- The path to the subtitle file. - weights_dir {string} -- The the model weights directory. - stretch {bool} -- True to stretch the subtitle segments (default: {False}) - stretch_in_lang {str} -- The language used for stretching subtitles (default: {"eng"}). - exit_segfail {bool} -- True to exit on any segment alignment failures (default: {False}) + video_file_path {string} -- The input video file path. + subtitle_file_path {string} -- The path to the subtitle file. + weights_dir {string} -- The model weights directory. + stretch {bool} -- True to stretch the subtitle segments (default: {False}) + stretch_in_lang {str} -- The language used for stretching subtitles (default: {"eng"}). + exit_segfail {bool} -- True to exit on any segment alignment failures (default: {False}) Returns: - tuple -- The shifted subtitles, the globally shifted subtitles and the voice probabilities of the original audio. + tuple -- The shifted subtitles, the globally shifted subtitles and the voice probabilities of the original audio. """ weights_file_path = self.__get_weights_path(weights_dir) @@ -143,12 +143,15 @@ def predict_plain_text(self, video_file_path: str, subtitle_file_path: str, stre """Predict time to shift with plain texts Arguments: - video_file_path {string} -- The input video file path. - subtitle_file_path {string} -- The path to the subtitle file. - stretch_in_lang {str} -- The language used for stretching subtitles (default: {"eng"}). + video_file_path {string} -- The input video file path. + subtitle_file_path {string} -- The path to the subtitle file. + stretch_in_lang {str} -- The language used for stretching subtitles (default: {"eng"}). Returns: - tuple -- The shifted subtitles, the audio file path (None) and the voice probabilities of the original audio (None). + tuple -- The shifted subtitles, the audio file path (None) and the voice probabilities of the original audio (None).
+ + Raises: + TerminalException: If the prediction is interrupted by user hitting the interrupt key. """ from aeneas.executetask import ExecuteTask from aeneas.task import Task @@ -223,8 +226,11 @@ def get_log_loss(self, voice_probabilities: np.ndarray, subs: List[SubRipItem]) voice_probabilities {list} -- A list of probabilities of audio chunks being speech. subs {list} -- A list of subtitle segments. - Returns: - float -- The loss value. + Returns: + float -- The loss value. + + Raises: + TerminalException: If the subtitle mask is empty. """ subtitle_mask = Predictor.__get_subtitle_mask(self, subs) @@ -258,8 +264,12 @@ def get_min_log_loss_and_index(self, voice_probabilities: np.ndarray, subs: SubR Arguments: voice_probabilities {list} -- A list of probabilities of audio chunks being speech. subs {list} -- A list of subtitle segments. + Returns: tuple -- The minimum loss value and its position. + + Raises: + TerminalException: If subtitle is empty or suspicious audio/subtitle duration is detected. """ local_subs = deepcopy(subs) @@ -526,6 +536,13 @@ def __predict_2nd_pass(self, audio_file_path: str, subs: List[SubRipItem], weigh stretch {bool} -- True to stretch the subtitle segments. stretch_in_lang {str} -- The language used for stretching subtitles. exit_segfail {bool} -- True to exit on any segment alignment failures. + + Returns: + list -- A list of aligned SubRip files + + Raises: + TerminalException: If the alignment is interrupted by user hitting the interrupt key or times out + Exception: Thrown when any other exceptions occur. """ segment_starts, segment_ends, subs = self.__media_helper.get_audio_segment_starts_and_ends(subs) @@ -726,6 +743,10 @@ def __predict( Returns: tuple -- The shifted subtitles, the audio file path and the voice probabilities of the original audio. + + Raises: + TerminalException: If the prediction failed on invalid input or on other exceptions. + ValueError: Thrown when no subtitle is passed in.
""" if network is None: network = self.__initialise_network(os.path.dirname(weights_file_path), self.__LOGGER) diff --git a/subaligner/subtitle.py b/subaligner/subtitle.py index 105641e..a049047 100644 --- a/subaligner/subtitle.py +++ b/subaligner/subtitle.py @@ -48,7 +48,7 @@ def __init__(self, secret: object, subtitle_file_path: str, subtitle_format: str format {string} -- Supported subtitle formats: subrip and ttml. Raises: - NotImplementedError -- Thrown when any subtitle attributes are modified. + UnsupportedFormatException: Thrown when the input subtitle format is not supported or no subtitle content is found. """ assert ( @@ -337,6 +337,9 @@ def shift_subtitle( Returns: string -- The path to the shifted subtitle file. + + Raises: + UnsupportedFormatException: Thrown when the input subtitle format is not supported. """ _, file_extension = os.path.splitext(subtitle_file_path) if shifted_subtitle_file_path is None: diff --git a/subaligner/trainer.py b/subaligner/trainer.py index 815439b..73c9aec 100644 --- a/subaligner/trainer.py +++ b/subaligner/trainer.py @@ -183,6 +183,9 @@ def pre_train( hyperparameters {Hyperparameters} -- A configuration for hyperparameters used for training. sound_effect_start_marker: {string} -- A string indicating the start of the ignored sound effect (default: {"("}). sound_effect_end_marker: {string} -- A string indicating the end of the ignored sound effect (default: {")"}). + + Returns: + tuple -- The valuation loss and accuracy. """ training_dump = os.path.join(os.path.abspath(training_dump_dir), "training_dump.hdf5") @@ -234,6 +237,9 @@ def get_done_epochs(training_log: str) -> int: Arguments: training_log {string} -- The path to the training log file. + + Returns: + int -- The number of finished epochs. """ if not os.path.isfile(training_log): return 0 @@ -260,6 +266,9 @@ def __extract_data_and_label_from_avs( Returns: tuple -- The training data and labels. 
+ + Raises: + TerminalException: If the extraction is interrupted by user hitting the interrupt key. """ train_data, labels = ( diff --git a/subaligner/transcriber.py b/subaligner/transcriber.py index 658e823..8a7948b 100644 --- a/subaligner/transcriber.py +++ b/subaligner/transcriber.py @@ -23,8 +23,9 @@ def __init__(self, recipe: str = TranscriptionRecipe.WHISPER.value, flavour: str Arguments: recipe {string} -- the LLM recipe used for transcribing video files (default: "whisper"). flavour {string} -- the flavour variation for a specific LLM recipe (default: "small"). + Raises: - NotImplementedError -- Thrown when the LLM recipe is unknown. + NotImplementedError: Thrown when the LLM recipe is unknown. """ if recipe not in [r.value for r in TranscriptionRecipe]: raise NotImplementedError(f"Unknown recipe: {recipe}") @@ -43,9 +44,13 @@ def transcribe(self, video_file_path: str, language_code: str) -> Tuple[Subtitle Arguments: video_file_path {string} -- The input video file path. language_code {string} -- An alpha 3 language code derived from ISO 639-3. + + Returns: + tuple -- The generated subtitle after transcription and the detected frame rate. + Raises: - TranscriptionException -- Thrown when transcription is failed. - NotImplementedError -- Thrown when the LLM recipe is not supported. + TranscriptionException: Thrown when transcription fails. + NotImplementedError: Thrown when the LLM recipe is not supported. """ if self.__recipe == "whisper": lang = Utils.get_iso_639_alpha_2(language_code) diff --git a/subaligner/translator.py b/subaligner/translator.py index 89462c4..c3479ac 100644 --- a/subaligner/translator.py +++ b/subaligner/translator.py @@ -62,7 +62,7 @@ def __init__(self, flavour {string} -- the flavour variation for a specific LLM recipe (default: None). Raises: - NotImplementedError -- Thrown when the model of the specified language pair is not found. + NotImplementedError: Thrown when the model of the specified language pair is not found.
""" self.__LOGGER = Logger().get_logger(__name__) @@ -89,7 +89,11 @@ def translate(self, language_pair {Tuple[str, str]} -- Used for overriding the default language pair (default: None). Returns: - {list} -- A list of new SubRipItems holding the translation results. + list -- A list of new SubRipItems holding the translation results. + + Raises: + NotImplementedError: Thrown when the input language pair is not supported. + TranslationException: Thrown when the source or the target language is not supported. """ if self.__recipe == TranslationRecipe.HELSINKI_NLP.value: diff --git a/tox.ini b/tox.ini index 9fde141..1ae0ec8 100644 --- a/tox.ini +++ b/tox.ini @@ -6,6 +6,9 @@ envlist = skipsdist=True skip_missing_interpreters = True +[darglint] +ignore=DAR101 + [testenv:py36] basepython = python3.6 whitelist_externals = /bin/bash