improved comments

bootphon · Jul 24, 2020 · 20ee9f2 · 20ee9f2
1 parent 5be90ee
commit 20ee9f2
Show file tree

Hide file tree

Showing 2 changed files with 30 additions and 16 deletions.
diff --git a/phonemizer/backend/espeak.py b/phonemizer/backend/espeak.py
@@ -167,7 +167,6 @@ def __init__(self, language,
                 'lang_switch argument "{}" invalid, must be in {}'
                 .format(language_switch, ", ".join(valid_lang_switch)))
         self._lang_switch = language_switch
-        # self._lang_switch_list = []
 
         self._with_stress = with_stress
 
@@ -201,18 +200,23 @@ def phonemize(self, text, separator=default_separator,
             log_storage = self.logger
             self.logger = None
 
+            # divide the input text in chunks, each chunk being processed in a
+            # separate job
             text_chunks = chunks(text, njobs)
+
+            # offset used below to recover the line numbers in the input text
+            # wrt the chunks
             offset = [0] + cumsum(
-                [c.count('\n') + 1 for c in text_chunks[:-1]])
+                (c.count('\n') + 1 for c in text_chunks[:-1]))
 
-            # we have here a list of phonemized chunks, output is a list of
-            # (text, lang_switches)
+            # we have here a list of (phonemized chunk, lang_switches)
             output = joblib.Parallel(n_jobs=njobs)(
                 joblib.delayed(self._phonemize_aux)(t, separator, strip)
                 for t in text_chunks)
 
-            # flatten them in a single list. For language switches lines we
-            # need to add an offset for each text chunk
+            # flatten both the phonemized chunks and language switches in a
+            # list. For language switches lines we need to add an offset to
+            # have the correct line numbers wrt the input text.
             text = list(itertools.chain(*(chunk[0] for chunk in output)))
             lang_switches = [chunk[1] for chunk in output]
             for i in range(len(lang_switches)):
@@ -223,11 +227,12 @@ def phonemize(self, text, separator=default_separator,
             # restore the log as it was before parallel processing
             self.logger = log_storage
 
+        # warn the user if language switches occurred during phonemization
         self._warn_on_lang_switch(lang_switches)
-        result = self._phonemize_postprocess(
-            text, text_type, punctuation_marks)
 
-        return result
+        # finally restore the punctuation
+        return self._phonemize_postprocess(
+            text, text_type, punctuation_marks)
 
     def _command(self, fname):
         return (

diff --git a/phonemizer/utils.py b/phonemizer/utils.py
@@ -30,24 +30,33 @@ def cumsum(l):
 
 
 def str2list(s):
-    """Returns the string `s` as a list of lines"""
+    """Returns the string `s` as a list of lines, split by \\n"""
     return s.strip().split('\n') if isinstance(s, six.string_types) else s
 
 
 def list2str(s):
-    """Returns the list of lines `s` as a single string"""
+    """Returns the list of lines `s` as a single string separated by \\n"""
     return '\n'.join(s) if not isinstance(s, six.string_types) else s
 
 
 def chunks(text, n):
-    """Return `n` equally sized chunks of a `text`
+    """Return a maximum of `n` equally sized chunks of a `text`
 
-    `n` must be an integer greater than 0.
+    This method is useful when phonemizing a single text on multiple jobs.
 
-    Only the n-1 first chunks have equal size. The last chunk can be longer.
-    The input `text` can be a list or a string. Return a list of `n` strings.
+    The exact number of chunks returned is `m = min(n, len(str2list(text)))`.
+    Only the m-1 first chunks have equal size. The last chunk can be longer.
+    The input `text` can be a list or a string. Return a list of `m` strings.
 
-    This method is usefull when phonemizing a single text on multiple jobs.
+    Parameters
+    ----------
+    text (str or list) : The text to divide in chunks
+
+    n (int) : The number of chunks to build, must be an integer greater than 0.
+
+    Returns
+    -------
+    The chunked text as a list of str.
 
     """
     text = str2list(text)