fixed/simplified espeak-mbrola backend

bootphon · Feb 21, 2020 · 8ed73bf · 8ed73bf
1 parent 177fea6
commit 8ed73bf
Show file tree

Hide file tree

Showing 8 changed files with 107 additions and 123 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -20,7 +20,8 @@ Version numbers follow [semantic versioning](https://semver.org)
     * On the other hand the ``espeak-mbrola`` backend allows espeak to output
       phones in standard SAMPA (adapted to the mbrola TTS front-end). This
       backend requires mbrola to be installed, as well as additional mbrola
-      voices to support needed languages.
+      voices to support needed languages. **This backend does not support word
+      separation or punctuation preservation**.
 
 * **bugfixes**
 
@@ -33,6 +34,8 @@ Version numbers follow [semantic versioning](https://semver.org)
 
   * Fixed a test when using ``espeak>=1.50``.
 
+  * Empty lines are correctly ignored when reading text from a file.
+
 
 ## phonemizer-2.1
 

diff --git a/README.md b/README.md
@@ -20,7 +20,8 @@ https://doi.org/10.5281/zenodo.1045825)
     and IPA (International Phonetic Alphabet) output.
 
   * [espeak-mbrola](https://github.com/espeak-ng/espeak-ng/blob/master/docs/mbrola.md)
-    uses the SAMPA phonetic alphabet instead of IPA.
+    uses the SAMPA phonetic alphabet instead of IPA but does not preserve word
+    boundaries.
 
   * [festival](http://www.cstr.ed.ac.uk/projects/festival) currently supports
     only American English. It uses a [custom
@@ -138,12 +139,13 @@ See the installed backends with the `--version` option:
         $ echo "hello world" | phonemize -l en-us -b festival
         hhaxlow werld
 
-* In French, using **espeak** and **espeak-mbrola**
+* In French, using **espeak** and **espeak-mbrola**, with custom token
+  separators (see below). espeak-mbrola does not support word separation.
 
-        $ echo "bonjour le monde" | phonemize -b espeak -l fr-fr
-        bɔ̃ʒuʁ lə- mɔ̃d
-        $ echo "bonjour le monde" | phonemize -b espeak-mbrola -l mb-fr1
-        bo~ZuR l@ mo~d
+        $ echo "bonjour le monde" | phonemize -b espeak -l fr-fr -p ' ' -w '/w '
+        b ɔ̃ ʒ u ʁ /w l ə /w m ɔ̃ d /w
+        $ echo "bonjour le monde" | phonemize -b espeak-mbrola -l mb-fr1 -p ' ' -w '/w '
+        b o~ Z u R l @ m o~ d
 
 * In Japanese, using **segments**
 
@@ -185,8 +187,8 @@ The exhaustive list of supported languages is available with the command
 
 ### Token separators
 
-You can specify separators for phones, syllables (festival only) and
-words.
+You can specify separators for phones, syllables (**festival** only) and
+words (except **espeak-mbrola**).
 
     $ echo "hello world" | phonemize -b festival -w ' ' -p ''
     hhaxlow werld
@@ -214,7 +216,8 @@ a space for both phones and words):
 ### Punctuation
 
 By default the punctuation is removed in the phonemized output. You can preserve
-it using the ``--preserve-punctuation`` option:
+it using the ``--preserve-punctuation`` option (not supported by the
+**espeak-mbrola** backend):
 
     $ echo "hello, world!" | phonemize --strip
     həloʊ wɜːld
@@ -230,9 +233,8 @@ it using the ``--preserve-punctuation`` option:
         $ echo "hello world" | phonemize -l en-us -b espeak --with-stress
         həlˈoʊ wˈɜːld
 
-* The **espeak** and **espeak-mbrola** backends can switch languages during
-  phonemization (below from French to English), use the ``--language-switch``
-  option to deal with it:
+* The **espeak** backend can switch languages during phonemization (below from
+  French to English), use the ``--language-switch`` option to deal with it:
 
         $ echo "j'aime le football" | phonemize -l fr-fr -b espeak --language-switch keep-flags
         [WARNING] fount 1 utterances containing language switches on lines 1

diff --git a/phonemizer/backend/espeak.py b/phonemizer/backend/espeak.py
@@ -44,38 +44,6 @@ class BaseEspeakBackend(BaseBackend):
 
     espeak_version_re = r'.*: ([0-9]+(\.[0-9]+)+(\-dev)?)'
 
-    def __init__(self, language,
-                 punctuation_marks=Punctuation.default_marks(),
-                 preserve_punctuation=False,
-                 language_switch='keep-flags',
-                 logger=get_logger()):
-        super().__init__(
-            language, punctuation_marks=punctuation_marks,
-            preserve_punctuation=preserve_punctuation, logger=logger)
-        self.logger.debug('espeak is %s', self.espeak_path())
-
-        # adapt some command line option to the espeak version (for
-        # phoneme separation and IPA output)
-        version = self.version()
-
-        self.sep = '--sep=_'
-        if version == '1.48.03' or version.split('.')[1] <= '47':
-            self.sep = ''  # pragma: nocover
-
-        self.ipa = '--ipa=3'
-        if self.is_espeak_ng():  # this is espeak-ng
-            self.ipa = '-x --ipa'
-
-        # ensure the lang_switch argument is valid
-        valid_lang_switch = [
-            'keep-flags', 'remove-flags', 'remove-utterance']
-        if language_switch not in valid_lang_switch:
-            raise RuntimeError(
-                'lang_switch argument "{}" invalid, must be in {}'
-                .format(language_switch, ", ".join(valid_lang_switch)))
-        self._lang_switch = language_switch
-        self._lang_switch_list = []
-
     @staticmethod
     def set_espeak_path(fpath):
         """Sets the espeak executable as `fpath`"""
@@ -261,8 +229,30 @@ def __init__(self, language,
                  logger=get_logger()):
         super().__init__(
             language, punctuation_marks=punctuation_marks,
-            preserve_punctuation=preserve_punctuation,
-            language_switch=language_switch, logger=logger)
+            preserve_punctuation=preserve_punctuation, logger=logger)
+        self.logger.debug('espeak is %s', self.espeak_path())
+
+        # adapt some command line option to the espeak version (for
+        # phoneme separation and IPA output)
+        version = self.version()
+
+        self.sep = '--sep=_'
+        if version == '1.48.03' or version.split('.')[1] <= '47':
+            self.sep = ''  # pragma: nocover
+
+        self.ipa = '--ipa=3'
+        if self.is_espeak_ng():  # this is espeak-ng
+            self.ipa = '-x --ipa'
+
+        # ensure the lang_switch argument is valid
+        valid_lang_switch = [
+            'keep-flags', 'remove-flags', 'remove-utterance']
+        if language_switch not in valid_lang_switch:
+            raise RuntimeError(
+                'lang_switch argument "{}" invalid, must be in {}'
+                .format(language_switch, ", ".join(valid_lang_switch)))
+        self._lang_switch = language_switch
+        self._lang_switch_list = []
 
         self._with_stress = with_stress
 
@@ -327,6 +317,12 @@ class EspeakMbrolaBackend(BaseEspeakBackend):
     # this will be initialized once, at the first call to supported_languages()
     _supported_languages = None
 
+    _lang_switch_list = []
+
+    def __init__(self, language, logger=get_logger()):
+        super().__init__(language, logger=logger)
+        self.logger.debug('espeak is %s', self.espeak_path())
+
     @staticmethod
     def name():
         return 'espeak-mbrola'
@@ -370,54 +366,16 @@ def supported_languages(cls):  # pragma: nocover
         return cls._supported_languages
 
     def _command(self, fname):
-        return (
-            f'{self.espeak_path()} -v {self.language} '
-            f'-q -f {fname} --pho --sep=_')
+        return f'{self.espeak_path()} -v {self.language} -q -f {fname} --pho'
 
     def _postprocess_line(self, line, num, separator, strip):
-        lines = line.split('\n')
-
-        # retrieve the phonemized output but with bad SAMPA alphabet
-        # (with word separation)
-        output_bad_phones = lines[0].strip()
-        if not output_bad_phones:
-            return ''
-
-        # this fix an unexplained bug fount only on travis (on all other tested
-        # platforms and epseak versions, 'oignon' is phonemized as o_n_j_'O~
-        # excepted on travis where it is o_n^_'O~)
-        output_bad_phones = output_bad_phones.replace('^', '_j')
-
         # retrieve the phonemes with the correct SAMPA alphabet (but
         # without word separation)
         phonemes = (
-            line.split('\t')[0] for line in lines[1:] if line.strip())
-        phonemes = [pho for pho in phonemes if pho != '_']
+            l.split('\t')[0] for l in line.split('\n') if l.strip())
+        phonemes = separator.phone.join(pho for pho in phonemes if pho != '_')
 
-        # merge the two outputs in a single one, word separation AND
-        # correct sampa alphabet
-        out_line = ''
-        phonemes_index = 0
-        for word in output_bad_phones.split(' '):
-            for phoneme in word.strip().split('_'):
-                if '(' in phoneme and ')' in phoneme:
-                    # this is a language switch flag
-                    out_line += phoneme + separator.phone
-                else:
-                    out_line += phonemes[phonemes_index] + separator.phone
-                    phonemes_index += 1
-
-            if strip and separator.phone:
-                out_line = out_line[:-len(separator.phone)]
-            out_line += separator.word
-
-        # ensure all the phonemes have been converted
-        if phonemes_index != len(phonemes):
-            raise RuntimeError(
-                f'failed to postprocess line {num}: {output_bad_phones}')
+        if not strip:
+            phonemes += separator.phone
 
-        if strip and separator.word:
-            out_line = out_line[:-len(separator.word)]
-
-        out_line = self._process_lang_switch(num, out_line)
-        return out_line
+        return phonemes
diff --git a/phonemizer/main.py b/phonemizer/main.py
@@ -89,7 +89,8 @@ def parse_args():
   https://github.com/espeak-ng/espeak-ng
 
 - espeak-mbrola uses the SAMPA phonetic alphabet, it requires mbrola to be
-  installed as well as additional mbrola voices. See
+  installed as well as additional mbrola voices. It does not support word or
+  syllable tokenization. See
   https://github.com/espeak-ng/espeak-ng/blob/master/docs/mbrola.md
 
 - festival is also a text-to-speech software. Currently only American
@@ -189,7 +190,8 @@ def parse_args():
     group.add_argument(
         '-w', '--word-separator', metavar='<str>',
         default=separator.default_separator.word,
-        help='word separator, default is "%(default)s".')
+        help='''word separator, not valid for espeak-mbrola backend,
+        default is "%(default)s".''')
 
     group.add_argument(
         '-s', '--syllable-separator', metavar='<str>',
@@ -235,7 +237,9 @@ def parse_args():
         This path can also be specified using the
         $PHONEMIZER_FESTIVAL_PATH environment variable.''')
 
-    group = parser.add_argument_group('punctuation processing')
+    group = parser.add_argument_group(
+        'punctuation processing',
+        description='not available for espeak-mbrola backend')
     group.add_argument(
         '--preserve-punctuation', action='store_true',
         help='''preserve the punctuation marks in the phonemized output,
@@ -302,18 +306,20 @@ def main():
     log.debug('writing to %s', streamout.name)
 
     # configure the separator for phonemes, syllables and words.
-    sep = separator.Separator(
-        phone=args.phone_separator,
-        syllable=args.syllable_separator,
-        word=args.word_separator)
+    if args.backend == 'espeak-mbrola':
+        log.debug('using espeak-mbrola backend: ignoring word separator')
+        sep = separator.Separator(
+            phone=args.phone_separator,
+            syllable=None,
+            word=None)
+    else:
+        sep = separator.Separator(
+            phone=args.phone_separator,
+            syllable=args.syllable_separator,
+            word=args.word_separator)
     log.debug('separator is %s', sep)
 
-    # load the input text (python2 optionnally needs an extra decode)
-    text = streamin.read()
-    try:
-        text = text.decode('utf8')
-    except (AttributeError, UnicodeEncodeError):
-        pass
+    text = [line.strip() for line in streamin]
 
     # phonemize the input text
     out = phonemize.phonemize(
@@ -329,8 +335,8 @@ def main():
         njobs=args.njobs,
         logger=log)
 
-    if len(out):
-        streamout.write(out + '\n')
+    if out:
+        streamout.write('\n'.join(out) + '\n')
 
 
 if __name__ == '__main__':  # pragma: nocover

diff --git a/phonemizer/phonemize.py b/phonemizer/phonemize.py
@@ -62,21 +62,24 @@ def phonemize(
       be 'festival' (US English only is supported, coded 'en-us'),
       'espeak', 'espeak-mbrola' or 'segments'.
 
-    separator (Separator): string separators between phonemes,
-      syllables and words, default to separator.default_separator.
+    separator (Separator): string separators between phonemes, syllables and
+      words, default to separator.default_separator. Syllable separator is
+      considered only for the festival backend. Word separator is ignored by
+      the 'espeak-mbrola' backend.
 
     strip (bool): If True, don't output the last word and phone
       separators of a token, default to False.
 
     preserve_punctuation (bool): When True, will keep the punctuation in the
-        phonemized output. Default to False and remove all the punctuation.
+        phonemized output. Not supported by the 'espeak-mbrola' backend.
+        Default to False and remove all the punctuation.
 
     punctuation_marks (str): The punctuation marks to consider when dealing
         with punctuation. Default to Punctuation.default_marks().
 
-    with_stress (bool): This option is only valid for the espeak backend. When
-      True the stresses on phonemes are present (stresses characters are ˈ'ˌ).
-      When False stresses are removed. Default to False.
+    with_stress (bool): This option is only valid for the 'espeak' backend.
+      When True the stresses on phonemes are present (stresses characters are
+      ˈ'ˌ). When False stresses are removed. Default to False.
 
     language_switch (str): Espeak can output some words in another language
       (typically English) when phonemizing a text. This option setups the
@@ -86,7 +89,7 @@ def phonemize(
       flags, for example (en) or (jp), in the output. The 'remove-flags' policy
       removes them and the 'remove-utterance' policy removes the whole line of
       text including a language switch. This option is only valid for the
-      'espeak' and 'espeak-mbrola' backends.
+      'espeak' backend.
 
     njobs (int): The number of parallel jobs to launch. The input text
       is split in `njobs` parts, phonemized on parallel instances of
@@ -130,6 +133,12 @@ def phonemize(
             'the "language_switch" option is available for espeak backend '
             'only, but you are using {} backend'.format(backend))
 
+    # preserve_punctuation and word separator not valid for espeak-mbrola
+    if backend == 'espeak-mbrola' and preserve_punctuation:
+        logger.warning('espeak-mbrola backend cannot preserve punctuation')
+    if backend == 'espeak-mbrola' and separator.word:
+        logger.warning('espeak-mbrola backend cannot preserve word separation')
+
     # python2 needs additional utf8 encoding
     if sys.version_info[0] == 2:  # pragma: nocover
         logger.warning(
@@ -152,9 +161,6 @@ def phonemize(
     elif backend == 'espeak-mbrola':
         phonemizer = backends[backend](
             language,
-            punctuation_marks=punctuation_marks,
-            preserve_punctuation=preserve_punctuation,
-            language_switch=language_switch,
             logger=logger)
     else:  # festival or segments
         phonemizer = backends[backend](

diff --git a/test/test_espeak.py b/test/test_espeak.py
@@ -272,7 +272,7 @@ def test_path_venv():
         ('mont', 'mo~'),
         ('nom', 'no~'),
         ('oignon', 'onjo~'),
-        ('ping', '(en)piN(fr)'),
+        ('ping', 'piN'),
         # liquid glides
         ('long', 'lo~'),
         ('rond', 'Ro~'),
@@ -309,13 +309,13 @@ def test_sampa_fr(text, expected):
 def test_french_sampa():
     text = u'bonjour le monde'
     backend = EspeakMbrolaBackend('mb-fr1')
-    sep = separator.Separator(word=' /w ', phone=' ')
+    sep = separator.Separator(word=None, phone=' ')
 
-    expected = 'b o~ Z u R  /w l @  /w m o~ d  /w '
+    expected = 'b o~ Z u R l @ m o~ d '
     out = backend.phonemize(text, separator=sep, strip=False)
     assert out == expected
 
-    expected = 'b o~ Z u R /w l @ /w m o~ d'
+    expected = 'b o~ Z u R l @ m o~ d'
     out = backend.phonemize(text, separator=sep, strip=True)
     assert out == expected