Small fixes

pymorphy2-fork · Sep 8, 2023 · commit 7a88d24
1 parent: 3d012fd
Show file tree

Hide file tree

Showing 10 changed files with 21 additions and 21 deletions.
diff --git a/pymorphy2/analyzer.py b/pymorphy2/analyzer.py
@@ -33,7 +33,7 @@ def inflect(self, required_grammemes):
         res = self._morph._inflect(self, required_grammemes)
         return None if not res else res[0]
 
-    def make_agree_with_number(self, num, animacy = None):
+    def make_agree_with_number(self, num, animacy=None):
         """
         Inflect the word so that it agrees with ``num``
         """
@@ -74,13 +74,13 @@ def apply_to_parses(self, word, word_lower, parses):
             return parses
 
         probs = [self.p_t_given_w.prob(word_lower, tag)
-                for (word, tag, normal_form, score, methods_stack) in parses]
+                 for (word, tag, normal_form, score, methods_stack) in parses]
 
         if sum(probs) == 0:
             # no P(t|w) information is available; return normalized estimate
             k = 1.0 / sum(map(_score_getter, parses))
             return [
-                (word, tag, normal_form, score*k, methods_stack)
+                (word, tag, normal_form, score * k, methods_stack)
                 for (word, tag, normal_form, score, methods_stack) in parses
             ]
 
@@ -94,9 +94,10 @@ def apply_to_parses(self, word, word_lower, parses):
     def apply_to_tags(self, word, word_lower, tags):
         if not tags:
             return tags
-        return sorted(tags,
+        return sorted(
+            tags,
             key=lambda tag: self.p_t_given_w.prob(word_lower, tag),
-            reverse=True
+            reverse=True,
         )
 
 
@@ -375,6 +376,7 @@ def _inflect(self, form, required_grammemes):
                                 if required_grammemes <= f[1].grammemes]
 
         grammemes = form[1].updated_grammemes(required_grammemes)
+
         def similarity(frm):
             tag = frm[1]
             return len(grammemes & tag.grammemes) - 0.1 * len(grammemes ^ tag.grammemes)
@@ -417,8 +419,8 @@ def word_is_known(self, word, strict=False):
 
         """
         return self.dictionary.word_is_known(
-            word = word.lower(),
-            substitutes_compiled = None if strict else self.char_substitutes
+            word=word.lower(),
+            substitutes_compiled=None if strict else self.char_substitutes
         )
 
     @property

diff --git a/pymorphy2/cli.py b/pymorphy2/cli.py
@@ -94,7 +94,7 @@ def main(argv=None):
         logger.debug(args)
 
         if args['mem_usage']:
-            return show_dict_mem_usage(lang, path, args['--verbose'])
+            return show_dict_mem_usage(lang, path)
         elif args['meta']:
             return show_dict_meta(lang, path)
 
@@ -109,7 +109,7 @@ def _open_for_read(fn):
 
 # ============================ Commands ===========================
 
-def show_dict_mem_usage(lang, dict_path=None, verbose=False):
+def show_dict_mem_usage(lang, dict_path=None):
     """
     Show dictionary memory usage.
     """

diff --git a/pymorphy2/opencorpora_dict/compile.py b/pymorphy2/opencorpora_dict/compile.py
@@ -109,7 +109,6 @@ def compile_parsed_dict(parsed_dict, compile_options=None):
             word = paradigm[0][2] + stem + paradigm[0][0]
             logger.debug("%20s %15s %15s %15s", word, len(gramtab), len(words), len(paradigms))
 
-
     logger.debug("%20s %15s %15s %15s", "total:", len(gramtab), len(words), len(paradigms))
     logger.debug("linearizing paradigms")
 
@@ -126,6 +125,7 @@ def get_form(para):
     paradigm_prefix_ids = dict(
         (pref, idx) for idx, pref in enumerate(paradigm_prefixes)
     )
+
     def fix_strings(paradigm):
         """ Replace suffix and prefix with the respective id numbers. """
         para = []
@@ -310,7 +310,7 @@ def _suffixes_prediction_data(words, paradigm_popularity, gramtab, paradigms, su
 
         POS = tuple(tag.replace(' ', ',', 1).split(','))[0]
 
-        for i in range(max(len(form_suffix), 1), max_suffix_length+1): #was: 1,2,3,4,5
+        for i in range(max(len(form_suffix), 1), max_suffix_length + 1):  # was: 1,2,3,4,5
             word_end = word[-i:]
             ending_counts[word_end] += 1
             prefix_endings[form_prefix_id][word_end][POS][(para_id, idx)] += 1
@@ -376,4 +376,3 @@ def _create_out_path(out_path, overwrite=False):
             logger.warning("Output folder already exists!")
             return False
     return True
-
diff --git a/pymorphy2/opencorpora_dict/wrapper.py b/pymorphy2/opencorpora_dict/wrapper.py
@@ -16,7 +16,8 @@ def __init__(self, path):
 
         self._data = load_dict(path)
 
-        logger.info("format: %(format_version)s, revision: %(source_revision)s, updated: %(compiled_at)s", self._data.meta)
+        logger.info("format: %(format_version)s, revision: %(source_revision)s, updated: %(compiled_at)s",
+                    self._data.meta)
 
         # attributes from opencorpora_dict.storage.LoadedDictionary
         self.paradigms = self._data.paradigms

diff --git a/pymorphy2/tagset.py b/pymorphy2/tagset.py
@@ -332,8 +332,6 @@ def __contains__(self, grammeme):
                 raise ValueError("Grammeme is unknown: %s" % grammeme)
             return False
 
-    # FIXME: __repr__ and __str__ always return unicode,
-    # but they should return a byte string under Python 2.x.
     def __str__(self):
         return self._str
 

diff --git a/pymorphy2/tokenizers.py b/pymorphy2/tokenizers.py
@@ -1,8 +1,8 @@
 import re
 
-
 GROUPING_SPACE_REGEX = re.compile(r'([^\w_-]|[+])', re.UNICODE)
 
+
 def simple_word_tokenize(text, _split=GROUPING_SPACE_REGEX.split):
     """
     Split text into tokens. Don't split by a hyphen.

diff --git a/pymorphy2/units/by_hyphen.py b/pymorphy2/units/by_hyphen.py
@@ -338,7 +338,7 @@ def _merge_lexemes(self, left_lexeme, right_lexeme):
             score = (left[3] + right[3]) / 2
             method_stack = ((self, left[4], right[4]), )
 
-            yield (word, tag, normal_form, score, method_stack)
+            yield word, tag, normal_form, score, method_stack
 
     def _align_lexeme_forms(self, left_lexeme, right_lexeme):
         # FIXME: quadratic algorithm

diff --git a/pymorphy2/units/by_lookup.py b/pymorphy2/units/by_lookup.py
@@ -87,7 +87,7 @@ def normalized(self, form):
         tag = self.dict.build_tag_info(para_id, 0)
         new_methods_stack = self._fix_stack(methods_stack, normal_form, para_id, 0)
 
-        return (normal_form, tag, normal_form, 1.0, new_methods_stack)
+        return normal_form, tag, normal_form, 1.0, new_methods_stack
 
     def _extract_para_info(self, methods_stack):
         # This method assumes that DictionaryAnalyzer is the first

diff --git a/pymorphy2/units/utils.py b/pymorphy2/units/utils.py
@@ -25,7 +25,7 @@ def add_tag_if_not_seen(tag, result_list, seen_tags):
 def with_suffix(form, suffix):
     """ Return a new form with ``suffix`` attached """
     word, tag, normal_form, score, methods_stack = form
-    return (word+suffix, tag, normal_form+suffix, score, methods_stack)
+    return word + suffix, tag, normal_form + suffix, score, methods_stack
 
 
 def without_fixed_suffix(form, suffix_length):
@@ -45,7 +45,7 @@ def without_fixed_prefix(form, prefix_length):
 def with_prefix(form, prefix):
     """ Return a new form with ``prefix`` added """
     word, tag, normal_form, score, methods_stack = form
-    return (prefix+word, tag, prefix+normal_form, score, methods_stack)
+    return prefix + word, tag, prefix + normal_form, score, methods_stack
 
 
 def replace_methods_stack(form, new_methods_stack):

diff --git a/tests/test_lexemes.py b/tests/test_lexemes.py
@@ -12,6 +12,7 @@ def parse_lexemes(lexemes_txt):
     )
     return lexemes_txt.split("\n\n")
 
+
 def get_lexeme_words(lexeme):
     lexeme_words = tuple(lexeme.split())
     if lexeme_words[0].startswith('XFAIL'):
@@ -235,4 +236,3 @@ def _lexemes_for_word(word, morph):
     for p in morph.parse(word):
         res.append(tuple(f.word for f in p.lexeme))
     return res
-