Skip to content

Commit

Permalink
Small fixes
Browse files (browse the repository at this point in the history)
  • Loading branch information
insolor committed Sep 8, 2023
1 parent 3d012fd commit 7a88d24
Show file tree
Hide file tree
Showing 10 changed files with 21 additions and 21 deletions.
16 changes: 9 additions & 7 deletions pymorphy2/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def inflect(self, required_grammemes):
res = self._morph._inflect(self, required_grammemes)
return None if not res else res[0]

def make_agree_with_number(self, num, animacy = None):
def make_agree_with_number(self, num, animacy=None):
"""
Inflect the word so that it agrees with ``num``
"""
Expand Down Expand Up @@ -74,13 +74,13 @@ def apply_to_parses(self, word, word_lower, parses):
return parses

probs = [self.p_t_given_w.prob(word_lower, tag)
for (word, tag, normal_form, score, methods_stack) in parses]
for (word, tag, normal_form, score, methods_stack) in parses]

if sum(probs) == 0:
# no P(t|w) information is available; return normalized estimate
k = 1.0 / sum(map(_score_getter, parses))
return [
(word, tag, normal_form, score*k, methods_stack)
(word, tag, normal_form, score * k, methods_stack)
for (word, tag, normal_form, score, methods_stack) in parses
]

Expand All @@ -94,9 +94,10 @@ def apply_to_parses(self, word, word_lower, parses):
def apply_to_tags(self, word, word_lower, tags):
if not tags:
return tags
return sorted(tags,
return sorted(
tags,
key=lambda tag: self.p_t_given_w.prob(word_lower, tag),
reverse=True
reverse=True,
)


Expand Down Expand Up @@ -375,6 +376,7 @@ def _inflect(self, form, required_grammemes):
if required_grammemes <= f[1].grammemes]

grammemes = form[1].updated_grammemes(required_grammemes)

def similarity(frm):
tag = frm[1]
return len(grammemes & tag.grammemes) - 0.1 * len(grammemes ^ tag.grammemes)
Expand Down Expand Up @@ -417,8 +419,8 @@ def word_is_known(self, word, strict=False):
"""
return self.dictionary.word_is_known(
word = word.lower(),
substitutes_compiled = None if strict else self.char_substitutes
word=word.lower(),
substitutes_compiled=None if strict else self.char_substitutes
)

@property
Expand Down
4 changes: 2 additions & 2 deletions pymorphy2/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def main(argv=None):
logger.debug(args)

if args['mem_usage']:
return show_dict_mem_usage(lang, path, args['--verbose'])
return show_dict_mem_usage(lang, path)
elif args['meta']:
return show_dict_meta(lang, path)

Expand All @@ -109,7 +109,7 @@ def _open_for_read(fn):

# ============================ Commands ===========================

def show_dict_mem_usage(lang, dict_path=None, verbose=False):
def show_dict_mem_usage(lang, dict_path=None):
"""
Show dictionary memory usage.
"""
Expand Down
5 changes: 2 additions & 3 deletions pymorphy2/opencorpora_dict/compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ def compile_parsed_dict(parsed_dict, compile_options=None):
word = paradigm[0][2] + stem + paradigm[0][0]
logger.debug("%20s %15s %15s %15s", word, len(gramtab), len(words), len(paradigms))


logger.debug("%20s %15s %15s %15s", "total:", len(gramtab), len(words), len(paradigms))
logger.debug("linearizing paradigms")

Expand All @@ -126,6 +125,7 @@ def get_form(para):
paradigm_prefix_ids = dict(
(pref, idx) for idx, pref in enumerate(paradigm_prefixes)
)

def fix_strings(paradigm):
""" Replace suffix and prefix with the respective id numbers. """
para = []
Expand Down Expand Up @@ -310,7 +310,7 @@ def _suffixes_prediction_data(words, paradigm_popularity, gramtab, paradigms, su

POS = tuple(tag.replace(' ', ',', 1).split(','))[0]

for i in range(max(len(form_suffix), 1), max_suffix_length+1): #was: 1,2,3,4,5
for i in range(max(len(form_suffix), 1), max_suffix_length + 1): # was: 1,2,3,4,5
word_end = word[-i:]
ending_counts[word_end] += 1
prefix_endings[form_prefix_id][word_end][POS][(para_id, idx)] += 1
Expand Down Expand Up @@ -376,4 +376,3 @@ def _create_out_path(out_path, overwrite=False):
logger.warning("Output folder already exists!")
return False
return True

3 changes: 2 additions & 1 deletion pymorphy2/opencorpora_dict/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ def __init__(self, path):

self._data = load_dict(path)

logger.info("format: %(format_version)s, revision: %(source_revision)s, updated: %(compiled_at)s", self._data.meta)
logger.info("format: %(format_version)s, revision: %(source_revision)s, updated: %(compiled_at)s",
self._data.meta)

# attributes from opencorpora_dict.storage.LoadedDictionary
self.paradigms = self._data.paradigms
Expand Down
2 changes: 0 additions & 2 deletions pymorphy2/tagset.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,8 +332,6 @@ def __contains__(self, grammeme):
raise ValueError("Grammeme is unknown: %s" % grammeme)
return False

# FIXME: __repr__ and __str__ always return unicode,
# but they should return a byte string under Python 2.x.
def __str__(self):
return self._str

Expand Down
2 changes: 1 addition & 1 deletion pymorphy2/tokenizers.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import re


GROUPING_SPACE_REGEX = re.compile(r'([^\w_-]|[+])', re.UNICODE)


def simple_word_tokenize(text, _split=GROUPING_SPACE_REGEX.split):
"""
Split text into tokens. Don't split by a hyphen.
Expand Down
2 changes: 1 addition & 1 deletion pymorphy2/units/by_hyphen.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ def _merge_lexemes(self, left_lexeme, right_lexeme):
score = (left[3] + right[3]) / 2
method_stack = ((self, left[4], right[4]), )

yield (word, tag, normal_form, score, method_stack)
yield word, tag, normal_form, score, method_stack

def _align_lexeme_forms(self, left_lexeme, right_lexeme):
# FIXME: quadratic algorithm
Expand Down
2 changes: 1 addition & 1 deletion pymorphy2/units/by_lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def normalized(self, form):
tag = self.dict.build_tag_info(para_id, 0)
new_methods_stack = self._fix_stack(methods_stack, normal_form, para_id, 0)

return (normal_form, tag, normal_form, 1.0, new_methods_stack)
return normal_form, tag, normal_form, 1.0, new_methods_stack

def _extract_para_info(self, methods_stack):
# This method assumes that DictionaryAnalyzer is the first
Expand Down
4 changes: 2 additions & 2 deletions pymorphy2/units/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def add_tag_if_not_seen(tag, result_list, seen_tags):
def with_suffix(form, suffix):
""" Return a new form with ``suffix`` attached """
word, tag, normal_form, score, methods_stack = form
return (word+suffix, tag, normal_form+suffix, score, methods_stack)
return word + suffix, tag, normal_form + suffix, score, methods_stack


def without_fixed_suffix(form, suffix_length):
Expand All @@ -45,7 +45,7 @@ def without_fixed_prefix(form, prefix_length):
def with_prefix(form, prefix):
""" Return a new form with ``prefix`` added """
word, tag, normal_form, score, methods_stack = form
return (prefix+word, tag, prefix+normal_form, score, methods_stack)
return prefix + word, tag, prefix + normal_form, score, methods_stack


def replace_methods_stack(form, new_methods_stack):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_lexemes.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ def parse_lexemes(lexemes_txt):
)
return lexemes_txt.split("\n\n")


def get_lexeme_words(lexeme):
lexeme_words = tuple(lexeme.split())
if lexeme_words[0].startswith('XFAIL'):
Expand Down Expand Up @@ -235,4 +236,3 @@ def _lexemes_for_word(word, morph):
for p in morph.parse(word):
res.append(tuple(f.word for f in p.lexeme))
return res

0 comments on commit 7a88d24

Please sign in to comment.