Skip to content

Commit

Permalink
Ensure resilience for uniqueness of TargetLanguageKeywords, and avoiding issues with UAlbertaALTLab/crk-db#126
Browse files Browse the repository at this point in the history
  • Loading branch information
fbanados committed Jul 24, 2024
1 parent f96f98f commit 88296f8
Showing 1 changed file with 10 additions and 2 deletions.
12 changes: 10 additions & 2 deletions src/morphodict/lexicon/management/commands/importjsondict.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,11 @@ def run(self):
text=entry["head"],
raw_analysis=entry["analysis"],
)[0]
self.create_definitions(wf, entry["senses"])

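# Because we are inserting new definitions, there is a risk of duplicate keywords.
# To avoid that, ask create_definitions to check uniqueness, so that keywords
# already stored for this wordform are _not_ added again. Keywords still waiting
# in the insert buffer are checked as well, so the set of known keywords is complete.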
self.create_definitions(wf, entry["senses"], True)

self.flush_insert_buffers()

Expand Down Expand Up @@ -512,7 +516,7 @@ def _add_definition(self, wordform, text, sources: list[str], **kwargs):
)
return d

def create_definitions(self, wordform, senses):
def create_definitions(self, wordform, senses, check_uniqueness = False):
"""Create definition objects for the given wordform and senses."""

# Normally definition.citations.all() would tell you the sources, but to
Expand All @@ -537,6 +541,10 @@ def create_definitions(self, wordform, senses):

keywords.update(stem_keywords(new_definition.semantic_definition))

if check_uniqueness:
keywords -= { kw.text for kw in TargetLanguageKeyword.objects.filter(wordform=wordform)}
keywords -= { kw.text for kw in self.target_language_keyword_buffer._buffer if kw.wordform == wordform}

for kw in keywords:
self.target_language_keyword_buffer.add(
TargetLanguageKeyword(text=kw, wordform=wordform)
Expand Down

0 comments on commit 88296f8

Please sign in to comment.