das.py
#!/usr/bin/env python
"""A module applying semantic, morphological, and phonological criteria to explain the neuter gender assignment"""
import deepl
from langdetect import detect
from hypernyms import taxonomy
from rules import neut_category_dict, neut_classes, neut_prefixes, neut_suffixes

license_key = "#"  # replace with your own DeepL license key
deepl_translator = deepl.Translator(license_key)


def neut_rule1(hypernyms: list) -> tuple:
    """Returns two sets: the general and the granular semantic categories that the
    hypernyms generated by the input noun (across all of its synsets) share with
    the neuter noun class. To simplify the output, the granular categories are
    also expressed in terms of the general semantic categories defined in
    neut_category_dict."""
    granular_categories = []
    general_categories = []
    if hypernyms is None:  # handles the None object occasionally returned by taxonomy()
        return set(), set()  # keep the two-set shape so callers can always unpack
    for category in hypernyms:
        if category in neut_classes:
            granular_categories.append(category)
    for category in granular_categories:
        for key, value in neut_category_dict.items():
            if category in value:
                general_categories.append(key)
    return set(general_categories), set(granular_categories)
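
# Illustrative example with hypothetical data: if taxonomy() yielded
# ["metal", "chemical element"] and "metal" were listed in neut_classes and
# filed under a key such as "materials" in neut_category_dict, then
#     neut_rule1(["metal", "chemical element"])
# would return ({"materials"}, {"metal"}).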


def neut_rule2(token: str) -> list:
    """Checks the input noun for the prefixes and suffixes associated with the
    neuter noun class and returns the affixes that were found."""
    affixes = []
    suffixes = []  # suffixes may be nested, so collect every match
    prefix = ""  # prefixes are mutually exclusive, so keep at most one
    for suffix in neut_suffixes:
        if token.endswith(suffix):
            suffixes.append("-" + suffix)
    for candidate in neut_prefixes:
        if token.startswith(candidate):
            prefix = candidate + "-"
    if suffixes:
        affixes.append(max(suffixes, key=len))  # keep the longest, most specific suffix
    if prefix:
        affixes.append(prefix)
    return affixes
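
# Illustrative example: assuming "chen" appears in neut_suffixes (diminutives in
# -chen are neuter), neut_rule2("mädchen") would return ["-chen"]; a matching
# prefix from neut_prefixes would be appended as an additional element.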


def neut_evaluate(lemmatized: str, hypernyms: list, parsed_base: str) -> None:
    """Takes a noun and the hypernyms generated over all of its noun synsets and
    determines whether any of the hypernyms are affiliated with the neuter noun
    class. It then performs a simple morphological analysis by checking whether
    the noun carries the prefixes and suffixes associated with the neuter gender,
    and finally checks whether the noun looks like a foreign borrowing."""
    neut_flag = False
    print(f"The noun '{lemmatized}' is neuter.")
    # check the semantic taxonomy
    if hypernyms:  # the noun generated hypernyms of its own
        semantic_general, semantic_granular = neut_rule1(hypernyms)
        if semantic_granular:
            print(
                f"It belongs to the following predominantly neuter semantic categories: {', '.join(semantic_granular)}"
            )
            print(
                f"The above classification can be expressed in terms of the following general semantic categories: {', '.join(semantic_general)}"
            )
            neut_flag = True
        else:
            print(
                "Grammatical gender assignment could not be determined based on the semantic category alone."
            )
    elif hypernyms is None:
        print(f"Couldn't generate a semantic taxonomy for '{lemmatized}'.")
    else:  # the noun returned no hypernyms, so see if its parsed base returns any
        if parsed_base:  # check if the parser returned a base in the first place
            parsed_translation = deepl_translator.translate_text(
                parsed_base, source_lang="DE", target_lang="EN-US"
            )
            translated_base = parsed_translation.text.casefold()
            base_hypernyms = taxonomy(
                translated_base
            )  # generate all possible hypernyms
            if base_hypernyms:
                base_semantic_general, base_semantic_granular = neut_rule1(
                    base_hypernyms
                )
                if base_semantic_granular:
                    print(f"Couldn't find any semantic categories for '{lemmatized}'.")
                    print(
                        f"The base noun '{parsed_base}' belongs to the following predominantly neuter semantic categories: {', '.join(base_semantic_granular)}"
                    )
                    print(
                        f"The above classification can be expressed in terms of the following general semantic categories: {', '.join(base_semantic_general)}"
                    )
                    neut_flag = True
                else:
                    print(
                        f"Couldn't find any semantic categories for '{lemmatized}'. There don't seem to be any predominantly neuter semantic categories to which the base noun '{parsed_base}' belongs."
                    )
            else:
                print(f"Couldn't generate any semantic categories for '{parsed_base}'.")
        else:
            print(f"Couldn't parse '{lemmatized}'.")
    # check the morphology
    morphological = neut_rule2(lemmatized)
    if parsed_base:
        print(f"'{lemmatized}' has the following neuter base noun: '{parsed_base}'.")
    if morphological:
        print(f"The noun has the following neuter affixes: {', '.join(morphological)}")
        neut_flag = True
    else:
        print(
            "Grammatical gender assignment cannot be determined based on the noun's affixes alone."
        )
    # check if the word is borrowed; foreign borrowings are predominantly neuter
    if detect(lemmatized) != "de":
        print(
            f"The word '{lemmatized}' could be a borrowing from another language. Imported words tend to be neuter."
        )
        neut_flag = True
    # print this if none of the above applies
    if not neut_flag:
        print(
            f"The grammatical gender of '{lemmatized}' cannot be explained with the available rules."
        )
        print("For better or worse, it has to be memorized.")