diff --git a/wordview/mwes/patterns.py b/wordview/mwes/patterns.py index 2aaa155..d1a88ae 100644 --- a/wordview/mwes/patterns.py +++ b/wordview/mwes/patterns.py @@ -4,56 +4,80 @@ class EnMWEPatterns: patterns: Dict[str, List[str]] = {} - def __init__(self, mwe_types=["LVC", "NC2", "NC3", "ANC2", "ANC3", "VPC"]): - if "LVC" in mwe_types: - self.patterns["LVC"] = [ - "LVC: {
<\\w+>}", + def __init__( + self, + mwe_types=[ + "Light Verb Constructions", + "Noun Noun Compounds", + "Noun Noun Noun Compounds", + "Adjective Noun Compounds", + "Adjective Adjective Noun Compounds", + "Verb Particle Constructions", + ], + ): + if "Light Verb Constructions" in mwe_types: + self.patterns["Light Verb Constructions"] = [ + "Light Verb Constructions: {
<\\w+>}", ] - if "NC2" in mwe_types: - self.patterns["NC2"] = [ - "NC2: {}", + if "Noun Noun Compounds" in mwe_types: + self.patterns["Noun Noun Compounds"] = [ + "Noun Noun Compounds: {}", ] - if "NC3" in mwe_types: - self.patterns["NC3"] = [ - "NC3: {}", + if "Noun Noun Noun Compounds" in mwe_types: + self.patterns["Noun Noun Noun Compounds"] = [ + "Noun Noun Noun Compounds: {}", ] - if "ANC2" in mwe_types: - self.patterns["ANC2"] = [ - "ANC2: {}", + if "Adjective Noun Compounds" in mwe_types: + self.patterns["Adjective Noun Compounds"] = [ + "Adjective Noun Compounds: {}", ] - if "ANC3" in mwe_types: - self.patterns["ANC3"] = ["ANC3: {}"] - if "VPC" in mwe_types: - self.patterns["VPC"] = [ - "VPC: {}", + if "Adjective Adjective Noun Compounds" in mwe_types: + self.patterns["Adjective Adjective Noun Compounds"] = [ + "Adjective Adjective Noun Compounds: {}" + ] + if "Verb Particle Constructions" in mwe_types: + self.patterns["Verb Particle Constructions"] = [ + "Verb Particle Constructions: {}", ] class DeMWEPatterns: patterns: Dict[str, List[str]] = {} - def __init__(self, mwe_types=["LVC", "NC2", "NC3", "ANC2", "ANC3", "VPC"]): - if "LVC" in mwe_types: - self.patterns["LVC"] = [ - "LVC: {
<\\w+>}", + def __init__( + self, + mwe_types=[ + "Light Verb Constructions", + "Noun Noun Compounds", + "Noun Noun Noun Compounds", + "Adjective Noun Compounds", + "Adjective Adjective Noun Compounds", + "Verb Particle Constructions", + ], + ): + if "Light Verb Constructions" in mwe_types: + self.patterns["Light Verb Constructions"] = [ + "Light Verb Constructions: {
<\\w+>}", ] # Define the patterns for 2 and 3-word noun compounds (e.g., "Hausaufgaben", "Fußballplatz") - if "NC2" in mwe_types: - self.patterns["NC2"] = [ - "NC2: {}", + if "Noun Noun Compounds" in mwe_types: + self.patterns["Noun Noun Compounds"] = [ + "Noun Noun Compounds: {}", + ] + if "Noun Noun Noun Compounds" in mwe_types: + self.patterns["Noun Noun Noun Compounds"] = [ + "Noun Noun Noun Compounds: {}", ] - if "NC3" in mwe_types: - self.patterns["NC3"] = [ - "NC3: {}", + if "Adjective Noun Compounds" in mwe_types: + self.patterns["Adjective Noun Compounds"] = [ + "Adjective Noun Compounds: {}", ] - if "ANC2" in mwe_types: - self.patterns["ANC2"] = [ - "ANC2: {}", + if "Adjective Adjective Noun Compounds" in mwe_types: + self.patterns["Adjective Adjective Noun Compounds"] = [ + "Adjective Adjective Noun Compounds: {}" ] - if "ANC3" in mwe_types: - self.patterns["ANC3"] = ["ANC3: {}"] # Define the patterns for verb particle constructions (e.g., "aufstehen", "zurückkommen") - if "VPC" in mwe_types: - self.patterns["VPC"] = [ - "VPC: {}", + if "Verb Particle Constructions" in mwe_types: + self.patterns["Verb Particle Constructions"] = [ + "Verb Particle Constructions: {}", ]