-
Notifications
You must be signed in to change notification settings - Fork 0
/
id_heuristic.py
33 lines (26 loc) · 1005 Bytes
/
id_heuristic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# === Indonesian specific heuristics BEGIN
# Starts out empty; nothing in this section reads or fills it.
# NOTE(review): presumably populated by code elsewhere -- confirm.
nonprefixes_dict_id = dict()

# Prefixes that is_good_prefix_id accepts outright, without any further
# length or generic-part checks.
common_prefixes_id = [
    "ber", "di", "ke",
    "me", "mem", "men", "meng", "menge", "meny",
    "pe", "pem", "pen", "peng", "penge", "peny",
    "per", "se", "ter",
    "antar", "para", "eka", "kau", "ku", "oto", "pasca",
]

# Endings that is_good_ending_id recognises (exact-match lookup).
common_suffixes_id = [
    "an", "kan", "i",
    "lah", "kah", "nya",
    "pun", "ku", "mu",
]
def is_good_root_id(part, word):
    """Decide whether *part* is acceptable as an Indonesian root.

    A candidate of three characters or fewer is rejected immediately.
    Longer candidates are handed to ``is_good_part_generic`` (defined
    outside this section) and its result is returned unchanged.

    ``word`` is accepted for interface compatibility; this function does
    not use it.
    """
    # Too short to be a root: reject without consulting the generic check.
    if len(part) <= 3:
        return False
    return is_good_part_generic(part)
def is_good_postfix_id(part):
    """Decide whether *part* is acceptable as an Indonesian postfix.

    A candidate of at most three characters is accepted exactly when
    ``is_good_ending_id`` accepts it. Anything longer is always rejected.

    Returns:
        The result of ``is_good_ending_id(part)`` for short candidates,
        ``False`` otherwise.
    """
    # The previous version had a trailing ``else`` branch that consulted
    # is_good_part_generic, but the two length tests (<= 3 and > 3) already
    # cover every possible length, so that branch could never run. It is
    # removed here; the observable behaviour is unchanged.
    # NOTE(review): if a wider length window was intended (i.e. a larger
    # upper bound than 3), the generic check needs to be reinstated -- confirm.
    if len(part) > 3:
        return False
    return is_good_ending_id(part)
def is_good_ending_id(part):
    """Return True when *part* exactly matches an entry of ``common_suffixes_id``."""
    is_known_ending = part in common_suffixes_id
    return is_known_ending
def is_good_prefix_id(part):
    """Decide whether *part* is acceptable as an Indonesian prefix.

    Entries of ``common_prefixes_id`` are accepted immediately. Any other
    candidate longer than five characters is rejected; the remaining ones
    are judged by ``is_good_part_generic`` (defined outside this section),
    whose result is returned unchanged.
    """
    if part not in common_prefixes_id:
        # Not a listed prefix: apply the length cap, then the generic check.
        return False if len(part) > 5 else is_good_part_generic(part)
    return True
# === Indonesian specific heuristics END