-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathorganizations.py
152 lines (132 loc) · 6.19 KB
/
organizations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# ---------------------------------------------
class Organization:
    """Plain record describing one organization.

    Every constructor argument is stored unchanged as a public attribute of
    the same name. ``isInstitute`` and ``isOnlineResource`` are independent
    boolean flags; both default to False.
    """

    # - - - - - - - - - - - - - - - - - - - - - -
    def __init__(self, name, shortname=None, city=None, country=None, contact=None, isInstitute=False, isOnlineResource=False):
        """Record the organization's identity, location, contact and flags."""
        # Identity
        self.name = name
        self.shortname = shortname
        # Location and contact
        self.city = city
        self.country = country
        self.contact = contact
        # Flags
        self.isInstitute = isInstitute
        self.isOnlineResource = isOnlineResource

    # - - - - - - - - - - - - - - - - - - - - - -
    def __str__(self):
        """Return ``Organization(...)`` listing all fields in constructor order."""
        ordered_fields = (
            self.name,
            self.shortname,
            self.city,
            self.country,
            self.contact,
            self.isInstitute,
            self.isOnlineResource,
        )
        return "Organization(" + ", ".join(str(value) for value in ordered_fields) + ")"
# ---------------------------------------------
class KnownOrganizations:
    """Index of organizations, keyed by both their name and their short name.

    ``korg_dict`` maps every non-empty name and short name to an
    ``Organization``. An organization that has both is stored under both
    keys, so either one can be passed to ``get``.
    """

    # - - - - - - - - - - - - - - - - - - - - - -
    def __init__(self):
        """Create an empty index."""
        self.korg_dict = dict()

    # - - - - - - - - - - - - - - - - - - - - - -
    def _find_existing(self, *keys):
        """Return the entry stored under the first of *keys* that is indexed, or None."""
        for key in keys:
            if key in self.korg_dict:
                return self.korg_dict[key]
        return None

    # - - - - - - - - - - - - - - - - - - - - - -
    def _register(self, korg):
        """Index *korg* under its name and its short name, skipping missing or empty keys."""
        for key in (korg.name, korg.shortname):
            if key:
                self.korg_dict[key] = korg

    # - - - - - - - - - - - - - - - - - - - - - -
    def loadInstitutions(self, file):
        """Load institutes from *file* (the ``institution_list`` file is expected).

        Each non-blank line holds 3 or 4 fields separated by ``"; "``:
        name, city, country and an optional fourth field carrying the short
        name. A city or country of ``-`` is stored as an empty string.
        Lines with any other number of fields are reported on stdout with
        their line number and skipped.

        An institute is only inserted when neither its name nor its short
        name is already indexed; otherwise a warning is printed and the
        existing entry is kept.
        """
        # "with" guarantees the file is closed even if a line raises.
        with open(file) as f_in:
            for line_no, raw_line in enumerate(f_in, start=1):
                line = raw_line.strip()
                if not line:
                    continue  # we skip empty lines
                elems = line.split("; ")
                if not 3 <= len(elems) <= 4:
                    print(f"ERROR, unexpected number of fields, skipping line {line_no}: {line}")
                    continue
                name, city, country = elems[:3]
                if city == "-":
                    city = ""
                if country == "-":
                    country = ""
                # NOTE(review): the first 6 characters of the 4th field are
                # dropped without checking them; presumably the field always
                # looks like "Short=<abbreviation>" - confirm against the data.
                shortname = elems[3][6:] if len(elems) == 4 else ""
                korg = Organization(name=name, shortname=shortname, city=city, country=country, isInstitute=True)
                # A short-name clash is reported in preference to a name clash.
                other_korg = self._find_existing(shortname, name)
                if other_korg is None:
                    self._register(korg)
                else:
                    print(f"WARNING: will not insert\n{korg}\nwhile\n{other_korg} already exists")

    # - - - - - - - - - - - - - - - - - - - - - -
    def loadInstitutions_old(self, file):
        """Load institutes from *file* in the older ``name[; Short=abbr]`` layout.

        Everything before the first ``"; Short="`` is the name and everything
        after it is the short name; without that marker the whole line is
        the name and the short name is None. City and country are not read.
        Insertion follows the same rule as ``loadInstitutions``: an entry
        whose name or short name is already indexed is reported and skipped.
        """
        with open(file) as f_in:
            for raw_line in f_in:
                line = raw_line.strip()
                name, marker, shortname = line.partition("; Short=")
                if not marker:
                    shortname = None
                korg = Organization(name=name, shortname=shortname, isInstitute=True)
                other_korg = self._find_existing(shortname, name)
                if other_korg is None:
                    # A blank line yields an empty name and is therefore not indexed.
                    self._register(korg)
                else:
                    print(f"WARNING: will not insert\n{korg}\nwhile\n{other_korg} already exists")

    # - - - - - - - - - - - - - - - - - - - - - -
    def loadOnlineResources(self, file):
        """Load online resources from *file* (``cellosaurus_xrefs.txt`` is expected).

        The file is read as records terminated by a line starting with
        ``//``. Inside a record, the ``Abbrev`` line gives the short name and
        the ``Name`` line gives the name; all other lines are ignored.

        At the end of each record:
          * name and short name both indexed: the entry stored under the
            name is flagged ``isOnlineResource = True``;
          * neither indexed: a new organization is inserted;
          * exactly one indexed: the record is ignored with a warning.
        """
        shortname = None
        name = None
        with open(file) as f_in:
            for raw_line in f_in:
                line = raw_line.strip()
                # Split on the first ": " and compare the trimmed key, so that
                # column-aligned keys ("Name  : ...") are recognised and the
                # value never loses its first character to a fixed-width slice.
                key, sep, value = line.partition(": ")
                key = key.rstrip() if sep else None
                if key == "Abbrev":
                    shortname = value.strip()
                elif key == "Name":
                    name = value.strip()
                elif line.startswith("//"):
                    if name in self.korg_dict and shortname in self.korg_dict:
                        # case 1 - update
                        self.korg_dict[name].isOnlineResource = True
                    else:
                        korg = Organization(name=name, shortname=shortname, isOnlineResource=True)
                        # Here a name clash is reported in preference to a short-name clash.
                        other_korg = self._find_existing(name, shortname)
                        if other_korg is None:
                            # case 2 - insert
                            self._register(korg)
                        else:
                            # case 3 - ignore
                            print(f"WARNING: will ignore\n{korg}\nsince\n{other_korg}\nalready exists")
                    # Start the next record from a clean state.
                    shortname = None
                    name = None

    # - - - - - - - - - - - - - - - - - - - - - -
    def get(self, key):
        """Return the organization indexed under *key* (name or short name), or None."""
        return self.korg_dict.get(key)

    # - - - - - - - - - - - - - - - - - - - - - -
    def print(self):
        """Print one ``key --> organization`` line per index entry to stdout."""
        for key, korg in self.korg_dict.items():
            print(key, "-->", korg)
# ===========================================================================================
if __name__ == "__main__":
    # Script entry point: build the index from the two input files, then
    # dump every key and its organization to stdout.
    registry = KnownOrganizations()
    registry.loadInstitutions("data_in/institution_list")
    registry.loadOnlineResources("data_in/cellosaurus_xrefs.txt")
    registry.print()