-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrun.py
116 lines (94 loc) · 2.93 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import argparse
import json
import os
from adsputils import load_config, setup_logging
from adsaffildb import tasks, utils
from adsaffildb.models import AffilCuration as affil_curation
from adsaffildb.models import AffilData as affil_data
from adsaffildb.models import AffilInst as affil_inst
from adsaffildb.models import AffilNorm as affil_norm
# Directory containing this script; handed to the config loader and the
# logging setup below.
proj_home = os.path.realpath(os.path.dirname(__file__))

# Project configuration, loaded relative to proj_home.
config = load_config(proj_home=proj_home)

# Script-level logger. Level and stdout attachment come from the config,
# falling back to "INFO" and False when the keys are absent.
logger = setup_logging(
    "run.py",
    proj_home=proj_home,
    level=config.get("LOGGING_LEVEL", "INFO"),
    attach_stdout=config.get("LOG_STDOUT", False),
)
def get_args():
    """Parse the command-line options of this script.

    Recognized options:
        -lp / --load_parentchild  load parent-child information (``load_pc``)
        -lm / --load_matched      load matched affiliation strings
                                  (``load_matched``)
        -f  / --filename          file to load instead of the configured one
                                  (``filename``)
        -n  / --normalize         normalize affiliations in the data table
                                  (``normalize``)

    Returns:
        argparse.Namespace: with attributes ``load_pc``, ``load_matched``,
        ``normalize`` (booleans, False when the flag is absent) and
        ``filename`` (str, or None when not given).
    """
    # The text is a description of the tool, not the program name: passing it
    # positionally would set ``prog`` and corrupt the "usage:" line.
    parser = argparse.ArgumentParser(
        description="Manage affiliation data for augment_pipeline"
    )

    parser.add_argument(
        "-lp",
        "--load_parentchild",
        dest="load_pc",
        action="store_true",
        default=False,
        help="Load parent-child information from file into db",
    )

    parser.add_argument(
        "-lm",
        "--load_matched",
        dest="load_matched",
        action="store_true",
        default=False,
        help="Load matched affiliation strings from file into db",
    )

    parser.add_argument(
        "-f",
        "--filename",
        dest="filename",
        action="store",
        default=None,
        help="Filename to load, if different from what is in config",
    )

    # Default is False (not None) so that this flag behaves like the other
    # store_true flags; both values are falsy for truthiness checks.
    parser.add_argument(
        "-n",
        "--normalize",
        dest="normalize",
        action="store_true",
        default=False,
        help="Normalize affiliations in data table",
    )

    return parser.parse_args()
def load_parent_child(filename):
    """Read the parent-child dictionary from *filename* and bulk-insert it.

    The file is read with ``utils.read_affid_dict``; the result is passed to
    ``tasks.task_bulk_insert_data`` together with the ``AffilInst`` model.
    A failure while reading is logged as an error and nothing is inserted.
    Exceptions raised by the insert itself are not caught here.
    """
    try:
        parent_child_map = utils.read_affid_dict(filename)
    except Exception as exc:
        logger.error("Failed to read parent_child dictionary: %s" % exc)
        return

    tasks.task_bulk_insert_data(affil_inst, parent_child_map)
def load_matched_affils(filename):
    """Read the matched-affiliation dictionary from *filename* and insert it.

    The file is read with ``utils.read_match_dict``; the result is passed to
    ``tasks.task_bulk_insert_data`` together with the ``AffilData`` model.
    A failure while reading is logged as an error and nothing is inserted.
    Exceptions raised by the insert itself are not caught here.
    """
    try:
        affilDataMap = utils.read_match_dict(filename)
    except Exception as err:
        # Name the matched-affiliation data in the message so a failure here
        # is not mistaken for a parent_child load failure.
        logger.error("Failed to read matched affiliations dictionary: %s" % err)
    else:
        tasks.task_bulk_insert_data(affil_data, affilDataMap)
    return
def main():
    """Run the actions selected on the command line.

    Each requested load uses the ``--filename`` value when one was given and
    otherwise the file name from the config (``PARENT_CHILD_FILE`` or
    ``MATCHED_AFFILS_FILE``). When no file name is available, an error is
    logged and that load is skipped. Normalization, if requested, runs last.
    """
    opts = get_args()

    if opts.load_pc:
        parent_child_path = opts.filename or config.get("PARENT_CHILD_FILE", None)
        if parent_child_path:
            load_parent_child(parent_child_path)
        else:
            logger.error("No parent_child data file name specified.")

    if opts.load_matched:
        matched_path = opts.filename or config.get("MATCHED_AFFILS_FILE", None)
        if matched_path:
            load_matched_affils(matched_path)
        else:
            logger.error("No matched affiliation file name specified.")

    if opts.normalize:
        tasks.task_normalize_affils()
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()