-
Notifications
You must be signed in to change notification settings - Fork 0
/
11_run_metadata_update.py
51 lines (37 loc) · 1.64 KB
/
11_run_metadata_update.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
'''
The purpose of this file is to format mappings that can be used for the CaseOLAP scoring.
The mappings are written to these output paths:
- outfile_pmid2entity2count: {'PMID':{'Entity':'Hits',...},...}
- cat2pmids_path: {'Category1': ['PMID_1',...,'PMID_n' ], ...}
Starting with the textcube provided PMIDs in each category, this produces mappings of
PMIDs in each category only if those PMIDs were found to contain entities of interest.
'''
import json
from caseolap._11_metadata_update import *
'''
Parameters
'''
# Input file paths (read by the main code below)
entitycount_path = 'data/entitycount.txt' # Entity counts per PMID; line layout: PMID Entity|Count ...
pmid2category_path = 'data/textcube_pmid2category.json' # PMIDs of interest mapped to their category
category_names_file = './config/textcube_config.json' # Category names (JSON, loaded with json.load)
# Output file paths (written by the MetadataUpdate methods called below)
outfile_pmid2entity2count = 'data/metadata_pmid2entity2count.json' # {PMID:{Entity:Count,...},...}
cat2pmids_path = 'data/metadata_category2pmids.json' # {CatName:[PMID,...], ...}
logfile_path = './log/metadata_update_log.txt' # Log file; opened in write mode (overwritten on each run)
'''
Main Code
'''
if __name__ == '__main__':
    # Script entry point: builds the two metadata mappings used for
    # CaseOLAP scoring and records progress in the log file.
    #
    # The log file is opened first (write mode, so any previous log is
    # overwritten) and managed by a context manager so that it is flushed
    # and closed even if one of the update steps raises.
    with open(logfile_path, 'w') as logfile:

        # Get category names; the config file handle is closed as soon as
        # the JSON has been parsed instead of being left to the garbage
        # collector.
        with open(category_names_file, 'r') as category_file:
            category_names = json.load(category_file)

        # Initialize class
        MU = MetadataUpdate(category_names)

        # Rewrite PMID->Entity->Entity Count as a nested dictionary
        MU.update_pmid2entity2count(entitycount_path,
                                    outfile_pmid2entity2count,
                                    logfile)

        # Category->PMID (PMIDs in which queried entities were discovered)
        MU.map_category2pmid_pmids_with_entities(pmid2category_path,
                                                 cat2pmids_path,
                                                 logfile)