-
Notifications
You must be signed in to change notification settings - Fork 0
/
08_run_count_synonyms.py
41 lines (32 loc) · 1.27 KB
/
08_run_count_synonyms.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
from caseolap._08_count_synonyms import *
import sys, json, time, os
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search, Q
from multiprocessing import cpu_count, Process
# ---------------------------------------------------------------------------
# Parameters
# ---------------------------------------------------------------------------

# Input files
entity_dict_path = 'input/id2syns.json'
textcube_pmid2category = 'data/textcube_pmid2category.json'

# Intermediate file: produced as output by one step, then used as input
# by a later one.
syn_pmid_count = 'data/syn_pmid_count.txt'

# Output files
pmid_syn_count_out = 'data/pmid_synonym_counts.json'  # PMID Syn|Count...Syn|Cnt
synfound_pmid2cat = 'data/synfound_pmid2category.txt'  # PMID--->CategoryNumber
logfile = 'log/synonymcount_log.txt'  # #hits:Synonym

# Other settings
index_name = 'pubmed'  # Name of the index to search

# Field to search. Use 'abstract' to search the abstracts and titles;
# switch to the alternative below to search the abstracts, titles, and
# full text.
key = 'abstract'
# key = 'full_text'
# ---------------------------------------------------------------------------
# Main code
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    # Instantiate the synonym counter from the two input paths.
    counter = CountSynonyms(entity_dict_path, textcube_pmid2category)

    # Search for the synonyms in the indexed text.
    counter.synonym_search(key, logfile, syn_pmid_count, index_name)

    # Finalize the output files.
    counter.finish_synonym_search(
        logfile,
        syn_pmid_count,
        pmid_syn_count_out,
        synfound_pmid2cat,
    )