-
Notifications
You must be signed in to change notification settings - Fork 0
/
04_run_index_init.py
53 lines (41 loc) · 1.74 KB
/
04_run_index_init.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
'''
This script initializes the Elasticsearch index that will hold the
indexed PubMed text data.
This version offers the option of preserving case sensitivity in the
indexed text.
'''
import json
from elasticsearch import Elasticsearch
'''
Parameters
'''
# --- Index configuration ---------------------------------------------------
# Name of the index (must match the 05 index populate file).
index_name = 'pubmed'
# Name of the index type (must match the 05 index populate file).
type_name = 'pubmed_meta'
# Number of shards for the index; use 1 when not running on a cluster.
number_shards = 1
# Number of replicas for the index.
number_replicas = 0
# True: index the text as case sensitive. False: index it as lower case.
case_sensitive = True

# --- Input -----------------------------------------------------------------
# Path of the JSON file holding the index initialization config.
index_init_config_file = './config/index_init_config.json'
'''
Main Code
'''
if __name__ == '__main__':
    # Script entry point: (re)create the Elasticsearch index named by
    # `index_name`, using the field properties read from
    # `index_init_config_file`. Any existing index with that name is
    # deleted first, so running this script discards previously indexed data.

    # Load the indexing config file. The context manager guarantees the file
    # handle is closed; JSON is read as UTF-8 rather than the platform default.
    with open(index_init_config_file, 'r', encoding='utf-8') as config_file:
        index_init_config = json.load(config_file)

    # Start elasticsearch (client created with its default connection settings)
    es = Elasticsearch()

    # Delete the old index if it exists
    if es.indices.exists(index=index_name):
        res = es.indices.delete(index=index_name)
        print('Deleted index:', index_name, '\nResponse:', res, '\n')

    # Request body parameters.
    # NOTE(review): the properties are nested under `type_name`; recent
    # Elasticsearch releases dropped mapping types - confirm this layout
    # matches the server version in use.
    mappings = {type_name: {'properties': index_init_config}}
    settings = {
        'number_of_shards': number_shards,
        'number_of_replicas': number_replicas,
    }
    if case_sensitive:
        # Register a custom analyzer whose filter chain contains only 'stop'
        # (no lowercase filter), so token case is kept as-is.
        settings['analysis'] = {
            'analyzer': {
                'casesensitive_text': {
                    'type': 'custom',
                    'tokenizer': 'standard',
                    'filter': ['stop'],
                },
            },
        }

    # Create an index
    res = es.indices.create(index=index_name, settings=settings, mappings=mappings)
    print('Created index:', index_name, '\nResponse:', res)