config.py
import os
# possible values: WARN, INFO, DEBUG
LOGGING_LEVEL = 'DEBUG'
# Connection to the database where we save orcid-claims (this database
# serves as a running log of claims and storage of author-related
# information). It is not consumed by others (i.e., we 'push' results).
# SQLALCHEMY_URL = 'postgres://docker:docker@localhost:6432/docker'
SQLALCHEMY_URL = 'sqlite:///'
SQLALCHEMY_ECHO = False
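# A minimal sketch (assumed wiring, not part of this config) of how these
# two values would typically be consumed with SQLAlchemy:
#   from sqlalchemy import create_engine
#   engine = create_engine(SQLALCHEMY_URL, echo=SQLALCHEMY_ECHO)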
# Celery related configuration
# All work we do is concentrated into one exchange (the queues are marked
# by topics, e.g. ads.orcid.claims); the queues will be created automatically
# based on the workers' definitions. If 'durable' = True, the queue is
# created as permanent *AND* the worker will publish 'permanent' messages.
# I.e. if RabbitMQ goes down or is restarted, the unconsumed messages will
# still be there.
#CELERY_DEFAULT_EXCHANGE = 'orcid_pipeline'
#CELERY_DEFAULT_EXCHANGE_TYPE = "topic"
CELERY_INCLUDE = ['ADSOrcid.tasks']
ACKS_LATE = True
PREFETCH_MULTIPLIER = 1
CELERYD_TASK_SOFT_TIME_LIMIT = 60
CELERY_BROKER = 'pyamqp://'
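# A hedged sketch of how these settings might map onto a Celery application
# (the app name 'orcid-pipeline' is a placeholder; the real app is defined
# in ADSOrcid.tasks):
#   from celery import Celery
#   app = Celery('orcid-pipeline', broker=CELERY_BROKER, include=CELERY_INCLUDE)
#   app.conf.update(task_acks_late=ACKS_LATE,
#                   worker_prefetch_multiplier=PREFETCH_MULTIPLIER,
#                   task_soft_time_limit=CELERYD_TASK_SOFT_TIME_LIMIT)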
# Where to send the results of our processing; since we rely on Celery, we
# have to specify the task name, i.e. the worker's module on the remote side
# that will handle the message. This is a limitation of the current setup.
# TODO: find a way to send a message to the remote queue and let Celery
# deliver it to the appropriate worker without having to specify its name
OUTPUT_CELERY_BROKER = 'pyamqp://guest:guest@localhost:6672/master_pipeline'
OUTPUT_TASKNAME = 'adsmp.tasks.task_update_record'
#OUTPUT_EXCHANGE = 'master_pipeline'
OUTPUT_QUEUE = 'update-record'
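# Illustrative only (assumes a Celery app `app` bound to
# OUTPUT_CELERY_BROKER, and `record` is a hypothetical payload):
# dispatching one record to the master pipeline would look roughly like
#   app.send_task(OUTPUT_TASKNAME, args=(record,), queue=OUTPUT_QUEUE)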
# URLs to get data from our own API; the token must give us
# access to the orcid microservice + access to the info about
# a user (highly privileged access, so make sure you are not
# exposing it!)
API_ENDPOINT = 'https://api.adsabs.harvard.edu'
API_SOLR_QUERY_ENDPOINT = API_ENDPOINT + '/v1/search/query/'
API_ORCID_EXPORT_PROFILE = API_ENDPOINT + '/v1/orcid/get-profile/%s'
API_ORCID_UPDATES_ENDPOINT = API_ENDPOINT + '/v1/orcid/export/%s'
API_ORCID_UPDATE_BIB_STATUS = API_ENDPOINT + '/v1/orcid/update-status/%s'
API_ORCID_UPDATE_PROFILE = API_ENDPOINT + '/v1/orcid/update-orcid-profile/%s'
API_TOKEN = 'fixme'
# The ORCID API public endpoint
API_ORCID_PROFILE_ENDPOINT = 'https://pub.orcid.org/v2.0/%s/record'
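# A rough usage sketch of calling our API with this token via requests
# (the ORCID iD and field list below are made-up example values):
#   import requests
#   r = requests.get(API_SOLR_QUERY_ENDPOINT,
#                    params={'q': 'orcid:0000-0002-1825-0097', 'fl': 'bibcode'},
#                    headers={'Authorization': 'Bearer ' + API_TOKEN})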
# We use Levenshtein.ratio() to compute the similarity between two strings;
# if it is lower than this threshold, we refuse to match the names, e.g.
# Levenshtein.ratio('Neumann, John', 'Neuman, J')
# > Out[2]: 0.8181818181818182
# Based on testing, the minimum Levenshtein ratio has been increased to 0.75
MIN_LEVENSHTEIN_RATIO = 0.75
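# An assumed matching check built on this threshold (sketch only;
# claimed_name and author_name are hypothetical variables):
#   import Levenshtein
#   if Levenshtein.ratio(claimed_name, author_name) >= MIN_LEVENSHTEIN_RATIO:
#       ...  # treat the two names as the same author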
# Order in which the identifiers (inside an ORCID profile) will be tested
# to retrieve a canonical bibcode; the first match stops the process. A higher
# number means higher priority. The '*' entry is the fallback for identifier
# types not listed here; if an identifier's number is < 0, it will be skipped.
ORCID_IDENTIFIERS_ORDER = {'bibcode': 9, 'doi': 8, 'arxiv': 7, '*': 0}
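# A sketch of how this priority map might be applied (hypothetical; `ids`
# stands for the identifier types found in a profile):
#   priority = lambda t: ORCID_IDENTIFIERS_ORDER.get(t, ORCID_IDENTIFIERS_ORDER['*'])
#   for id_type in sorted(ids, key=priority, reverse=True):
#       if priority(id_type) < 0:
#           continue  # negative priority means skip this identifier
#       ...  # try to resolve a canonical bibcode; stop at the first match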
# Token to query Kibana; it gives us access to our logs
KIBANA_TOKEN = 'fix_me'
# URL to access SOLR; can be a localhost URL after tunneling
SOLR_URL = 'http://localhost:9984/solr/collection1/query'