-
Notifications
You must be signed in to change notification settings - Fork 0
/
dodo.py
121 lines (100 loc) · 3.48 KB
/
dodo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# encoding=utf8
from doit import get_var
from roald import Roald
import logging
import logging.config
# Load the logging setup from 'logging.cfg' (a relative path, so it is
# resolved against the current working directory), then create the module
# logger that the task actions in this file write to.
logging.config.fileConfig('logging.cfg')
logger = logging.getLogger(__name__)
import data_ub_tasks
# Settings shared by the task definitions in this file.
config = {
    # Read through doit's get_var, so they can be supplied on the doit
    # command line; the second argument is the fallback value.
    'dumps_dir': get_var('dumps_dir', '/opt/data.ub/www/default/dumps'),
    'dumps_dir_url': get_var('dumps_dir_url', 'http://data.ub.uio.no/dumps'),

    # Fixed values. The whole dict is handed to
    # data_ub_tasks.fuseki_task_gen by task_fuseki.
    'graph': 'http://data.ub.uio.no/usvd',
    'fuseki': 'http://localhost:3030/ds',

    # Stem from which the generated file names and the concept URI format
    # are derived.
    'basename': 'usvd',
}
# def task_fetch():
# logger.info('Checking for updated files')
# yield {
# 'doc': 'Fetch remote files that have changed',
# 'basename': 'fetch',
# 'name': None
# }
# yield {
# 'name': 'git pull',
# 'actions': [
# 'git config user.name "ubo-bot"',
# 'git config user.email "danmichaelo+ubobot@gmail.com"',
# 'git pull',
# ]
# }
# for file in [
# {
# 'remote': 'https://mapper.biblionaut.net/export.rdf',
# 'local': 'src/mappings.rdf'
# }
# ]:
# yield {
# 'name': file['local'],
# 'actions': [(data_ub_tasks.fetch_remote, [], {
# 'remote': file['remote'],
# 'etag_cache': '{}.etag'.format(file['local'])
# })],
# 'targets': [file['local']]
# }
def task_build():
    """Define the doit task that builds the distribution files.

    The task's single action converts ``src/usvd.xml`` into a JSON dump, a
    MARC21 XML file and a Turtle (RDF/SKOS) file. All output file names are
    derived from ``config['basename']``.

    Returns:
        dict: doit task description holding the doc string, the action, the
        files the action depends on and the files it produces.
    """

    def build_dist(task):
        """Load the source vocabulary and write every target of the task.

        Args:
            task: not used by this action (doit can inject the running task
                into an action through a parameter with this name).
        """
        # Imported here so that merely loading the dodo file does not
        # require rdflib.
        from rdflib import URIRef
        from rdflib.namespace import SKOS
        import re

        basename = config['basename']
        logger.info('Building new dist')

        roald = Roald()
        roald.load('src/usvd.xml', format='bibsys', language='nb', exclude_underemne=True)
        roald.set_uri_format('http://data.ub.uio.no/%s/c{id}' % basename)
        roald.save('%s.json' % basename)
        logger.info('Wrote %s.json', basename)

        # Extra Turtle files handed to the RDF export below.
        includes = [
            '%s.scheme.ttl' % basename,
            'ubo-onto.ttl',
        ]

        # 1) MARC21
        marc21options = {
            'vocabulary_code': 'usvd',
            'created_by': 'No-TrBIB',
        }
        roald.export('dist/%s.marc21.xml' % basename, format='marc21', **marc21options)
        logger.info('Wrote dist/%s.marc21.xml', basename)

        # 2) RDF (core)
        filename = 'dist/%s.ttl' % basename
        prepared = roald.prepare_export('rdfskos', include=includes)
        g = prepared.prepared_data['graph']

        # Everything except digits, dots and hyphens is stripped from a
        # notation before it is put into the dewey.info URI.
        non_ddc_chars = re.compile(r'[^0-9.-]')

        # Snapshot the matching triples first: the loop body adds triples to
        # the very graph being read, and mutating a graph while iterating a
        # live generator over it is not safe.
        notation_triples = list(g.triples((None, SKOS.notation, None)))
        for subject, _predicate, notation in notation_triples:
            ddc = non_ddc_chars.sub('', notation)
            if not ddc:
                # Nothing usable is left, so the link would be the malformed
                # URI 'http://dewey.info/class//e23/'. Skip it instead.
                logger.warning('No DDC number in notation %r of %s; exactMatch not added',
                               notation, subject)
                continue
            g.add((subject, SKOS.exactMatch,
                   URIRef('http://dewey.info/class/%s/e23/' % ddc)))

        prepared.write(filename)
        logger.info('Wrote %s', filename)

    return {
        'doc': 'Build distribution files (RDF/SKOS + MARC21XML) from source files',
        'actions': [build_dist],
        'file_dep': [
            'src/usvd.xml',
            'ubo-onto.ttl',
            '%s.scheme.ttl' % config['basename'],
        ],
        'targets': [
            '%s.json' % config['basename'],
            'dist/%s.marc21.xml' % config['basename'],
            'dist/%s.ttl' % config['basename'],
        ],
    }
# def task_git_push():
# return data_ub_tasks.git_push_task_gen(config)
def task_publish_dumps():
    # Delegates to data_ub_tasks.publish_dumps_task_gen, giving it the dumps
    # directory and the names of the MARC21 XML and Turtle files.
    #
    # Documented with a comment rather than a docstring on purpose: doit
    # uses a task creator's docstring as the task 'doc' when the task dict
    # has none, so a docstring here could change what `doit list` shows.
    basename = config['basename']
    dump_files = [
        '%s.marc21.xml' % basename,
        '%s.ttl' % basename,
    ]
    return data_ub_tasks.publish_dumps_task_gen(config['dumps_dir'], dump_files)
def task_fuseki():
    # The task definition comes entirely from data_ub_tasks.fuseki_task_gen,
    # which receives the module-level config dict.
    #
    # Documented with a comment rather than a docstring on purpose: doit
    # uses a task creator's docstring as the task 'doc' when the task dict
    # has none, so a docstring here could change what `doit list` shows.
    fuseki_task = data_ub_tasks.fuseki_task_gen(config)
    return fuseki_task