-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathrun.sh
executable file
·154 lines (118 loc) · 3.46 KB
/
run.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#!/usr/bin/env bash
# Load site-specific settings. The path is relative, so the script has to be
# started from the directory that contains configuration.cnf.
# NOTE(review): INPUT_DIR and OUTPUT_DIR are used further down but never
# assigned in this script; presumably they come from configuration.cnf,
# as may MQAF_SOLR_HOST / MQAF_SOLR_PORT - confirm against that file.
source configuration.cnf
# Base URL of the Solr server: MQAF_SOLR_HOST if it is set and non-empty,
# otherwise http://localhost:<MQAF_SOLR_PORT>, with the port defaulting to 8983.
SOLR_HOST=${MQAF_SOLR_HOST:-http://localhost:${MQAF_SOLR_PORT:-8983}}
#######################################
# Report whether a Solr core exists.
# Globals:   SOLR_HOST (read) - base URL of the Solr server
# Arguments: $1 - name of the core to look up
# Outputs:   writes 1 to stdout when the STATUS response holds a non-empty
#            object under the core's name, 0 otherwise (including when the
#            server cannot be reached or the answer is not valid JSON).
#            The flag is printed instead of returned so that callers can
#            capture it with $(check_core NAME).
#######################################
check_core() {
  local core=$1
  local url exists
  printf -v url '%s/solr/admin/cores?action=STATUS&core=%s' "$SOLR_HOST" "$core"
  # Ask jq directly for the entry under the core's key instead of grepping
  # pretty-printed JSON: the name is then compared literally (not as a
  # regex) and no GNU-only grep option is required.
  exists=$(curl -s "$url" \
    | jq -r --arg core "$core" \
      '(.status[$core]? // {}) | if (type == "object" and length > 0) then 1 else 0 end' \
      2>/dev/null)
  # jq prints nothing on empty or malformed input; treat that as "missing".
  printf '%s\n' "${exists:-0}"
}
#######################################
# Create a Solr core from the "_default" config set.
# Globals:   SOLR_HOST (read) - base URL of the Solr server
# Arguments: $1 - name of the core to create
# Outputs:   a progress line followed by whatever the Solr CoreAdmin CREATE
#            call answers, both on stdout
# Returns:   exit status of curl
#######################################
create_core() {
  # Keep the name local so the call does not clobber a caller's variable.
  local core=$1
  echo "creating Solr index: ${core} at $SOLR_HOST"
  curl -s "$SOLR_HOST/solr/admin/cores?action=CREATE&name=$core&configSet=_default"
}
# Make sure the Solr core used by the analysis exists, then empty it so the
# run starts from a clean index.
CORE=qa_ddb
PROD_EXISTS=$(check_core "$CORE")
echo "$CORE exists: $PROD_EXISTS"
if [[ "$PROD_EXISTS" != 1 ]]; then
  echo "Create Solr core '$CORE'"
  create_core "$CORE"
fi

# Delete every document in the core ...
curl "$SOLR_HOST/solr/$CORE/update" -H "Content-type: text/xml" --data-binary '<delete><query>*:*</query></delete>'
# ... then commit the deletion and ask Solr to optimize the index.
curl "$SOLR_HOST/solr/$CORE/update?optimize=true" -H 'Content-type: text/xml' --data-binary '<commit/>'
# Fat jar of the analyser; the path is relative to the current directory.
JAR=target/metadata-qa-ddb-1.0-SNAPSHOT-jar-with-dependencies.jar

# index text
# TODO: disable deletion
# Expansions are quoted so that a JAR or INPUT_DIR value containing spaces
# or glob characters is passed to java as a single argument.
java -cp "$JAR" de.gwdg.metadataqa.ddb.App \
  --schema schemas/dc-schema.yaml \
  --input "$INPUT_DIR/DC-DDB-WuerzburgTXT/UB_W-rzburg_Texte.xml" \
  --output results/texts.csv \
  --format csv \
  --path solr/qa_ddb \
  --index
# analyse text
# Abort early if OUTPUT_DIR is unset or empty: otherwise DIR would point at
# the filesystem root and the rm/mkdir below would act there.
DIR=${OUTPUT_DIR:?OUTPUT_DIR must be set}/DC-DDB-WuerzburgTXT
# -p also creates OUTPUT_DIR itself when it is missing and is a no-op when
# the directory already exists.
mkdir -p -- "$DIR"
# Start from a fresh database; -f makes a missing file a non-error.
rm -f -- "$DIR/qa.sqlite"

java -cp "$JAR" de.gwdg.metadataqa.ddb.App \
  --schema schemas/dc-schema.yaml \
  --input "$INPUT_DIR/DC-DDB-WuerzburgTXT/UB_W-rzburg_Texte.xml" \
  --output "$DIR/raw.csv" \
  --format csv \
  --path solr/qa_ddb \
  --sqlitePath "$DIR/qa.sqlite"

# Record which input file the results in this directory belong to.
echo "DC-DDB-WuerzburgTXT/UB_W-rzburg_Texte.xml" > "$DIR/filename"
echo "$DIR/raw.csv"

# NOTE(review): the message names process-texts.R while the command runs
# scripts/process.R - confirm which one is intended.
echo "Rscript scripts/process-texts.R"
Rscript scripts/process.R "$DIR"

# Load the raw CSV into the "issue" table of the SQLite database. The path
# is double-quoted so the sqlite3 shell reads it as a single argument.
sqlite3 "$DIR/qa.sqlite" << EOF
.mode csv
.import "${DIR}/raw.csv" issue
EOF
# analyse images
# Abort early if OUTPUT_DIR is unset or empty: otherwise DIR would point at
# the filesystem root and the rm/mkdir below would act there.
DIR=${OUTPUT_DIR:?OUTPUT_DIR must be set}/DC-DDB-WuerzburgIMG
# -p also creates OUTPUT_DIR itself when it is missing and is a no-op when
# the directory already exists.
mkdir -p -- "$DIR"
# Start from a fresh database; -f makes a missing file a non-error.
rm -f -- "$DIR/qa.sqlite"

# index images
# This run and the analysis run below share the same --output file, so the
# second run writes raw.csv again.
java -cp "$JAR" de.gwdg.metadataqa.ddb.App \
  --schema schemas/dc-schema.yaml \
  --input "$INPUT_DIR/DC-DDB-WuerzburgIMG/UB_W-rzburg_Bilder.xml" \
  --output "$DIR/raw.csv" \
  --format csv \
  --path solr/qa_ddb \
  --index

# NOTE(review): unlike the text analysis, this call passes no
# "--path solr/qa_ddb" - confirm whether that is intentional.
java -cp "$JAR" de.gwdg.metadataqa.ddb.App \
  --schema schemas/dc-schema.yaml \
  --input "$INPUT_DIR/DC-DDB-WuerzburgIMG/UB_W-rzburg_Bilder.xml" \
  --output "$DIR/raw.csv" \
  --format csv \
  --sqlitePath "$DIR/qa.sqlite"

# Record which input file the results in this directory belong to.
echo "DC-DDB-WuerzburgIMG/UB_W-rzburg_Bilder.xml" > "$DIR/filename"

# NOTE(review): the message names process-images.R while the command runs
# scripts/process.R - confirm which one is intended.
echo "Rscript scripts/process-images.R"
Rscript scripts/process.R "$DIR"

# Load the raw CSV into the "issue" table of the SQLite database. The path
# is double-quoted so the sqlite3 shell reads it as a single argument.
sqlite3 "$DIR/qa.sqlite" << EOF
.mode csv
.import "${DIR}/raw.csv" issue
EOF
# Analyse the MARC records found recursively under --directory, using
# //marc:record as the record address.
# NOTE(review): input and output locations are hard-coded to one user's home
# directory rather than derived from INPUT_DIR / OUTPUT_DIR - confirm before
# running this elsewhere.
java -cp "$JAR" de.gwdg.metadataqa.ddb.App \
  --schema schemas/marc-schema.yaml \
  --directory /home/kiru/temp/source/MARC_BSB/oai_bsb_84 \
  --output /home/kiru/temp/raw.csv \
  --format csv \
  --path solr/qa_ddb \
  --sqlitePath /home/kiru/temp/qa.sqlite \
  --record-address '//marc:record' \
  --recursive

# Stop here: the lines after this point are free-form notes, not commands.
# A bare exit passes on the status of the java call above.
exit
EDM_Bamberg
EDM-OAI: split: //record - metadata/rdf:RDF
DDB-EDM_BSB2-0
EDM
EDM-DDB-Bamberg-0
EDM
EDM-DDB-WuerzburgIMG-0
EDM
EDM-DDB-WuerzburgTXT-0
EDM
LIDO_dmm_digiporta
LIDO: split: //lido:lido
LIDO_IfL
LIDO
LIDO_sddm
LIDO: split: //lido:lido
LIDO_UB-HB_anaill
LIDO: split: //record //lido:lido
MARC_BSB
MARC <mx:record xmlns:mx="info:lc/xmlns/marcxchange-v1"
METSMODS_2021
METSMODS split: //record //mets:mets