forked from harvard-library/archivesspace-preprocessor
-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_schematron.xml
418 lines (372 loc) · 20.6 KB
/
test_schematron.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
<?xml version="1.0" encoding="UTF-8"?>
<schema xmlns="http://purl.oclc.org/dsdl/schematron" queryBinding="xslt2">
<ns uri="http://www.w3.org/1999/xlink" prefix="xlink"/>
<ns uri="urn:isbn:1-931666-22-9" prefix="ead"/>
<phase id="automated">
<!-- Phase 'automated': switches on the eighteen patterns listed here, each of which has an id ending in '-automated'. -->
<active pattern="descgrp-automated"/>
<active pattern="date-unitdate-automated"/>
<active pattern="container-automated"/>
<active pattern="dao-automated"/>
<active pattern="any-automated"/>
<active pattern="c-automated"/>
<active pattern="note-automated"/>
<active pattern="unitdate-automated"/>
<active pattern="unitid-automated"/>
<active pattern="unittitle-automated"/>
<active pattern="scopecontent-automated"/>
<active pattern="daogrp-automated"/>
<active pattern="namegrp-automated"/>
<active pattern="name-automated"/>
<active pattern="identifier-automated"/>
<active pattern="bibliography-automated"/>
<active pattern="ptrgrp-automated"/>
<active pattern="nested-list-automated"/>
</phase>
<phase id="manual">
<!-- Phase 'manual': switches on the five patterns listed here, each of which has an id ending in '-manual'. -->
<active pattern="descgrp-manual"/>
<active pattern="extent-nonnumeric-manual"/>
<active pattern="did-manual"/>
<active pattern="table-manual"/>
<active pattern="change-manual"/>
</phase>
<pattern id="extent-nonnumeric-manual">
<!-- Checks every 'extent' element: after optional leading whitespace its string value has to begin with a digit or a full stop. -->
<rule context="//*:extent">
<assert test="matches(string(.), '^\s*[0123456789.]')" diagnostics="enm-1">'extent' element content should not start with non-numeric character except '.'.</assert>
</rule>
</pattern>
<pattern id="descgrp-automated">
<!-- Reports the children of 'descgrp' elements. Every assertion in this pattern is written to fail unconditionally,
     so a report is produced for each node a rule context matches.
     The second and third rules only look inside a 'descgrp' that either has a 'type' attribute other than 'add',
     or has no non-blank 'type' and carries encodinganalog '544'. -->
<rule context="//*:descgrp/*:head">
<!-- Any 'head' that is a direct child of a 'descgrp'. -->
<assert test="false()" diagnostics="da-1">'head' element should be dropped from descgrp</assert>
</rule>
<rule context="//*:descgrp[(@type and @type != 'add') or (not(normalize-space(@type)) and @encodinganalog = '544')]/*[local-name() = ('address', 'blockquote', 'chronlist', 'list', 'p')]">
<!-- Children of the kinds the message says belong in a new 'note' element. -->
<assert test="false()" diagnostics="da-2">'descgrp' is deprecated, and must be removed. 'address', 'blockquote', 'chronlist', 'list', and 'p' children of 'descgrp' must be reparented into a new 'note' element in the 'descgrp's parent element</assert>
</rule>
<rule context="//*:descgrp[(@type and @type != 'add') or (not(normalize-space(@type)) and @encodinganalog = '544')]/*[local-name() = ('accessrestrict', 'accruals', 'acqinfo', 'altformavail', 'appraisal', 'custodhist', 'note', 'prefercite', 'processinfo', 'separatedmaterial', 'userestrict')]">
<!-- Children of the kinds the message says can move straight into the parent of the 'descgrp'. -->
<assert test="false()" diagnostics="da-3">'descgrp' is deprecated, and must be removed. Element children of the following types must be reparented into 'descgrp's parent element:
'accessrestrict', 'accruals', 'acqinfo', 'altformavail', 'appraisal', 'custodhist', 'note', 'prefercite', 'processinfo', 'separatedmaterial', 'userestrict'</assert>
</rule>
</pattern>
<pattern id="descgrp-manual">
<!-- Looks at every 'descgrp' that has a 'type' attribute and reports the ones whose type is 'add'. -->
<rule context="//*:descgrp[@type]">
<!-- The context guarantees @type exists, so this passes exactly when the value differs from 'add'. -->
<assert test="@type != 'add'" diagnostics="dm-1">'descgrp' is deprecated, and must be removed. 'descgrp' element with type 'add' requires manual review and intervention.</assert>
</rule>
</pattern>
<pattern id="did-manual">
<!-- Required content of 'did' elements: presence checks on the collection-level 'did' (the one directly under
     /ead/archdesc), a looser presence check on every 'did' further down, and non-empty checks on the
     collection-level 'unitid', 'unittitle', 'unitdate' and 'physdesc/extent'. -->
<rule context="/*:ead/*:archdesc/*:did">
<!-- Collection-level 'did': each assertion demands one kind of child. -->
<assert test="*:unitid" diagnostics="didm-1">
Collection level 'did' element must contain a 'unitid' element.
</assert>
<assert test="*:unittitle" diagnostics="didm-2">
Collection level 'did' element must contain a 'unittitle' element.
</assert>
<!-- A 'unitdate' nested directly inside the 'unittitle' also satisfies this one. -->
<assert test="*:unitdate or *:unittitle/*:unitdate" diagnostics="didm-3">
Collection level 'did' element must contain a 'unitdate' element.
</assert>
<assert test="*:physdesc/*:extent" diagnostics="didm-5">
Collection level 'did' element must contain 'physdesc' element with 'extent' child.
</assert>
</rule>
<rule context="/*:ead/*:archdesc/*[local-name() != 'did']//*:did">
<!-- Every 'did' that is a descendant of a non-'did' child of 'archdesc'. -->
<assert test="*:unitdate or *:unittitle" diagnostics="didm-4">
'did' elements must contain a either a 'unitdate' element, a 'unittitle' element or both.
</assert>
</rule>
<!-- The four rules below all use the same test: the element needs at least one direct text-node child
     that is not whitespace-only. Text held only in child elements does not count. -->
<rule context="/*:ead/*:archdesc/*:did/*:unitid">
<assert test="text()[normalize-space()]" diagnostics="didm-1a">
Collection level 'unitid' element must not be empty.
</assert>
</rule>
<rule context="/*:ead/*:archdesc/*:did/*:unittitle">
<assert test="text()[normalize-space()]" diagnostics="didm-2a">
Collection level 'unittitle' element must not be empty.
</assert>
</rule>
<rule context="/*:ead/*:archdesc/*:did/*:unitdate | /*:ead/*:archdesc/*:did/*:unittitle/*:unitdate">
<assert test="text()[normalize-space()]" diagnostics="didm-3a">
Collection level 'unitdate' element must not be empty.
</assert>
</rule>
<rule context="/*:ead/*:archdesc/*:did/*:physdesc/*:extent">
<assert test="text()[normalize-space()]" diagnostics="didm-5a">
Collection level 'physdesc/extent' element must not be empty.
</assert>
</rule>
</pattern>
<pattern id="table-manual">
<!-- Reports every 'table' element in the document. -->
<rule context="//*:table">
<!-- Unconditional failure: the presence of the element is the problem being reported. -->
<assert test="false()" diagnostics="tblm-1">
'table' element is deprecated and must be removed.
</assert>
</rule>
</pattern>
<pattern id="date-unitdate-automated">
<!-- Attribute and content checks shared by 'date' and 'unitdate' elements. -->
<rule context="//*:date|//*:unitdate">
<!-- Neither 'startYear' nor 'endYear' may be present. -->
<assert test="not(@startYear or @endYear)" diagnostics="dua-1">
'date' and 'unitdate' elements must not contain 'startYear' or 'endYear' attributes.
</assert>
<!-- Passes when the attributes and direct text nodes, concatenated, contain something other than whitespace. -->
<assert test="normalize-space(string-join((./@startYear, ./@endYear, ./@normal, ./text()), ''))" diagnostics="noempty-2">
'date' and 'unitdate' elements must contain a value in either or both of their 'normal' attribute or their text content.
</assert>
<!-- Start/end ordering.
     The year attributes are compared as numbers: untyped attribute values compared with 'gt' are treated as
     strings, which would rank '999' after '1000'. number() yields NaN for a missing or non-numeric value, and a
     comparison involving NaN is false, so such cases are not reported here.
     The two halves of a '/'-separated 'normal' value are still compared as strings; when the attribute is
     absent or has no '/', one operand is empty and the comparison is likewise false. -->
<assert test="not(number(@startYear) gt number(@endYear) or tokenize(@normal, '/')[1] gt tokenize(@normal, '/')[2])" diagnostics="dua-3">
'date' and 'unitdate' elements must not have ending dates previous to their start dates.
</assert>
</rule>
</pattern>
<pattern id="container-automated">
<!-- Every 'container' element is checked for the presence of two attributes; the values are not inspected. -->
<rule context="//*:container">
<assert test="@type" diagnostics="ca-1">
'container' element must contain 'type' attribute.
</assert>
<assert test="@label" diagnostics="ca-2">
'container' element must contain 'label' attribute.
</assert>
</rule>
</pattern>
<pattern id="dao-automated">
<!-- Every 'dao' element needs a 'title' attribute in the xlink namespace (prefix declared on the schema's 'ns' element). -->
<rule context="//*:dao">
<assert test="@xlink:title" diagnostics="dao-1">
'dao' element must contain 'xlink:title' attribute.
</assert>
</rule>
</pattern>
<pattern id="any-automated">
<!-- Applies to every element in the document. -->
<rule context="//*">
<!-- Passes when the element has at least one child node of any kind or at least one attribute. -->
<assert test="node() or @*" diagnostics="noempty-1">
Any element must contain either text, element, or attribute content.
</assert>
</rule>
</pattern>
<pattern id="c-automated">
<!-- Component elements: unnumbered 'c' and the numbered forms 'c01' through 'c12'. -->
<rule context="//*[local-name() = ('c', 'c01', 'c02', 'c03', 'c04', 'c05', 'c06', 'c07', 'c08', 'c09', 'c10', 'c11', 'c12')]">
<!-- Two ways to pass: a 'level' attribute whose value is not 'otherlevel', or level 'otherlevel' together with
     a non-blank 'otherlevel' attribute.
     NOTE(review): an empty level="" satisfies the first branch; confirm whether that is intended. -->
<assert test="(@level and @level != 'otherlevel') or (@level = 'otherlevel' and normalize-space(@otherlevel))" diagnostics="c-1">
'c' element must contain a value in its 'level' attribute. If that value is 'otherlevel', the 'c' must also contain an 'otherlevel' attribute.
</assert>
</rule>
</pattern>
<pattern id="note-automated">
<!-- Placement checks for 'note' elements. All four assertions are evaluated for every 'note'. -->
<rule context="//*:note">
<!-- Fails when the note sits anywhere inside a 'controlaccess' element.
     The ancestor axis is tested directly, with the '*:' namespace wildcard used by the rest of this schema;
     a child step without the wildcard would look for a 'controlaccess' child of an ancestor and would not
     match elements in the EAD namespace. -->
<assert test="not(ancestor::*:controlaccess)" diagnostics="note-1">
'controlaccess' elements cannot contain 'note' elements.
</assert>
<!-- Fails when the parent element is named 'archdesc'. -->
<assert test="local-name(..) != 'archdesc'" diagnostics="note-2">
'archdesc' elements cannot contain 'note' elements. 'note' elements should be converted to 'odd' elements.
</assert>
<!-- Fails when the parent element is named 'c'.
     NOTE(review): numbered components ('c01' to 'c12') are not covered by this comparison; confirm whether they should be. -->
<assert test="local-name(..) != 'c'" diagnostics="note-3">
'c' and 'archdesc' elements cannot contain 'note' elements. 'note' elements should be converted to 'odd' elements.
</assert>
<!-- Fails when the parent element is named 'did'. -->
<assert test="local-name(..) != 'did'" diagnostics="note-4">
'did' element cannot contain 'note' elements. 'note' elements should be converted to 'odd' elements, which should be moved out of their parent 'did' elements and inserted as a following sibling of same.
</assert>
</rule>
</pattern>
<pattern id="unitdate-automated">
<!-- Looks for approximation wording in the direct text of 'unitdate' elements. -->
<rule context="//*:unitdate">
<!-- The direct text nodes are joined and searched, case-insensitively, for 'circa', 'ca', 'c', 'approximately'
     or 'approx' followed by whitespace or a full stop.
     The leading (^|\W) requires the keyword to start the string or follow a non-word character, so that the
     tail of an ordinary word (for example the 'c.' in 'Dec. 1920') is not taken for an abbreviation of 'circa'.
     NOTE(review): the test does not look at the 'certainty' attribute, so a 'unitdate' that still contains
     such wording is reported even when certainty='approximate' is already set; confirm that is intended. -->
<assert test="not(matches(string-join(./text(), ''), '(^|\W)(circa|ca|c|approximately|approx)(\s|\.)', 'i'))"
diagnostics="dua-2">
'unitdate' containing approximation language must carry 'certainty' attribute with value 'approximate'.
</assert>
</rule>
</pattern>
<pattern id="unitid-automated">
<!-- Two checks concerning 'unitid': at most one per 'did', and no 'ref' may target a 'unitid'. -->
<rule context="//*:did[*:unitid[2]]">
<!-- The context selects any 'did' that has a second 'unitid' child; each one is reported unconditionally. -->
<assert test="false()" diagnostics="unitid-1">
More than one 'unitid' element may not be provided per level of description ('did' element), per ArchivesSpace. One authoritative 'unitid' element should be decided on or created, method of choice/construction is left to local practice to determine.
</assert>
</rule>
<rule context="//*:unitid">
<!-- Fails when this element's 'id' equals the 'target' of any 'ref' element in the document. -->
<assert test="not(@id = //*:ref/@target)" diagnostics="unitid-2">
'ref' elements must not point at 'unitid' elements. 'target' attribute should be rewritten to point at parent 'c'.
</assert>
</rule>
</pattern>
<pattern id="unittitle-automated">
<!-- Checks concerning 'unittitle': at most one per 'did', no 'ref' may target one, and no 'unitdate' may be nested inside one. -->
<rule context="//*:did[count(./*:unittitle) gt 1]">
<!-- The context selects any 'did' with more than one 'unittitle' child; each one is reported unconditionally. -->
<assert test="false()" diagnostics="unittitle-1">
More than one 'unittitle' element may not be provided per level of description ('did' element), per ArchivesSpace. One authoritative 'unittitle' element should be decided on or created, method of choice/construction is left to local practice to determine.
</assert>
</rule>
<rule context="//*:unittitle">
<!-- Fails when this element's 'id' equals the 'target' of any 'ref' element in the document. -->
<assert test="not(./@id = //*:ref/@target)" diagnostics="unittitle-2">
'ref' elements must not point at 'unittitle' elements. 'target' attribute should be rewritten to point at parent 'c'.
</assert>
<!-- Fails when a 'unitdate' occurs at any depth inside this 'unittitle'.
     The message names the enclosing element as 'unittitle', since that is the element the rule context selects. -->
<assert test="not(.//*:unitdate)" diagnostics="unittitle-3">
'unitdate' elements nested inside 'unittitle' elements will be pulled out by ArchivesSpace, and thus leave gaps in their parent's content. Suggest copying 'unitdate' content directly into parent element, and moving 'unitdate' to following-sibling position relative to its enclosing 'unittitle'.
</assert>
</rule>
</pattern>
<pattern id="scopecontent-automated">
<!-- Reports every 'arrangement' element that is a direct child of a 'scopecontent' element. -->
<rule context="//*:scopecontent/*:arrangement">
<!-- Unconditional failure: matching the context is the violation. -->
<assert test="false()" diagnostics="scopecontent-1">
'scopecontent' elements must not contain 'arrangement' elements. Move 'arrangement' element to be a sibling of containing 'scopecontent' element.
</assert>
</rule>
</pattern>
<pattern id="daogrp-automated">
<!-- Two checks: every 'daogrp' is reported, and 'arc' children of a 'daogrp' get an xlink attribute consistency check. -->
<rule context="//*:daogrp">
<!-- Unconditional failure: matching the context is the violation. -->
<assert test="false()" diagnostics="daogrp-1">
'daogrp' element is not well supported in ArchivesSpace. Remove 'daogrp', and rewrite contained 'daoloc' elements to 'dao's, taking xlink attributes from 'arc' elements.
</assert>
</rule>
<rule context="//*:daogrp/*:arc[@xlink:show = 'new']">
<!-- Only 'arc' elements with xlink:show='new' reach this test; they must also have xlink:actuate='onRequest'. -->
<assert test="@xlink:actuate = 'onRequest'" diagnostics="daogrp-2">
'arc' elements with the 'xlink:show' attribute of 'new' should have the 'xlink:actuate' attribute 'onRequest'.
</assert>
</rule>
</pattern>
<pattern id="namegrp-automated">
<!-- Reports every 'namegrp' element in the document. -->
<rule context="//*:namegrp">
<!-- Unconditional failure: matching the context is the violation. -->
<assert test="false()" diagnostics="namegrp-1">
'namegrp' element is not supported in ArchivesSpace. Serialize contents of 'namegrp' subelements in content of first subelement, and replace 'namegrp' with first subelement.
</assert>
</rule>
</pattern>
<pattern id="identifier-automated">
<!-- Evaluated once, on the root 'ead' element. -->
<rule context="/*:ead">
<!-- Passes when no 'eadid' carries an 'identifier' attribute, or when some text node of a 'p' directly inside a
     'processinfo' under 'archdesc' is equal to such an 'identifier' value. -->
<assert test="not(.//*:eadid/@identifier) or *:archdesc//*:processinfo/*:p/text() = .//*:eadid/@identifier"
diagnostics="identifier-1">
The 'identifier' attribute on 'eadid' elements is not preserved by ArchivesSpace. Place contents in a 'p' element within a 'processinfo' element in 'archdesc'.
</assert>
</rule>
</pattern>
<pattern id="bibliography-automated">
<!-- Reports every 'list' element that is a direct child of a 'bibliography' element. -->
<rule context="//*:bibliography/*:list">
<!-- Unconditional failure: matching the context is the violation. -->
<assert test="false()" diagnostics="bibliography-1">
'bibliography' element with 'list' element as content imports incorrectly. Change 'item' elements to 'bibref' elements, move into parent 'bibliography', drop 'list'.
</assert>
</rule>
</pattern>
<pattern id="change-manual">
<!-- Applies to each 'change' element directly inside a 'revisiondesc'. -->
<rule context="//*:revisiondesc/*:change">
<!-- Both a 'date' child and an 'item' child have to be present. -->
<assert test="*:date and *:item" diagnostics="change-1">
'revisiondesc/change' element must contain both a date and an item subelement.
</assert>
</rule>
</pattern>
<pattern id="name-automated">
<!-- Applies to every 'name' element. -->
<rule context="//*:name">
<!-- Fails when 'encodinganalog' equals one of the listed values; a 'name' without the attribute passes. -->
<assert test="not(@encodinganalog = ('110', '111', '130', '240', '245', '610', '611', '630', '650', '651', '654', '700', '710'))" diagnostics="name-1">
'name' elements with 'encodinganalog' attribute values that correspond to more specific EAD elements should be represented by those elements instead of 'name'.
</assert>
</rule>
</pattern>
<pattern id="ptrgrp-automated">
<!-- Reports every 'ptrgrp' element in the document, whatever its content. -->
<rule context="//*:ptrgrp">
<!-- Unconditional failure: matching the context is the violation. -->
<assert test="false()" diagnostics="ptrgrp-1">
'ptrgrp' elements with nested 'ref' elements are not properly imported by ArchivesSpace. The 'ptrgrp' should be replaced with a 'ref' element and the 'ref's to nested 'ptr's.
</assert>
</rule>
</pattern>
<pattern id="nested-list-automated">
<!-- Detects lists nested inside the items of another list. Both tests are relative to the 'list' being examined,
     so only a list that actually contains a nested list is reported; paths starting at the document root would
     make every list in the file fail as soon as one nested list existed anywhere. All name tests use the '*:'
     wildcard so that elements in the EAD namespace are matched as well. -->
<rule context="//*:list">
<!-- An 'item' of this list has a 'list' as a direct child. -->
<assert test="not(*:item/*:list)" diagnostics="nested-list-1">
'item' elements with 'list' element children are not properly imported by ArchivesSpace. 'list' elements should be flattened so that no nesting occurs.
</assert>
<!-- An 'item' of this list has a non-'list' child that contains a 'list' at any depth. -->
<assert test="not(*:item/*[not(self::*:list)]//*:list)" diagnostics="nested-list-2">
'item' elements with non-'list' children which have 'list' children are not properly imported by ArchivesSpace. 'list' elements should be flattened so that no nesting occurs.
</assert>
</rule>
</pattern>
<diagnostics>
<!-- One 'diagnostic' per assertion, looked up through the assertion's 'diagnostics' attribute. Each starts with a
     'Ref-number:' line; some add a 'Content:' line built from values of the node being reported.
     NOTE(review): several Ref-numbers occur more than once (28, 46, 55 and 20.2); confirm whether that is intended. -->
<diagnostic id="enm-1">Ref-number: 18
Content: Value is "<value-of select="." />"</diagnostic>
<diagnostic id="dm-1">Ref-number: 11</diagnostic>
<diagnostic id="da-1">Ref-number: 11</diagnostic>
<diagnostic id="da-2">Ref-number: 11
Content: '<value-of select="local-name(.)" />' element can be moved out of 'descgrp' element into a new 'note' element in surrounding '<value-of select="local-name(./../..)" />'</diagnostic>
<diagnostic id="da-3">Ref-number: 11
Content: '<value-of select="local-name(.)" />' element can be moved out of 'descgrp' element into surrounding '<value-of select="local-name(./../..)" />'</diagnostic>
<diagnostic id="didm-1">Ref-number: 12</diagnostic>
<diagnostic id="didm-1a">Ref-number: 12</diagnostic>
<diagnostic id="didm-2">Ref-number: 13</diagnostic>
<diagnostic id="didm-2a">Ref-number: 13</diagnostic>
<diagnostic id="didm-3">Ref-number: 14</diagnostic>
<diagnostic id="didm-3a">Ref-number: 14</diagnostic>
<diagnostic id="didm-5">Ref-number: 16</diagnostic>
<diagnostic id="didm-5a">Ref-number: 16</diagnostic>
<diagnostic id="didm-4">Ref-number: 15</diagnostic>
<diagnostic id="tblm-1">Ref-number: 25</diagnostic>
<diagnostic id="dua-1">Ref-number: 38
Content: Content from '<value-of select="local-name(.)" />' element's 'startYear' (<value-of select="./@startYear" />) and 'endYear' (<value-of select="./@endYear" />) attributes should be combined into a single 'normal' attribute separated by '/' (<value-of select="./@startYear" />/<value-of select="./@endYear" />)
</diagnostic>
<diagnostic id="ca-1">Ref-number: 6</diagnostic>
<diagnostic id="dao-1">Ref-number: 9</diagnostic>
<diagnostic id="noempty-1">Ref-number: 46
Content: '<value-of select="local-name(.)" />' element should not be empty
</diagnostic>
<diagnostic id="c-1">Ref-number: 44</diagnostic>
<diagnostic id="note-1">Ref-number: 23.1</diagnostic>
<diagnostic id="note-2">Ref-number: 23.2</diagnostic>
<diagnostic id="note-3">Ref-number: 23.3</diagnostic>
<diagnostic id="note-4">Ref-number: 23.4</diagnostic>
<diagnostic id="noempty-2">Ref-number: 46</diagnostic>
<diagnostic id="dua-2">Ref-number: 47</diagnostic>
<diagnostic id="dua-3">Ref-number: 57</diagnostic>
<diagnostic id="unitid-1">Ref-number: 43</diagnostic>
<diagnostic id="unitid-2">Ref-number: 55</diagnostic>
<diagnostic id="unittitle-1">Ref-number: 28</diagnostic>
<diagnostic id="unittitle-2">Ref-number: 28</diagnostic>
<diagnostic id="scopecontent-1">Ref-number: 2</diagnostic>
<diagnostic id="daogrp-1">Ref-number: 10</diagnostic>
<diagnostic id="namegrp-1">Ref-number: 22</diagnostic>
<diagnostic id="identifier-1">Ref-number: 59</diagnostic>
<diagnostic id="ca-2">Ref-number: 6.2</diagnostic>
<diagnostic id="bibliography-1">Ref-number: 20.2</diagnostic>
<diagnostic id="change-1">Ref-number: 9001</diagnostic>
<diagnostic id="name-1">Ref-number: 55
Content: encodinganalog value is '<value-of select="./@encodinganalog" />', local practice at Harvard is to represent values according to the following mappings: 110,111,710=corpname, 130,240,245=title, 610,611,630,650,654=subject, 651=geogname, 700=persname</diagnostic>
<diagnostic id="unittitle-3">Ref-number: 40</diagnostic>
<diagnostic id="ptrgrp-1">Ref-number: 53</diagnostic>
<diagnostic id="daogrp-2">Ref-number: 63</diagnostic>
<diagnostic id="nested-list-1">Ref-number: 20.1</diagnostic>
<diagnostic id="nested-list-2">Ref-number: 20.2</diagnostic>
</diagnostics>
</schema>