-
Notifications
You must be signed in to change notification settings - Fork 0
/
make-book-batch-ingest-folders.py
73 lines (60 loc) · 2.37 KB
/
make-book-batch-ingest-folders.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
"""
Pass the name of the source folder as the TOPFOLDER argument.
This script will make a copy of it (named <TOPFOLDER>-batched, unless
--nocopy is given) and rearrange the files to match the
Islandora book batch naming structure.
python3 make-book-batch-ingest-folders.py TOPFOLDER [--nocopy]
"""
import argparse
import glob
import os
import shutil
from xml.sax.saxutils import escape
# Command line: the folder to process, plus an opt-in switch to work in place.
argparser = argparse.ArgumentParser()
argparser.add_argument("TOPFOLDER")
argparser.add_argument('--nocopy', help="Modifying the folder directly instead of making a copy", action="store_true")
args = argparser.parse_args()
TOPFOLDER = args.TOPFOLDER
# Remove surrounding whitespace and *trailing* slashes only.  A leading '/'
# must survive, otherwise an absolute path such as '/data/book/' would be
# silently turned into the relative path 'data/book'.
sourceFolder = TOPFOLDER.strip().rstrip('/')
if args.nocopy:
    # Work directly inside the folder that was given.
    destFolder = sourceFolder
else:
    # Leave the source untouched and operate on a sibling '<name>-batched' copy.
    destFolder = sourceFolder + '-batched'
    shutil.copytree(sourceFolder, destFolder)
# Everything below uses paths relative to the destination folder.
os.chdir(destFolder)
# Page-level MODS record: it carries nothing but the identifier, which is
# used both as the title and as the local identifier for linking.
pageModsTemplate = """<?xml version="1.0" encoding="UTF-8"?>
<mods xmlns="http://www.loc.gov/mods/v3" xmlns:mods="http://www.loc.gov/mods/v3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink">
<titleInfo>
<title>{identifier}</title>
</titleInfo>
<identifier type="local">{identifier}</identifier>
</mods>
"""
# File extensions (matched case-sensitively) that count as page images.
fileExtentionGuestList = 'TIF TIFF tif tiff jpg JPG jpeg JPEG'.split()
# Every page file gets its own folder and is moved there under the name OBJ;
# folders are numbered starting from this value.
pageNum = 1
# Page images that pass the extension check are collected here.
filteredPageFileName_S = []
# All entries of the current directory, in sorted order.
pageFileName_S = sorted(glob.glob(r'*'))
def getExtension(fileName):
    """Return the extension of fileName without the leading dot.

    The extension is the text after the last '.' in the name, with its
    original letter case.  A name that contains no '.' has no extension
    and yields the empty string.
    """
    # Use the argument itself, never a module-level loop variable, so the
    # result is correct no matter where the function is called from.
    _, dot, extension = fileName.rpartition('.')
    return extension if dot else ''
# Keep only the directory entries whose extension marks them as page images.
for pageFileName in pageFileName_S:
    if getExtension(pageFileName) in fileExtentionGuestList:
        filteredPageFileName_S.append(pageFileName)
# Nothing to batch: report why and stop with a non-zero exit status.
if not filteredPageFileName_S:
    print("No files in %s ending in %s" % (destFolder, fileExtentionGuestList))
    print("Quiting")
    # Raise SystemExit directly: the exit() helper is added by the site
    # module for interactive sessions and is not guaranteed to exist
    # (for example when Python is started with -S).
    raise SystemExit(1)
# Give every page image its own zero-padded, sequentially numbered folder
# holding the image (renamed to OBJ.<extension>) and a minimal MODS.xml.
for pageNum, pageFileName in enumerate(filteredPageFileName_S, start=pageNum):
    print(pageFileName)
    pageFileExtension = getExtension(pageFileName)
    pageFolder = str(pageNum).zfill(5)
    print('Create folder %s' % pageFolder)
    os.makedirs(pageFolder)
    print('Move file %s into folder %s' % (pageFileName, pageFolder))
    shutil.move(pageFileName, pageFolder + '/' + 'OBJ.' + pageFileExtension)
    # The identifier is the part of the file name before its first dot.
    pageIdentifier = pageFileName.split(".")[0]
    # Escape &, < and > so that any file name still yields well-formed XML.
    modsOutput = pageModsTemplate.format(identifier=escape(pageIdentifier))
    print("Generate MODS.xml")
    # The template declares encoding="UTF-8", so write the file as UTF-8
    # instead of whatever the platform default encoding happens to be.
    with open(pageFolder + '/MODS.xml', 'w', encoding='utf-8') as modsFile:
        modsFile.write(modsOutput)