-
Notifications
You must be signed in to change notification settings - Fork 4
/
grab_collection.php
executable file
·112 lines (82 loc) · 2.85 KB
/
grab_collection.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/usr/bin/php -q
<?php
/**
* This script takes a collection object PID as its argument and saves
* the OBJ and MODS datastreams of every member object as files in a
* "<collectionPid>.files" directory:
* ./grab_collection.php collectionPid:001
* So far it has only been used with Islandora images, but it can
* easily be modified to grab large image collections by adding
* to the extensions array in the getExtension function.
*/
require_once './connection.php';
/**
* Fetch the PIDs of all members of the collection with an iTQL query.
*/
// --- Main export loop -------------------------------------------------------
// Reads the collection PID from the command line, lists the collection's
// members, and copies each member's OBJ and MODS datastreams into
// "<collectionPid>.files/". $repository is not defined in this file; it is
// expected to be set up by connection.php (verify there).

// Fail early with a usage message instead of querying with an empty PID.
if (!isset($argv[1]) || $argv[1] === '') {
  fwrite(STDERR, "Usage: ./grab_collection.php collectionPid:001\n");
  exit(1);
}
$pid = $argv[1];

// The PID is embedded inside <info:fedora/...> in the query below, so reject
// characters that would terminate that URI and alter the query.
if (preg_match('/[<>\s]/', $pid)) {
  fwrite(STDERR, "Invalid collection pid: $pid\n");
  exit(1);
}

// Example of the resulting query for PID SMDI:30926:
// select $object from <#ri> where $object <info:fedora/fedora-system:def/relations-external#isMemberOfCollection> <info:fedora/SMDI:30926>
$format = 'select $object from <#ri> where $object <info:fedora/fedora-system:def/relations-external#isMemberOfCollection> <info:fedora/%s>';
$query = sprintf($format, $pid);
$objects = $repository->ri->itqlQuery($query, 'unlimited', '0'); // for itql

// Output directory; reuse it when it already exists so the script can be re-run.
$dir = $pid . ".files";
if (!is_dir($dir) && !mkdir($dir)) {
  fwrite(STDERR, "Unable to create directory $dir\n");
  exit(1);
}
echo count($objects);
echo "dir made\n";

foreach ($objects as $result) {
  // Use a separate variable so the collection PID in $pid is not overwritten.
  $member_pid = $result['object']['value'];
  $fedora_object = $repository->getObject($member_pid);
  print "PID: ". $member_pid ."\n";

  // Each call writes the datastream to a temporary file and returns its path.
  $tif_tmp = saveDatastream($fedora_object, 'OBJ');
  $xml_tmp = saveDatastream($fedora_object, 'MODS', 'xml');

  // If either datastream could not be saved, skip this object rather than
  // passing an empty path to pathinfo()/copy().
  if (!$tif_tmp || !$xml_tmp) {
    foreach (array($tif_tmp, $xml_tmp) as $leftover) {
      if ($leftover && file_exists($leftover)) {
        unlink($leftover);
      }
    }
    fwrite(STDERR, "Skipping $member_pid: could not save OBJ and MODS datastreams\n");
    continue;
  }

  // The OBJ file keeps whatever extension the temporary file was given.
  $obj_info = pathinfo($tif_tmp);
  $obj_extension = isset($obj_info['extension']) ? $obj_info['extension'] : '';
  $tif_new = $dir . "/" . $member_pid . ($obj_extension !== '' ? "." . $obj_extension : '');
  $xml_new = $dir . "/" . $member_pid . ".xml";

  $obj_copied = copy($tif_tmp, $tif_new);
  $xml_copied = $obj_copied && copy($xml_tmp, $xml_new);

  // Always remove the temporary files, including when a copy failed.
  unlink($tif_tmp);
  unlink($xml_tmp);

  if (!$obj_copied) {
    fwrite(STDERR, "Unable to copy object\n");
    exit(1);
  }
  if (!$xml_copied) {
    fwrite(STDERR, "Unable to copy metadata\n");
    exit(1);
  }
}
/**
 * Writes one datastream of a repository object to a temporary file.
 *
 * @param object $fedora_object
 *   Object whose datastreams are listed by iterating it and fetched with
 *   getDatastream(); the caller passes the result of $repository->getObject().
 * @param string $dsid
 *   ID of the datastream to save, e.g. 'OBJ' or 'MODS'.
 * @param string $extension
 *   Optional file extension for the temporary file. When omitted it is
 *   derived from the datastream's mimetype via getExtension().
 *
 * @return string|null
 *   Path of the temporary file, or NULL when no object or DSID was given,
 *   the object has no such datastream, or the content could not be saved.
 */
function saveDatastream($fedora_object = NULL, $dsid = NULL, $extension = NULL) {
  if (!isset($dsid) || !isset($fedora_object)) {
    return NULL;
  }

  // Collect the IDs of the datastreams the object actually has.
  $datastream_ids = array();
  foreach ($fedora_object as $datastream) {
    $datastream_ids[] = (string) $datastream->id;
  }
  if (!in_array((string) $dsid, $datastream_ids, TRUE)) {
    print "Could not find the $dsid datastream!";
    // Stop here: there is nothing to fetch, and continuing would only
    // operate on a datastream the object does not have.
    return NULL;
  }

  try {
    $datastream = $fedora_object->getDatastream($dsid);
    if (!$extension) {
      $extension = getExtension($datastream->mimetype);
    }
    echo $extension ."\n";
    $tempfile = temp_filename($extension);

    // file_put_contents() reports failure through its return value, so a
    // write error is detected instead of silently leaving a broken file.
    if (file_put_contents($tempfile, $datastream->content) === FALSE) {
      if (file_exists($tempfile)) {
        unlink($tempfile);
      }
      print "Could not write the $dsid datastream to $tempfile\n";
      return NULL;
    }
    return $tempfile;
  } catch (Exception $e) {
    print "Could not save datastream - $e";
    // Return an explicit NULL rather than an undefined variable.
    return NULL;
  }
}
/**
 * Maps a MIME type to the file extension used for exported files.
 *
 * The lookup ignores letter case, surrounding whitespace and any parameters
 * after a ';' (for example "text/xml; charset=UTF-8").
 *
 * @param string $mime_type
 *   MIME type of a datastream.
 *
 * @return string|null
 *   Extension without the leading dot, or NULL when the MIME type is not in
 *   the table below.
 */
function getExtension ($mime_type){
  $extensions = array(
    'image/jpeg' => 'jpeg',
    'image/tiff' => 'tif',
    'image/jp2' => 'jp2',
    'image/png' => 'png',
    'image/gif' => 'gif',
    'application/pdf' => 'pdf',
    'application/xml' => 'xml',
    'text/xml' => 'xml',
  );
  // Add as many other Mime Types / File Extensions as you like

  // Keep only the "type/subtype" part and normalise it before the lookup.
  $parts = explode(';', (string) $mime_type, 2);
  $key = strtolower(trim($parts[0]));

  // isset() avoids an undefined-index error for MIME types not listed above.
  return isset($extensions[$key]) ? $extensions[$key] : NULL;
}
/**
 * Picks a path in the system temp directory that does not exist yet.
 *
 * Every candidate path is printed on its own line. No file is created;
 * only the path is returned.
 *
 * @param string $extension
 *   Text appended after the final dot of the generated name.
 *
 * @return string
 *   "<temp dir>/<random unique id>.<extension>".
 */
function temp_filename($extension = NULL) {
  $tmp_dir = sys_get_temp_dir();
  do {
    $candidate = $tmp_dir . '/' . uniqid(rand()) . '.' . $extension;
    print $candidate . "\n";
    // Try another name in the unlikely case this one is already taken.
  } while (file_exists($candidate));
  return $candidate;
}
?>