Skip to content

Commit

Permalink
Merge pull request #14 from utkdigitalinitiatives/compound_objects
Browse files Browse the repository at this point in the history
Migrate compound objects.
  • Loading branch information
Weston49 authored May 31, 2024
2 parents 7c6409e + 8618f95 commit 17048c8
Show file tree
Hide file tree
Showing 6 changed files with 180 additions and 3 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "utk-exodus"
version = "0.2.0"
version = "0.2.1"
description = "A tool for building import sheets from UTK legacy systems"
authors = ["Mark Baggett <mbagget1@utk.edu>"]
readme = "README.md"
Expand Down
25 changes: 25 additions & 0 deletions utk_exodus/exodus.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,3 +371,28 @@ def export_errors(
ei = ExistingImport(import_ids, directory, initial_auth=(os.getenv('HYKU_BASIC_AUTH_USER'), os.getenv('HYKU_BASIC_AUTH_PASS')))
ei.sign_in_to_hyku(os.getenv('HYKU_USER'), os.getenv('HYKU_PASS'))
ei.export_errors()

@cli.command(
    "add_datastreams",
    help="Add datastreams to existing PIDS",
)
@click.option(
    "--path",
    "-p",
    required=True,
    help="Path to the Original Files",
)
def add_datastreams(
    path: str,
) -> None:
    """Walk ``path`` and upload every file found as a datastream.

    Each file name is split on ``_``: the first piece is used as the PID and
    the second piece, up to its first ``.``, is used as the datastream id.
    Files whose name contains no ``_`` cannot be mapped to a PID/DSID pair and
    are skipped with a message instead of aborting the whole run.

    Args:
        path: Root directory that is walked recursively for files to upload.
    """
    print(f"Adding datastreams {path}.")
    # The connection settings do not change while walking, so read them once.
    auth = (os.getenv("FEDORA_USERNAME"), os.getenv("FEDORA_PASSWORD"))
    fedora_uri = os.getenv("FEDORA_URI")
    # Use a distinct name for the walked directory so the ``path`` argument is
    # not rebound inside the loop.
    for current_dir, _directories, files in os.walk(path):
        for file in tqdm(files):
            pid, separator, remainder = file.partition("_")
            if not separator:
                print(f"Skipping {file}: expected a <pid>_<dsid>.<ext> file name.")
                continue
            # Same result as file.split('_')[1].split('.')[0].
            dsid = remainder.split("_")[0].split(".")[0]
            fedora = FedoraObject(
                auth=auth,
                fedora_uri=fedora_uri,
                pid=pid,
            )
            fedora.add_datastream(dsid, os.path.join(current_dir, file))
79 changes: 77 additions & 2 deletions utk_exodus/fedora/fedora.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import requests
import xmltodict
from urllib.parse import quote


class FedoraObject:
Expand Down Expand Up @@ -80,12 +81,86 @@ def write_all_versions(self, dsid, output):
self.getDatastream(dsid, output, version["dsCreateDate"])
return

def add_datastream(self, dsid, file, mimetype="text/plain"):
    """POST a local file to this object's datastream endpoint.

    The request targets ``/objects/{pid}/datastreams/{dsid}`` with
    ``controlGroup=M``, ``versionable=true`` and ``dsState=A``; the datastream
    label is the dsid itself.

    Args:
        dsid: Identifier of the datastream to create (also used as its label).
        file: Path of the local file whose bytes are sent as the request body.
        mimetype: Value sent in the ``Content-Type`` header.

    Returns:
        The ``requests`` response object for the POST.
    """
    url = (
        f"{self.fedora_uri}/objects/{self.pid}/datastreams/{dsid}?controlGroup=M&dsLabel={dsid}&versionable=true"
        f"&dsState=A&logMessage=Added+{dsid}+datastream+to+{self.pid}."
    )
    # Open the file in a context manager so the handle is closed even when the
    # request raises; previously it was left open for every uploaded file.
    with open(file, "rb") as payload:
        return requests.post(
            url,
            auth=self.auth,
            headers={"Content-Type": mimetype},
            data=payload,
        )

def purge_relationship(self, predicate, object, is_literal=True):
    """Send a DELETE for one relationship whose subject is this object.

    Args:
        predicate: Predicate URI; percent-encoded with ``quote`` before use.
        object: Object value or URI; percent-encoded with ``quote`` before use.
        is_literal: Passed through as the ``isLiteral`` query parameter.

    Returns:
        The ``requests`` response object for the DELETE.
    """
    query_string = "&".join(
        (
            f"subject=info%3afedora/{self.pid}",
            f"predicate={quote(predicate)}",
            f"object={quote(object)}",
            f"isLiteral={is_literal}",
        )
    )
    endpoint = f"{self.fedora_uri}/objects/{self.pid}/relationships?{query_string}"
    return requests.delete(endpoint, auth=self.auth)

def add_relationship(self, predicate, object, is_literal=True):
    """Send a POST that adds one relationship whose subject is this object.

    Args:
        predicate: Predicate URI; percent-encoded with ``quote`` before use.
        object: Object value or URI; percent-encoded with ``quote`` before use.
        is_literal: Passed through as the ``isLiteral`` query parameter.

    Returns:
        The ``requests`` response object for the POST.
    """
    query_string = "&".join(
        (
            f"subject=info%3afedora/{self.pid}",
            f"predicate={quote(predicate)}",
            f"object={quote(object)}",
            f"isLiteral={is_literal}",
        )
    )
    endpoint = f"{self.fedora_uri}/objects/{self.pid}/relationships/new?{query_string}"
    return requests.post(endpoint, auth=self.auth)

def remove_membership_of_page(self, book_pid):
    """Purge this object's isPageOf and isMemberOf links to ``book_pid``.

    Both relationships are purged as non-literals pointing at
    ``info:fedora/{book_pid}``, isPageOf first and isMemberOf second.

    Args:
        book_pid: PID of the book the relationships currently point to.
    """
    book_uri = f"info:fedora/{book_pid}"
    membership_predicates = (
        "http://islandora.ca/ontology/relsext#isPageOf",
        "info:fedora/fedora-system:def/relations-external#isMemberOf",
    )
    for predicate in membership_predicates:
        self.purge_relationship(predicate, book_uri, False)

def add_membership_of_page(self, book_pid):
    """Add isPageOf and isMemberOf links from this object to ``book_pid``.

    Both relationships are added as non-literals pointing at
    ``info:fedora/{book_pid}``, isPageOf first and isMemberOf second.

    Args:
        book_pid: PID of the book this object should belong to.
    """
    target = f"info:fedora/{book_pid}"
    for predicate in (
        "http://islandora.ca/ontology/relsext#isPageOf",
        "info:fedora/fedora-system:def/relations-external#isMemberOf",
    ):
        self.add_relationship(predicate, target, False)
    return None

def remove_sequencing(self, sequence_number):
    """Purge the three literal sequencing relationships for this object.

    The same ``sequence_number`` literal is purged for isSequenceNumber,
    isPageNumber and isSection, in that order.

    Args:
        sequence_number: Literal value currently stored on the relationships.
    """
    sequencing_predicates = (
        "http://islandora.ca/ontology/relsext#isSequenceNumber",
        "http://islandora.ca/ontology/relsext#isPageNumber",
        "http://islandora.ca/ontology/relsext#isSection",
    )
    for predicate in sequencing_predicates:
        self.purge_relationship(predicate, sequence_number, True)

def add_sequencing(self, sequence_number):
    """Add the three literal sequencing relationships for this object.

    The same ``sequence_number`` literal is written for isSequenceNumber,
    isPageNumber and isSection, in that order.

    Args:
        sequence_number: Literal value to store on each relationship.
    """
    for predicate in (
        "http://islandora.ca/ontology/relsext#isSequenceNumber",
        "http://islandora.ca/ontology/relsext#isPageNumber",
        "http://islandora.ca/ontology/relsext#isSection",
    ):
        self.add_relationship(predicate, sequence_number, True)
    return None


# Ad-hoc manual run: executes only when this module is run directly and
# issues real requests against the Fedora instance named by FEDORA_URI.
# NOTE(review): the two adjacent `pid=` lines and the `getDatastream` call
# below look like removed/added diff lines shown side by side — TODO confirm
# which of them actually belong in the file.
if __name__ == "__main__":
import os
x = FedoraObject(
# Credentials and endpoint are read from the environment.
auth=(os.getenv("FEDORA_USERNAME"), os.getenv("FEDORA_PASSWORD")),
fedora_uri=os.getenv("FEDORA_URI"),
pid="roth:10"
pid="beacon:35815"
)
x.getDatastream("OBJ", "tmp/roth2")
# Detach the object from one book and sequence value, then attach it to
# another book with a new sequence value.
x.remove_membership_of_page("beacon:35814")
x.remove_sequencing("10")
x.add_sequencing("12")
x.add_membership_of_page("beacon:35825")
12 changes: 12 additions & 0 deletions utk_exodus/finder/finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,18 @@ def __add_files(self, what_to_add=['filesets', 'attachments']):
new_csv_content.append(self.__add_an_attachment(dsid, row))
if 'filesets' in what_to_add:
new_csv_content.append(self.__add_a_file(dsid, row))
elif row['model'] == "CompoundObject":
for dsid in all_files:
if 'PRESERVE' in all_files and 'OBJ' in all_files:
if 'attachments' in what_to_add:
new_csv_content.append(self.__add_an_attachment(dsid, row, True))
if 'filesets' in what_to_add:
new_csv_content.append(self.__add_a_file(dsid, row, True))
else:
if 'attachments' in what_to_add:
new_csv_content.append(self.__add_an_attachment(dsid, row))
if 'filesets' in what_to_add:
new_csv_content.append(self.__add_a_file(dsid, row))
elif row['model'] == "Page":
dsids_to_remove = ('MODS', 'RELS-INT', 'PDF')
for dsid in dsids_to_remove:
Expand Down
15 changes: 15 additions & 0 deletions utk_exodus/metadata/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -1140,13 +1140,21 @@ def __execute(self, namespaces):
all_file_data.append(output_data)
for item in all_file_data:
pages = self.look_for_pages(item)
parts = self.look_for_compound_parts(item)
for page in pages:
new_page = item.copy()
new_page["source_identifier"] = page["pid"].replace("info:fedora/", "")
new_page["parents"] = item["source_identifier"]
new_page["model"] = "Page"
new_page["sequence"] = page["page"]
all_pages.append(new_page)
for part in parts:
new_part = item.copy()
new_part["source_identifier"] = part["pid"].replace("info:fedora/", "")
new_part["parents"] = item["source_identifier"]
new_part["model"] = "Page"
new_part["sequence"] = part["sequence"]
all_pages.append(new_part)
for page in all_pages:
all_file_data.append(page)
return all_file_data
Expand All @@ -1156,6 +1164,11 @@ def look_for_pages(self, data):
return ResourceIndexSearch().find_pages_in_book(data["source_identifier"])
return []

def look_for_compound_parts(self, data):
    """Return the parts of a compound object, or ``[]`` for any other model.

    Args:
        data: Mapping with at least ``model`` and ``source_identifier`` keys.

    Returns:
        Whatever ``ResourceIndexSearch.get_compound_object_parts`` returns for
        the record's ``source_identifier`` when ``model`` is
        ``"CompoundObject"``; otherwise an empty list.
    """
    # Guard clause: only compound objects trigger a resource index lookup.
    if data["model"] != "CompoundObject":
        return []
    searcher = ResourceIndexSearch()
    return searcher.get_compound_object_parts(data["source_identifier"])

def __find_unique_fieldnames(self, data):
for k, v in data.items():
if k not in self.fieldnames:
Expand All @@ -1166,6 +1179,7 @@ def __dereference_islandora_type(self, file):
islandora_types = {
"info:fedora/islandora:sp-audioCModel": "Audio",
"info:fedora/islandora:bookCModel": "Book",
"info:fedora/islandora:compoundCModel": "CompoundObject",
"info:fedora/islandora:binaryObjectCModel": "Generic",
"info:fedora/islandora:sp_large_image_cmodel": "Image",
"info:fedora/islandora:sp_basic_image": "Image",
Expand All @@ -1185,6 +1199,7 @@ def __get_utk_ontology_value(model):
ontology_values = {
"Audio": "https://ontology.lib.utk.edu/works#AudioWork",
"Book": "https://ontology.lib.utk.edu/works#BookWork",
"CompoundObject": "https://ontology.lib.utk.edu/works#CompoundObjectWork",
"Generic": "https://ontology.lib.utk.edu/works#GenericWork",
"Image": "https://ontology.lib.utk.edu/works#ImageWork",
"Pdf": "https://ontology.lib.utk.edu/works#PDFWork",
Expand Down
50 changes: 50 additions & 0 deletions utk_exodus/risearch/risearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,22 @@ def find_pages_in_book(self, book):
page_results = requests.get(f"{self.base_url}&query={query}").content
return self.clean_pages(page_results)

def get_compound_object_parts(self, compound_object):
    """Query the resource index for the constituents of a compound object.

    The SPARQL query selects every ``?pid`` that is ``isConstituentOf`` the
    given object, together with its content model (restricted to models whose
    URI matches ``islandora``) and the value of the per-parent
    ``isSequenceNumberOf<pid with ':' replaced by '_'>`` predicate.

    Args:
        compound_object: PID of the compound object, e.g. ``namespace:123``.

    Returns:
        The list produced by ``clean_compound_parts`` from the raw response
        body: one dict per result row with ``pid``, ``sequence`` and ``model``.
    """
    sparql = "\n".join(
        (
            "PREFIX fedora: <info:fedora/fedora-system:def/relations-external#>",
            "PREFIX fedoraModel: <info:fedora/fedora-system:def/model#>",
            "PREFIX islandora: <http://islandora.ca/ontology/relsext#>",
            "SELECT ?pid ?sequence ?model WHERE {",
            f"?pid fedora:isConstituentOf <info:fedora/{compound_object}>;",
            "fedoraModel:hasModel ?model;",
            f"islandora:isSequenceNumberOf{compound_object.replace(':', '_')} ?sequence .",
            'FILTER(REGEX(STR(?model), "islandora")) . }',
            "",
        )
    )
    query = quote(sparql)
    results = requests.get(f"{self.base_url}&query={query}").content
    # Parse the response once; the earlier debug print parsed it a second time
    # and wrote the whole result list to stdout on every call.
    return self.clean_compound_parts(results)

@staticmethod
def clean_pages(results):
all_pages = []
Expand All @@ -165,6 +181,21 @@ def clean_pages(results):
)
return all_pages

@staticmethod
def clean_compound_parts(results):
    """Turn a raw CSV response body into a list of part dictionaries.

    Args:
        results: UTF-8 encoded bytes with newline-separated,
            comma-separated rows.

    Returns:
        One dict per row with ``pid``, ``sequence`` and ``model`` taken from
        the first three comma-separated fields. Empty lines and the
        ``"pid","sequence","model"`` header line are ignored.
    """
    header = '"pid","sequence","model"'
    parts = []
    for line in results.decode("utf-8").split("\n"):
        if line == "" or line == header:
            continue
        fields = line.split(",")
        parts.append(
            {
                "pid": fields[0],
                "sequence": fields[1],
                "model": fields[2],
            }
        )
    return parts

@staticmethod
def __lookup_work_type(work_type):
work_types = {
Expand Down Expand Up @@ -309,6 +340,25 @@ def get_works_of_a_type_with_dsid(self, work_type, dsid):
if result != "" and result != '"pid"'
]

def find_pids_and_pages_from_book_local_id(self, local_id):
    """Look up page PIDs and page numbers for the book matching a local id.

    The SPARQL query selects objects that are ``isMemberOf`` a book whose
    ``dc:identifier`` matches ``local_id`` as a regular expression, along with
    each object's ``isPageNumber`` value.

    Args:
        local_id: Text interpolated unchanged into the ``REGEX`` filter.

    Returns:
        A list of ``(pid, page)`` tuples, one per result row, with the
        ``info:fedora/`` prefix removed from the pid. Empty lines and the
        ``"pid","page"`` header line are ignored.
    """
    sparql = "\n".join(
        (
            "",
            "SELECT ?pid ?page WHERE {",
            "?pid <info:fedora/fedora-system:def/relations-external#isMemberOf> ?book ;",
            "<http://islandora.ca/ontology/relsext#isPageNumber> ?page .",
            "?book <http://purl.org/dc/elements/1.1/identifier> ?id .",
            f'FILTER(REGEX(?id, "{local_id}"))',
            "}",
            "",
        )
    )
    query = quote(sparql)
    response = requests.get(f"{self.base_url}&query={query}")
    rows = response.content.decode("utf-8").split("\n")
    matches = []
    for row in rows:
        if row in ("", '"pid","page"'):
            continue
        pid = row.replace("info:fedora/", "").split(",")[0]
        page = row.split(",")[1]
        matches.append((pid, page))
    return matches


if __name__ == "__main__":
risearch = ResourceIndexSearch()
x = risearch.get_works_of_a_type_with_dsid("book", "MODS")

0 comments on commit 17048c8

Please sign in to comment.