diff --git a/pyproject.toml b/pyproject.toml
index d840102..8bcbf97 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "utk-exodus"
-version = "0.2.0"
+version = "0.2.1"
 description = "A tool for building import sheets from UTK legacy systems"
 authors = ["Mark Baggett "]
 readme = "README.md"
diff --git a/utk_exodus/exodus.py b/utk_exodus/exodus.py
index 881dd49..eb1ba09 100644
--- a/utk_exodus/exodus.py
+++ b/utk_exodus/exodus.py
@@ -371,3 +371,37 @@ def export_errors(
     ei = ExistingImport(import_ids, directory, initial_auth=(os.getenv('HYKU_BASIC_AUTH_USER'), os.getenv('HYKU_BASIC_AUTH_PASS')))
     ei.sign_in_to_hyku(os.getenv('HYKU_USER'), os.getenv('HYKU_PASS'))
     ei.export_errors()
+
+@cli.command(
+    "add_datastreams",
+    help="Add datastreams to existing PIDS",
+)
+@click.option(
+    "--path",
+    "-p",
+    required=True,
+    help="Path to the Original Files",
+)
+def add_datastreams(
+    path: str,
+) -> None:
+    """Walk `path` and add each file found as a datastream on a Fedora object.
+
+    File names are split on "_": the first piece is used as the PID and the
+    second piece, up to its first ".", as the DSID.
+    """
+    print(f"Adding datastreams {path}.")
+    auth = (os.getenv("FEDORA_USERNAME"), os.getenv("FEDORA_PASSWORD"))
+    fedora_uri = os.getenv("FEDORA_URI")
+    # Keep the walked directory in its own name so `path` is not rebound.
+    for current_directory, _directories, files in os.walk(path):
+        for file in tqdm(files):
+            name_parts = file.split('_')
+            if len(name_parts) < 2:
+                # Without "_" there is no DSID; skip rather than IndexError.
+                print(f"Skipping {file}: no '_' separating PID and DSID.")
+                continue
+            pid = name_parts[0]
+            dsid = name_parts[1].split('.')[0]
+            fedora = FedoraObject(auth=auth, fedora_uri=fedora_uri, pid=pid)
+            fedora.add_datastream(dsid, os.path.join(current_directory, file))
diff --git a/utk_exodus/fedora/fedora.py b/utk_exodus/fedora/fedora.py
index b74c46f..6939ae7 100644
--- a/utk_exodus/fedora/fedora.py
+++ b/utk_exodus/fedora/fedora.py
@@ -1,5 +1,6 @@
 import requests
 import xmltodict
+from urllib.parse import quote
 
 
 class FedoraObject:
@@ -80,12 +81,104 @@ def write_all_versions(self, dsid, output):
             self.getDatastream(dsid, output, version["dsCreateDate"])
         return
 
+    def add_datastream(self, dsid, file, mimetype="text/plain"):
+        """POST the contents of `file` as datastream `dsid` (controlGroup=M) on this object.
+
+        Returns the `requests.Response`.
+        """
+        # Close the handle after the upload instead of leaking it.
+        with open(file, "rb") as datastream_content:
+            r = requests.post(
+                f"{self.fedora_uri}/objects/{self.pid}/datastreams/{dsid}?controlGroup=M&dsLabel={dsid}&versionable=true"
+                f"&dsState=A&logMessage=Added+{dsid}+datastream+to+{self.pid}.",
+                auth=self.auth,
+                headers={"Content-Type": mimetype},
+                data=datastream_content,
+            )
+        return r
+
+    def purge_relationship(self, predicate, object, is_literal=True):
+        """DELETE the relationship `predicate` -> `object` whose subject is this object.
+
+        Returns the `requests.Response`.
+        """
+        body = f"/objects/{self.pid}/relationships?subject=info%3afedora/{self.pid}&predicate={quote(predicate)}&object={quote(object)}&isLiteral={is_literal}"
+        r = requests.delete(
+            f"{self.fedora_uri}{body}",
+            auth=self.auth,
+        )
+        return r
+
+    def add_relationship(self, predicate, object, is_literal=True):
+        """POST a new relationship `predicate` -> `object` whose subject is this object.
+
+        Returns the `requests.Response`.
+        """
+        r = requests.post(
+            f"{self.fedora_uri}/objects/{self.pid}/relationships/new?subject=info%3afedora/{self.pid}&predicate={quote(predicate)}&object={quote(object)}&isLiteral={is_literal}",
+            auth=self.auth,
+        )
+        return r
+
+    def remove_membership_of_page(self, book_pid):
+        """Purge this object's isPageOf and isMemberOf relationships to `book_pid`."""
+        # Remove the isPageOf relationship
+        self.purge_relationship(
+            "http://islandora.ca/ontology/relsext#isPageOf", f"info:fedora/{book_pid}", False
+        )
+        # Remove isMemberOf relationship
+        self.purge_relationship(
+            "info:fedora/fedora-system:def/relations-external#isMemberOf",
+            f"info:fedora/{book_pid}",
+            False
+        )
+        return
+
+    def add_membership_of_page(self, book_pid):
+        """Add isPageOf and isMemberOf relationships from this object to `book_pid`."""
+        self.add_relationship(
+            "http://islandora.ca/ontology/relsext#isPageOf", f"info:fedora/{book_pid}", False
+        )
+        self.add_relationship(
+            "info:fedora/fedora-system:def/relations-external#isMemberOf", f"info:fedora/{book_pid}", False
+        )
+        return
+
+    def remove_sequencing(self, sequence_number):
+        """Purge the isSequenceNumber, isPageNumber and isSection literals equal to `sequence_number`."""
+        self.purge_relationship(
+            "http://islandora.ca/ontology/relsext#isSequenceNumber", sequence_number, True
+        )
+        self.purge_relationship(
+            "http://islandora.ca/ontology/relsext#isPageNumber", sequence_number, True
+        )
+        self.purge_relationship(
+            "http://islandora.ca/ontology/relsext#isSection", sequence_number, True
+        )
+        return
+
+    def add_sequencing(self, sequence_number):
+        """Add isSequenceNumber, isPageNumber and isSection literals set to `sequence_number`."""
+        self.add_relationship(
+            "http://islandora.ca/ontology/relsext#isSequenceNumber", sequence_number, True
+        )
+        self.add_relationship(
+            "http://islandora.ca/ontology/relsext#isPageNumber", sequence_number, True
+        )
+        self.add_relationship(
+            "http://islandora.ca/ontology/relsext#isSection", sequence_number, True
+        )
+        return
+
 if __name__ == "__main__":
     import os
 
     x = FedoraObject(
         auth=(os.getenv("FEDORA_USERNAME"), os.getenv("FEDORA_PASSWORD")),
         fedora_uri=os.getenv("FEDORA_URI"),
-        pid="roth:10"
+        pid="beacon:35815"
     )
-    x.getDatastream("OBJ", "tmp/roth2")
+    x.remove_membership_of_page("beacon:35814")
+    x.remove_sequencing("10")
+    x.add_sequencing("12")
+    x.add_membership_of_page("beacon:35825")
diff --git a/utk_exodus/finder/finder.py b/utk_exodus/finder/finder.py
index 3da8809..00c8f56 100644
--- a/utk_exodus/finder/finder.py
+++ b/utk_exodus/finder/finder.py
@@ -188,6 +188,18 @@ def __add_files(self, what_to_add=['filesets', 'attachments']):
                             new_csv_content.append(self.__add_an_attachment(dsid, row))
                         if 'filesets' in what_to_add:
                             new_csv_content.append(self.__add_a_file(dsid, row))
+            elif row['model'] == "CompoundObject":
+                for dsid in all_files:
+                    if 'PRESERVE' in all_files and 'OBJ' in all_files:
+                        if 'attachments' in what_to_add:
+                            new_csv_content.append(self.__add_an_attachment(dsid, row, True))
+                        if 'filesets' in what_to_add:
+                            new_csv_content.append(self.__add_a_file(dsid, row, True))
+                    else:
+                        if 'attachments' in what_to_add:
+                            new_csv_content.append(self.__add_an_attachment(dsid, row))
+                        if 'filesets' in what_to_add:
+                            new_csv_content.append(self.__add_a_file(dsid, row))
             elif row['model'] == "Page":
                 dsids_to_remove = ('MODS', 'RELS-INT', 'PDF')
                 for dsid in dsids_to_remove:
diff --git a/utk_exodus/metadata/metadata.py b/utk_exodus/metadata/metadata.py
index ac78b88..200cae4 100644
--- a/utk_exodus/metadata/metadata.py
+++ b/utk_exodus/metadata/metadata.py
@@ -1140,6 +1140,7 @@ def __execute(self, namespaces):
                 all_file_data.append(output_data)
         for item in all_file_data:
             pages = self.look_for_pages(item)
+            parts = self.look_for_compound_parts(item)
             for page in pages:
                 new_page = item.copy()
                 new_page["source_identifier"] = page["pid"].replace("info:fedora/", "")
@@ -1147,6 +1148,13 @@ def __execute(self, namespaces):
                 new_page["model"] = "Page"
                 new_page["sequence"] = page["page"]
                 all_pages.append(new_page)
+            for part in parts:
+                new_part = item.copy()
+                new_part["source_identifier"] = part["pid"].replace("info:fedora/", "")
+                new_part["parents"] = item["source_identifier"]
+                new_part["model"] = "Page"
+                new_part["sequence"] = part["sequence"]
+                all_pages.append(new_part)
         for page in all_pages:
             all_file_data.append(page)
         return all_file_data
@@ -1156,6 +1164,12 @@ def look_for_pages(self, data):
             return ResourceIndexSearch().find_pages_in_book(data["source_identifier"])
         return []
 
+    def look_for_compound_parts(self, data):
+        """Return the parts found for `data` when its model is "CompoundObject", else []."""
+        if data["model"] == "CompoundObject":
+            return ResourceIndexSearch().get_compound_object_parts(data["source_identifier"])
+        return []
+
     def __find_unique_fieldnames(self, data):
         for k, v in data.items():
             if k not in self.fieldnames:
@@ -1166,6 +1180,7 @@ def __dereference_islandora_type(self, file):
         islandora_types = {
             "info:fedora/islandora:sp-audioCModel": "Audio",
             "info:fedora/islandora:bookCModel": "Book",
+            "info:fedora/islandora:compoundCModel": "CompoundObject",
             "info:fedora/islandora:binaryObjectCModel": "Generic",
             "info:fedora/islandora:sp_large_image_cmodel": "Image",
             "info:fedora/islandora:sp_basic_image": "Image",
@@ -1185,6 +1200,7 @@ def __get_utk_ontology_value(model):
         ontology_values = {
             "Audio": "https://ontology.lib.utk.edu/works#AudioWork",
             "Book": "https://ontology.lib.utk.edu/works#BookWork",
+            "CompoundObject": "https://ontology.lib.utk.edu/works#CompoundObjectWork",
             "Generic": "https://ontology.lib.utk.edu/works#GenericWork",
             "Image": "https://ontology.lib.utk.edu/works#ImageWork",
             "Pdf": "https://ontology.lib.utk.edu/works#PDFWork",
diff --git a/utk_exodus/risearch/risearch.py b/utk_exodus/risearch/risearch.py
index 909e884..4ca4e75 100644
--- a/utk_exodus/risearch/risearch.py
+++ b/utk_exodus/risearch/risearch.py
@@ -154,6 +154,22 @@ def find_pages_in_book(self, book):
         page_results = requests.get(f"{self.base_url}&query={query}").content
         return self.clean_pages(page_results)
 
+    def get_compound_object_parts(self, compound_object):
+        """Query the resource index for `compound_object` and return the cleaned part rows."""
+        query = quote(
+            f"""PREFIX fedora:
+        PREFIX fedoraModel:
+        PREFIX islandora:
+        SELECT ?pid ?sequence ?model WHERE {{
+        ?pid fedora:isConstituentOf ;
+        fedoraModel:hasModel ?model;
+        islandora:isSequenceNumberOf{compound_object.replace(':', '_')} ?sequence .
+        FILTER(REGEX(STR(?model), "islandora")) . }}
+        """
+        )
+        results = requests.get(f"{self.base_url}&query={query}").content
+        return self.clean_compound_parts(results)
+
     @staticmethod
     def clean_pages(results):
         all_pages = []
@@ -165,6 +181,26 @@ def clean_pages(results):
                 )
         return all_pages
 
+    @staticmethod
+    def clean_compound_parts(results):
+        """Turn CSV response bytes into dicts with "pid", "sequence" and "model" keys.
+
+        The header row and empty lines are skipped.
+        """
+        all_parts = []
+        for item in results.decode("utf-8").split("\n"):
+            if item != '"pid","sequence","model"' and item != "":
+                # Split each row once instead of once per column.
+                fields = item.split(",")
+                all_parts.append(
+                    {
+                        "pid": fields[0],
+                        "sequence": fields[1],
+                        "model": fields[2],
+                    }
+                )
+        return all_parts
+
     @staticmethod
     def __lookup_work_type(work_type):
         work_types = {
@@ -309,6 +345,26 @@ def get_works_of_a_type_with_dsid(self, work_type, dsid):
             if result != "" and result != '"pid"'
         ]
 
+    def find_pids_and_pages_from_book_local_id(self, local_id):
+        """Return (pid, page) tuples from the rows of a query filtered on `local_id`."""
+        query = quote(
+            f"""
+        SELECT ?pid ?page WHERE {{
+        ?pid ?book ;
+        ?page .
+        ?book ?id .
+        FILTER(REGEX(?id, "{local_id}"))
+        }}
+        """
+        )
+        results = requests.get(f"{self.base_url}&query={query}").content.decode("utf-8")
+        return [
+            (result.replace("info:fedora/", "").split(",")[0], result.split(',')[1])
+            for result in results.split("\n")
+            if result != "" and result != '"pid","page"'
+        ]
+
+
 if __name__ == "__main__":
     risearch = ResourceIndexSearch()
     x = risearch.get_works_of_a_type_with_dsid("book", "MODS")