Merge pull request #14 from utkdigitalinitiatives/compound_objects

Migrate compound objects.
utkdigitalinitiatives · May 31, 2024 · 17048c8 · 17048c8
2 parents 7c6409e + 8618f95
commit 17048c8
Show file tree

Hide file tree

Showing 6 changed files with 180 additions and 3 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "utk-exodus"
-version = "0.2.0"
+version = "0.2.1"
 description = "A tool for building import sheets from UTK legacy systems"
 authors = ["Mark Baggett <mbagget1@utk.edu>"]
 readme = "README.md"

diff --git a/utk_exodus/exodus.py b/utk_exodus/exodus.py
@@ -371,3 +371,28 @@ def export_errors(
     ei = ExistingImport(import_ids, directory, initial_auth=(os.getenv('HYKU_BASIC_AUTH_USER'), os.getenv('HYKU_BASIC_AUTH_PASS')))
     ei.sign_in_to_hyku(os.getenv('HYKU_USER'), os.getenv('HYKU_PASS'))
     ei.export_errors()
+
+@cli.command(
+    "add_datastreams",
+    help="Add datastreams to existing PIDS",
+)
+@click.option(
+    "--path",
+    "-p",
+    required=True,
+    help="Path to the Original Files",
+)
+def add_datastreams(
+    path: str,
+) -> None:
+    """Walk ``path`` and add each file found as a datastream on a Fedora object.
+
+    Each file name is split on ``_``: the first piece is used as the PID and
+    the second piece, up to its first ``.``, is used as the datastream id.
+    Files whose names contain no ``_`` cannot be mapped to a PID/DSID pair and
+    are reported and skipped instead of aborting the whole run.
+
+    Args:
+        path: Root directory to walk recursively.
+    """
+    print(f"Adding datastreams {path}.")
+    # The connection settings are the same for every file, so read them once.
+    auth = (os.getenv("FEDORA_USERNAME"), os.getenv("FEDORA_PASSWORD"))
+    fedora_uri = os.getenv("FEDORA_URI")
+    # A separate name for the walked directory keeps the ``path`` argument intact.
+    for current_directory, _directories, files in os.walk(path):
+        for file in tqdm(files):
+            name_parts = file.split('_')
+            if len(name_parts) < 2:
+                # Without an underscore there is no datastream id to extract.
+                print(f"Skipping {os.path.join(current_directory, file)}: no '_' in file name.")
+                continue
+            pid = name_parts[0]
+            dsid = name_parts[1].split('.')[0]
+            fedora = FedoraObject(
+                auth=auth,
+                fedora_uri=fedora_uri,
+                pid=pid,
+            )
+            fedora.add_datastream(dsid, os.path.join(current_directory, file))
diff --git a/utk_exodus/fedora/fedora.py b/utk_exodus/fedora/fedora.py
@@ -1,5 +1,6 @@
 import requests
 import xmltodict
+from urllib.parse import quote
 
 
 class FedoraObject:
@@ -80,12 +81,86 @@ def write_all_versions(self, dsid, output):
                 self.getDatastream(dsid, output, version["dsCreateDate"])
         return
 
+    def add_datastream(self, dsid, file, mimetype="text/plain"):
+        """POST a local file to this object's ``datastreams/{dsid}`` endpoint.
+
+        The request asks for a control group ``M``, versionable, state ``A``
+        datastream labelled with ``dsid``.
+
+        Args:
+            dsid: Datastream id; also used as the label and in the log message.
+            file: Path of the file whose bytes are sent as the request body.
+            mimetype: Value sent in the ``Content-Type`` header.
+
+        Returns:
+            The ``requests`` response object; the status is not checked here.
+        """
+        url = (
+            f"{self.fedora_uri}/objects/{self.pid}/datastreams/{dsid}?controlGroup=M&dsLabel={dsid}&versionable=true"
+            f"&dsState=A&logMessage=Added+{dsid}+datastream+to+{self.pid}."
+        )
+        # Open the file in a context manager so the handle is closed once the
+        # upload finishes, even if the request raises.
+        with open(file, "rb") as payload:
+            r = requests.post(
+                url,
+                auth=self.auth,
+                headers={"Content-Type": mimetype},
+                data=payload,
+            )
+        return r
+
+    def purge_relationship(self, predicate, object, is_literal=True):
+        """Send a DELETE for one relationship whose subject is this object.
+
+        Args:
+            predicate: Predicate URI; percent-encoded with ``quote``.
+            object: Object URI or literal value; percent-encoded with ``quote``.
+            is_literal: Interpolated as-is into the ``isLiteral`` query parameter.
+
+        Returns:
+            The ``requests`` response object.
+        """
+        endpoint = f"{self.fedora_uri}/objects/{self.pid}/relationships"
+        query_string = (
+            f"?subject=info%3afedora/{self.pid}"
+            f"&predicate={quote(predicate)}"
+            f"&object={quote(object)}"
+            f"&isLiteral={is_literal}"
+        )
+        return requests.delete(endpoint + query_string, auth=self.auth)
+
+    def add_relationship(self, predicate, object, is_literal=True):
+        """POST one new relationship whose subject is this object.
+
+        Args:
+            predicate: Predicate URI; percent-encoded with ``quote``.
+            object: Object URI or literal value; percent-encoded with ``quote``.
+            is_literal: Interpolated as-is into the ``isLiteral`` query parameter.
+
+        Returns:
+            The ``requests`` response object.
+        """
+        endpoint = f"{self.fedora_uri}/objects/{self.pid}/relationships/new"
+        query_string = (
+            f"?subject=info%3afedora/{self.pid}"
+            f"&predicate={quote(predicate)}"
+            f"&object={quote(object)}"
+            f"&isLiteral={is_literal}"
+        )
+        return requests.post(endpoint + query_string, auth=self.auth)
+
+    def remove_membership_of_page(self, book_pid):
+        """Purge this object's isPageOf and isMemberOf relationships to a book.
+
+        Args:
+            book_pid: PID of the book; it is prefixed with ``info:fedora/``.
+        """
+        book_uri = f"info:fedora/{book_pid}"
+        # isPageOf is purged first, then isMemberOf; neither object is a literal.
+        for predicate in (
+            "http://islandora.ca/ontology/relsext#isPageOf",
+            "info:fedora/fedora-system:def/relations-external#isMemberOf",
+        ):
+            self.purge_relationship(predicate, book_uri, False)
+        return
+
+    def add_membership_of_page(self, book_pid):
+        """Add isPageOf and isMemberOf relationships from this object to a book.
+
+        Args:
+            book_pid: PID of the book; it is prefixed with ``info:fedora/``.
+        """
+        book_uri = f"info:fedora/{book_pid}"
+        # isPageOf is added first, then isMemberOf; neither object is a literal.
+        for predicate in (
+            "http://islandora.ca/ontology/relsext#isPageOf",
+            "info:fedora/fedora-system:def/relations-external#isMemberOf",
+        ):
+            self.add_relationship(predicate, book_uri, False)
+        return
+
+    def remove_sequencing(self, sequence_number):
+        """Purge the three literal sequencing relationships for one value.
+
+        Args:
+            sequence_number: Literal value used as the object of each purge.
+        """
+        relsext = "http://islandora.ca/ontology/relsext#"
+        # Same order as before: sequence number, page number, section.
+        for local_name in ("isSequenceNumber", "isPageNumber", "isSection"):
+            self.purge_relationship(relsext + local_name, sequence_number, True)
+        return
+
+    def add_sequencing(self, sequence_number):
+        """Add the three literal sequencing relationships for one value.
+
+        Args:
+            sequence_number: Literal value used as the object of each addition.
+        """
+        relsext = "http://islandora.ca/ontology/relsext#"
+        # Same order as before: sequence number, page number, section.
+        for local_name in ("isSequenceNumber", "isPageNumber", "isSection"):
+            self.add_relationship(relsext + local_name, sequence_number, True)
+        return
+
 
 if __name__ == "__main__":
     import os
     x = FedoraObject(
         auth=(os.getenv("FEDORA_USERNAME"), os.getenv("FEDORA_PASSWORD")),
         fedora_uri=os.getenv("FEDORA_URI"),
-        pid="roth:10"
+        pid="beacon:35815"
     )
-    x.getDatastream("OBJ", "tmp/roth2")
+    x.remove_membership_of_page("beacon:35814")
+    x.remove_sequencing("10")
+    x.add_sequencing("12")
+    x.add_membership_of_page("beacon:35825")
diff --git a/utk_exodus/finder/finder.py b/utk_exodus/finder/finder.py
@@ -188,6 +188,18 @@ def __add_files(self, what_to_add=['filesets', 'attachments']):
                             new_csv_content.append(self.__add_an_attachment(dsid, row))
                         if 'filesets' in what_to_add:
                             new_csv_content.append(self.__add_a_file(dsid, row))
+            elif row['model'] == "CompoundObject":
+                for dsid in all_files:
+                    if 'PRESERVE' in all_files and 'OBJ' in all_files:
+                        if 'attachments' in what_to_add:
+                            new_csv_content.append(self.__add_an_attachment(dsid, row, True))
+                        if 'filesets' in what_to_add:
+                            new_csv_content.append(self.__add_a_file(dsid, row, True))
+                    else:
+                        if 'attachments' in what_to_add:
+                            new_csv_content.append(self.__add_an_attachment(dsid, row))
+                        if 'filesets' in what_to_add:
+                            new_csv_content.append(self.__add_a_file(dsid, row))
             elif row['model'] == "Page":
                 dsids_to_remove = ('MODS', 'RELS-INT', 'PDF')
                 for dsid in dsids_to_remove:

diff --git a/utk_exodus/metadata/metadata.py b/utk_exodus/metadata/metadata.py
@@ -1140,13 +1140,21 @@ def __execute(self, namespaces):
             all_file_data.append(output_data)
         for item in all_file_data:
             pages = self.look_for_pages(item)
+            parts = self.look_for_compound_parts(item)
             for page in pages:
                 new_page = item.copy()
                 new_page["source_identifier"] = page["pid"].replace("info:fedora/", "")
                 new_page["parents"] = item["source_identifier"]
                 new_page["model"] = "Page"
                 new_page["sequence"] = page["page"]
                 all_pages.append(new_page)
+            for part in parts:
+                new_part = item.copy()
+                new_part["source_identifier"] = part["pid"].replace("info:fedora/", "")
+                new_part["parents"] = item["source_identifier"]
+                new_part["model"] = "Page"
+                new_part["sequence"] = part["sequence"]
+                all_pages.append(new_part)
         for page in all_pages:
             all_file_data.append(page)
         return all_file_data
@@ -1156,6 +1164,11 @@ def look_for_pages(self, data):
             return ResourceIndexSearch().find_pages_in_book(data["source_identifier"])
         return []
 
+    def look_for_compound_parts(self, data):
+        """Return the parts of a compound object, or ``[]`` for any other model.
+
+        Args:
+            data: Row dict; ``data["model"]`` is always read, and
+                ``data["source_identifier"]`` is read for compound objects.
+        """
+        # Guard clause: only compound objects are looked up in the resource index.
+        if data["model"] != "CompoundObject":
+            return []
+        return ResourceIndexSearch().get_compound_object_parts(data["source_identifier"])
+
     def __find_unique_fieldnames(self, data):
         for k, v in data.items():
             if k not in self.fieldnames:
@@ -1166,6 +1179,7 @@ def __dereference_islandora_type(self, file):
         islandora_types = {
             "info:fedora/islandora:sp-audioCModel": "Audio",
             "info:fedora/islandora:bookCModel": "Book",
+            "info:fedora/islandora:compoundCModel": "CompoundObject",
             "info:fedora/islandora:binaryObjectCModel": "Generic",
             "info:fedora/islandora:sp_large_image_cmodel": "Image",
             "info:fedora/islandora:sp_basic_image": "Image",
@@ -1185,6 +1199,7 @@ def __get_utk_ontology_value(model):
         ontology_values = {
             "Audio": "https://ontology.lib.utk.edu/works#AudioWork",
             "Book": "https://ontology.lib.utk.edu/works#BookWork",
+            "CompoundObject": "https://ontology.lib.utk.edu/works#CompoundObjectWork",
             "Generic": "https://ontology.lib.utk.edu/works#GenericWork",
             "Image": "https://ontology.lib.utk.edu/works#ImageWork",
             "Pdf": "https://ontology.lib.utk.edu/works#PDFWork",

diff --git a/utk_exodus/risearch/risearch.py b/utk_exodus/risearch/risearch.py
@@ -154,6 +154,22 @@ def find_pages_in_book(self, book):
         page_results = requests.get(f"{self.base_url}&query={query}").content
         return self.clean_pages(page_results)
 
+    def get_compound_object_parts(self, compound_object):
+        """Query the resource index for the constituents of a compound object.
+
+        The query selects ``?pid``, ``?sequence`` and ``?model`` for every
+        resource that is ``isConstituentOf`` the given object and whose model
+        URI matches ``islandora``.
+
+        Args:
+            compound_object: PID of the compound object, e.g. ``namespace:123``.
+
+        Returns:
+            The list of dicts produced by ``clean_compound_parts``.
+        """
+        # The sequence predicate is built from the parent PID with ":" replaced
+        # by "_" (presumably Islandora's per-parent isSequenceNumberOf<pid>
+        # convention; confirm against the repository's RELS-EXT).
+        query = quote(
+            f"""PREFIX fedora: <info:fedora/fedora-system:def/relations-external#>
+            PREFIX fedoraModel: <info:fedora/fedora-system:def/model#>
+            PREFIX islandora: <http://islandora.ca/ontology/relsext#>
+            SELECT ?pid ?sequence ?model WHERE {{ 
+            ?pid fedora:isConstituentOf <info:fedora/{compound_object}>;
+            fedoraModel:hasModel ?model;
+            islandora:isSequenceNumberOf{compound_object.replace(':', '_')} ?sequence . 
+            FILTER(REGEX(STR(?model), "islandora")) . }}
+            """
+        )
+        results = requests.get(f"{self.base_url}&query={query}").content
+        # Parse the response once; the leftover debug print of the parsed rows
+        # has been removed.
+        return self.clean_compound_parts(results)
+
     @staticmethod
     def clean_pages(results):
         all_pages = []
@@ -165,6 +181,21 @@ def clean_pages(results):
                 )
         return all_pages
 
+    @staticmethod
+    def clean_compound_parts(results):
+        """Turn a raw CSV response into a list of part dicts.
+
+        Args:
+            results: UTF-8 encoded bytes, one comma-separated row per line.
+
+        Returns:
+            One ``{"pid", "sequence", "model"}`` dict per row, taken from the
+            first three comma-separated fields. Empty lines and the
+            ``"pid","sequence","model"`` header line are dropped.
+        """
+        header = '"pid","sequence","model"'
+        all_parts = []
+        for row in results.decode("utf-8").split("\n"):
+            if row == "" or row == header:
+                continue
+            fields = row.split(",")
+            all_parts.append(
+                {"pid": fields[0], "sequence": fields[1], "model": fields[2]}
+            )
+        return all_parts
+
     @staticmethod
     def __lookup_work_type(work_type):
         work_types = {
@@ -309,6 +340,25 @@ def get_works_of_a_type_with_dsid(self, work_type, dsid):
             if result != "" and result != '"pid"'
         ]
 
+    def find_pids_and_pages_from_book_local_id(self, local_id):
+        """Find the pages of any book whose dc:identifier matches ``local_id``.
+
+        Args:
+            local_id: Text interpolated into the query's ``REGEX`` filter on
+                the book identifier.
+
+        Returns:
+            A list of ``(pid, page)`` tuples with ``info:fedora/`` stripped
+            from the pid. Empty lines and the ``"pid","page"`` header line are
+            dropped.
+        """
+        query = quote(
+            f"""
+            SELECT ?pid ?page WHERE {{
+            ?pid <info:fedora/fedora-system:def/relations-external#isMemberOf> ?book ;
+            <http://islandora.ca/ontology/relsext#isPageNumber> ?page .
+            ?book <http://purl.org/dc/elements/1.1/identifier> ?id .
+            FILTER(REGEX(?id, "{local_id}"))
+            }}
+            """
+        )
+        response = requests.get(f"{self.base_url}&query={query}")
+        rows = response.content.decode("utf-8").split("\n")
+        pids_and_pages = []
+        for row in rows:
+            if row == "" or row == '"pid","page"':
+                continue
+            # The pid comes from the prefix-stripped row, the page from the raw row.
+            pid = row.replace("info:fedora/", "").split(",")[0]
+            page = row.split(',')[1]
+            pids_and_pages.append((pid, page))
+        return pids_and_pages
+
+
 if __name__ == "__main__":
     risearch = ResourceIndexSearch()
     x = risearch.get_works_of_a_type_with_dsid("book", "MODS")