From 05a758234d8a5b1e0400ef65967cb76e4e3960e8 Mon Sep 17 00:00:00 2001 From: tomcrane Date: Thu, 4 Jan 2024 13:38:21 +0000 Subject: [PATCH] Add observations about memento usage --- .../appsettings.Production.json | 16 ++++++ LeedsExperiment/Preservation/FedoraWrapper.cs | 50 +++++++++++++++++-- 2 files changed, 62 insertions(+), 4 deletions(-) create mode 100644 LeedsExperiment/Preservation.API/appsettings.Production.json diff --git a/LeedsExperiment/Preservation.API/appsettings.Production.json b/LeedsExperiment/Preservation.API/appsettings.Production.json new file mode 100644 index 0000000..ddc856f --- /dev/null +++ b/LeedsExperiment/Preservation.API/appsettings.Production.json @@ -0,0 +1,16 @@ +{ + "Logging": { + "LogLevel": { + "Default": "Information", + "Microsoft.AspNetCore": "Warning" + } + }, + "Fedora-AWS": { + "Profile": "uol", + "Region": "eu-west-1" + }, + "Fedora-AWS-S3": { + "bucket": "uol-expts-fedora-01" + }, + "AllowedHosts": "*" +} \ No newline at end of file diff --git a/LeedsExperiment/Preservation/FedoraWrapper.cs b/LeedsExperiment/Preservation/FedoraWrapper.cs index 9fb79fa..07c20a3 100644 --- a/LeedsExperiment/Preservation/FedoraWrapper.cs +++ b/LeedsExperiment/Preservation/FedoraWrapper.cs @@ -496,13 +496,55 @@ private List GetIdsFromContainsProperty(JsonElement element) // WithContainedDescriptions could return @graph or it could return a single object if the container has no children + // https://uol.digirati.io/api/fedora/application/ld+json/storage-01/ocfl-expt-01-03-24-16-04-18/fcr:versions + // => + // https://uol.digirati.io/api/fedora/application/ld+json/storage-01/ocfl-expt-01-03-24-16-04-18/fcr:versions/20240103160421 + // https://uol.digirati.io/api/fedora/application/ld+json/storage-01/ocfl-expt-01-03-24-16-04-18/fcr:versions/20240103160432 + // https://uol.digirati.io/api/fedora/application/ld+json/storage-01/ocfl-expt-01-03-24-16-04-18/fcr:versions/20240103160437 + // https://uol.digirati.io/api/fedora/application/ld+json/storage-01/ocfl-expt-01-03-24-16-04-18/fcr:versions/20240103160446 + // PROBLEM - I can't just append /fcr:version/20240103160421 because that causes an error if you also ask for .WithContainedDescriptions() - // "Invalid request for memento" - // presumably because the contained descriptions don't each have that version? + // https://uol.digirati.io/api/fedora/application/ld+json/storage-01/ocfl-expt-01-03-24-16-04-18/fcr:versions + + // This is OK: + // https://uol.digirati.io/api/fedora/application/ld+json/storage-01/ocfl-expt-01-03-24-16-04-18/fcr:versions/20240103160421 + // But not this + // https://uol.digirati.io/api/fedora/application/ld+json/storage-01/ocfl-expt-01-03-24-16-04-18/fcr:versions/20240103160421?contained=true + // Error: Unable to retrieve triples for info:fedora/storage-01/ocfl-expt-01-03-24-16-04-18/image.tiff + + // image.tiff is now a tombstone + + // Similarly: + // https://uol.digirati.io/api/fedora/application/ld+json/storage-01/ocfl-expt-01-03-24-16-04-18/foo/fcr:versions/20240103160421?contained=true + // This shows bar.xml (the single contained object) as having a size of 75 - but at timestamp 20240103160421 it didn't, as we can see: + // https://uol.digirati.io/api/fedora/application/ld+json/storage-01/ocfl-expt-01-03-24-16-04-18/foo/bar.xml/fcr:metadata/fcr:versions/20240103160421 + + // So the contained versions are (it seems) not asked for at the same version + + // I can't send the accept date header and also ask for withcontaineddescriptions IF one of them is now a tombstone + // https://uol.digirati.io/api/fedora/application/ld+json/storage-01/ocfl-expt-01-03-24-16-04-18?acceptDate=20240103160421 + // https://uol.digirati.io/api/fedora/application/ld+json/storage-01/ocfl-expt-01-03-24-16-04-18?acceptDate=20240103160421&contained=true + // + // I can't even do this: + // https://uol.digirati.io/api/fedora/application/ld+json/storage-01/ocfl-expt-01-03-24-16-04-18/image.tiff/fcr:metadata?acceptDate=20240103160421 + // although I can ask for it directly + // https://uol.digirati.io/api/fedora/application/ld+json/storage-01/ocfl-expt-01-03-24-16-04-18/image.tiff/fcr:metadata/fcr:versions/20240103160421 + + // If I try to just ask for specific versions of everything in my traversal, that will fail: + // OK because a specific version exists + // https://uol.digirati.io/api/fedora/application/ld+json/storage-01/ocfl-expt-01-03-24-16-04-18/foo/bar.xml/fcr:metadata/fcr:versions/20240103160432 + // But not OK: + // https://uol.digirati.io/api/fedora/application/ld+json/storage-01/ocfl-expt-01-03-24-16-04-18/foo/bar.xml/fcr:metadata/fcr:versions/20240103160437 + // Error: There is no version in info:fedora/storage-01/ocfl-expt-01-03-24-16-04-18/foo/bar.xml/fcr:metadata/fcr:versions/20240103160437 with a created date matching 2024-01-03T16:04:37Z + + + // BUT I could use acceptDate to see that - + // https://uol.digirati.io/api/fedora/application/ld+json/storage-01/ocfl-expt-01-03-24-16-04-18/foo/bar.xml/fcr:metadata?acceptDate=20240103160437 + // ...because unlike image.tiff, the resource still exists + + // So, given all this info, how do I traverse an archival group to gather a specific VERSION? - // but - // what's most efficient way? var response = await httpClient.SendAsync(request); bool hasArchivalGroupHeader = response.HasArchivalGroupHeader(); if (isArchivalGroup && !hasArchivalGroupHeader)