From 0843435ce259684852461ac6cd2902b3775ec7fc Mon Sep 17 00:00:00 2001 From: tomcrane Date: Mon, 15 Jan 2024 10:19:58 +0000 Subject: [PATCH] Documentation on Preservation API --- .../Dashboard/Controllers/BrowseController.cs | 9 +- .../Controllers/ImportExportController.cs | 2 + .../Transfer/ContainerDirectory.cs | 21 ++++- LeedsExperiment/Fedora/IFedora.cs | 5 +- LeedsExperiment/Preservation/ExportResult.cs | 30 +++++-- LeedsExperiment/Preservation/FedoraWrapper.cs | 15 ++-- LeedsExperiment/Preservation/IPreservation.cs | 30 ++++++- LeedsExperiment/Preservation/ImportJob.cs | 88 +++++++++++++++++-- .../PreservationService.cs | 15 ++++ LeedsExperiment/SamplesWorker/Worker.cs | 7 +- 10 files changed, 191 insertions(+), 31 deletions(-) diff --git a/LeedsExperiment/Dashboard/Controllers/BrowseController.cs b/LeedsExperiment/Dashboard/Controllers/BrowseController.cs index 009b68e..0874287 100644 --- a/LeedsExperiment/Dashboard/Controllers/BrowseController.cs +++ b/LeedsExperiment/Dashboard/Controllers/BrowseController.cs @@ -30,7 +30,14 @@ public async Task IndexAsync(string? path = null) } if(resource.ObjectPath != path) { - return Problem("ObjectPath != path"); + if((resource.ObjectPath ?? string.Empty) == string.Empty && (path ?? string.Empty) == string.Empty) + { + // not a problem but rationalise this! + } + else + { + return Problem("ObjectPath != path"); + } } if(resource.PreservationApiPartOf != null) { diff --git a/LeedsExperiment/Dashboard/Controllers/ImportExportController.cs b/LeedsExperiment/Dashboard/Controllers/ImportExportController.cs index 3e2a3b7..9594974 100644 --- a/LeedsExperiment/Dashboard/Controllers/ImportExportController.cs +++ b/LeedsExperiment/Dashboard/Controllers/ImportExportController.cs @@ -23,6 +23,7 @@ public async Task ExportStartAsync( [FromRoute] string path, [FromQuery] string? 
version = null) { + throw new NotImplementedException(); // display a list of what's going to be exported // add a default destination in the staging bucket // allow a different destination to be specified (bucket, key root) @@ -43,6 +44,7 @@ public async Task ImportStartAsync( [FromRoute] string path, [FromQuery] string? version = null) { + throw new NotImplementedException(); // work out what can be shared with a create new // (is it just this with a null path?) diff --git a/LeedsExperiment/Fedora/Abstractions/Transfer/ContainerDirectory.cs b/LeedsExperiment/Fedora/Abstractions/Transfer/ContainerDirectory.cs index 2351ae0..615fb70 100644 --- a/LeedsExperiment/Fedora/Abstractions/Transfer/ContainerDirectory.cs +++ b/LeedsExperiment/Fedora/Abstractions/Transfer/ContainerDirectory.cs @@ -5,10 +5,27 @@ public class ContainerDirectory /// /// The repository path (not a full Uri), will end with Slug /// Only contains permitted characters (e.g., no spaces) + /// + /// This is not required if you supply Slug and a parent /// - public required string Path { get; set; } + public string? Path { get; set; } - public string Slug => Path.Split('/')[^1]; + private string? slug; + public string? Slug + { + get + { + if (string.IsNullOrEmpty(Path)) + { + return slug; + } + return Path.Split('/')[^1]; + } + set + { + slug = value; + } + } /// /// The name of the resource in Fedora (dc:title) diff --git a/LeedsExperiment/Fedora/IFedora.cs b/LeedsExperiment/Fedora/IFedora.cs index 2c13d71..73cb81e 100644 --- a/LeedsExperiment/Fedora/IFedora.cs +++ b/LeedsExperiment/Fedora/IFedora.cs @@ -1,4 +1,5 @@ using Fedora.Abstractions; +using Fedora.Abstractions.Transfer; using Fedora.ApiModel; namespace Fedora @@ -20,8 +21,8 @@ public interface IFedora Task CreateArchivalGroup(Uri parent, string slug, string name, Transaction? transaction = null); Task CreateArchivalGroup(string parentPath, string slug, string name, Transaction? 
transaction = null); - Task CreateContainer(Uri parent, string slug, string name, Transaction? transaction = null); - Task CreateContainer(string parentPath, string slug, string name, Transaction? transaction = null); + + Task CreateContainer(Uri parent, ContainerDirectory containerDirectory, Transaction? transaction = null); /// /// DISALLOW a POST for binaries, for now diff --git a/LeedsExperiment/Preservation/ExportResult.cs b/LeedsExperiment/Preservation/ExportResult.cs index c44f003..b38ab2b 100644 --- a/LeedsExperiment/Preservation/ExportResult.cs +++ b/LeedsExperiment/Preservation/ExportResult.cs @@ -7,20 +7,38 @@ namespace Preservation; public class ExportResult { /// - /// For info - the path of the source archival group + /// The Fedora path of the Archival Group /// - public required string Path { get; set; } + public required string ArchivalGroupPath { get; set; } + + + /// + /// The S3 location (later maybe other locations) to which the object was exported + /// + public required string Destination { get; set; } + + /// + /// Currently either FileSystem or S3 + /// + public required string StorageType { get; set; } /// /// The version that was exported /// public required ObjectVersion Version { get; set; } + + /// + /// When the export started + /// public DateTime Start { get; set; } - public DateTime End { get; set; } - // The root location (S3 Uri, directory path) where the ArchivalGroup has been exported - public required string Source { get; set; } - public required string StorageType { get; set; } + /// + /// When the export finished + /// + public DateTime End { get; set; } + /// + /// A list of all the files exported + /// public List Files { get; set; } = []; } diff --git a/LeedsExperiment/Preservation/FedoraWrapper.cs b/LeedsExperiment/Preservation/FedoraWrapper.cs index f33d7db..1f8a4c7 100644 --- a/LeedsExperiment/Preservation/FedoraWrapper.cs +++ b/LeedsExperiment/Preservation/FedoraWrapper.cs @@ -1,5 +1,6 @@ using Fedora; using 
Fedora.Abstractions; +using Fedora.Abstractions.Transfer; using Fedora.ApiModel; using Fedora.Storage; using Fedora.Vocab; @@ -10,6 +11,7 @@ using System.Net.Http.Json; using System.Text; using System.Text.Json; +using System.Xml.Linq; namespace Preservation; @@ -97,15 +99,14 @@ public async Task Proxy(string contentType, string path, string? jsonLdM var parent = GetUri(parentPath); return await CreateContainerInternal(true, parent, slug, name, transaction) as ArchivalGroup; } - public async Task CreateContainer(Uri parent, string slug, string name, Transaction? transaction = null) - { - return await CreateContainerInternal(false, parent, slug, name, transaction); - } - public async Task CreateContainer(string parentPath, string slug, string name, Transaction? transaction = null) + public async Task CreateContainer(Uri parent, ContainerDirectory containerDirectory, Transaction? transaction = null) { - var parent = GetUri(parentPath); - return await CreateContainerInternal(false, parent, slug, name, transaction); + if(containerDirectory.Slug == null) + { + throw new ArgumentNullException(nameof(containerDirectory.Slug)); + } + return await CreateContainerInternal(false, parent, containerDirectory.Slug, containerDirectory.Name, transaction); } private async Task CreateContainerInternal(bool isArchivalGroup, Uri parent, string slug, string name, Transaction? transaction = null) diff --git a/LeedsExperiment/Preservation/IPreservation.cs b/LeedsExperiment/Preservation/IPreservation.cs index 467634f..9fe2a3d 100644 --- a/LeedsExperiment/Preservation/IPreservation.cs +++ b/LeedsExperiment/Preservation/IPreservation.cs @@ -5,22 +5,46 @@ namespace Preservation; public interface IPreservation { // Getting things from Fedora + Task GetResource(string? path); string GetInternalPath(Uri preservationApiUri); Task GetArchivalGroup(string path, string? 
version); // Interacting with a staging area + // =============================== + + /// + /// The Preservation app decides where to put this - in a bucket under a unique key - and then tells you where it is + /// rather than you specifying where you want it put. Do we want to allow that? It would still need to be somewhere in an accessible + /// bucket - better to leave it in the hands of the preservation API to avoid collisions + /// + /// Repository path / identifier of Archival Group + /// (optional) The version to export; if omitted the HEAD (latest) is exported + /// Task Export(string path, string? version); /// - /// Get a diff that can then be executed + /// An ImportJob is a representation of what needs doing to bring the repository ArchivalGroup to the same state + /// as a set of files on disk or S3. It's a wrapper round a diff, that is then "executed" when it is sent to the Import endpoint. + /// + /// The reason to split is to allow the operator (a human user, or software) to see the diff - to verify that the job is what + /// was intended or expected. /// /// /// - /// + /// A partially populated ImportJob Task GetUpdateJob(string path, string source); - // Create or update the job obtained above - latter requires isUpdate explicitly + /// + /// "Execute" the update job obtained above. + /// There is an `isUpdate` flag on ImportJob that must be explicitly set to true if a new version is intended. + /// (to avoid unexpected overwrites). + /// + /// The job will fail if, when entering a transaction, the ArchivalGroup is found to be at a later version than + /// the one marked on the ImportJob. 
+ /// + /// + /// A fully populated Job including the results // Task Import(ImportJob importJob); } diff --git a/LeedsExperiment/Preservation/ImportJob.cs b/LeedsExperiment/Preservation/ImportJob.cs index 66cf3f1..e14ccdd 100644 --- a/LeedsExperiment/Preservation/ImportJob.cs +++ b/LeedsExperiment/Preservation/ImportJob.cs @@ -5,15 +5,34 @@ namespace Preservation; public class ImportJob { + /// + /// The Fedora path of the Archival Group + /// public required string ArchivalGroupPath { get; set; } - // Must be an S3 URI, for now + + /// + /// A filesystem or S3 path for the directory that will be compared to the archival object /// + /// public required string Source { get; set; } + + /// + /// Currently either FileSystem or S3 + /// public required string StorageType { get; set; } + /// + /// The Fedora Uri of the ArchivalGroup + /// public Uri? ArchivalGroupUri { get; set; } - + /// + /// When the diff calculation began + /// public DateTime DiffStart { get; set; } + + /// + /// When the diff calculation finished + /// public DateTime DiffEnd { get; set; } /// @@ -24,11 +43,28 @@ public class ImportJob /// public ObjectVersion? DiffVersion { get; set; } + /// + /// Fedora containers that need to be created to synchronise the Archival Group object with the source + /// public List ContainersToAdd { get; set; } = []; + + /// + /// Fedora binaries that need to be created to synchronise the Archival Group object with the source + /// public List FilesToAdd { get; set; } = []; + + /// + /// Fedora binaries that need to be deleted to synchronise the Archival Group object with the source + /// public List FilesToDelete { get; set; } = []; + + /// + /// Fedora binaries that need to be UPDATED to synchronise the Archival Group object with the source + /// Typically because their checksums don't match + /// public List FilesToPatch { get; set; } = []; - // FilesToRename? + + // FilesToRename? Can we even do that in Fedora? 
/// /// While any required new containers can be created as files are added (create along path), @@ -36,22 +72,58 @@ public class ImportJob /// public List ContainersToDelete { get; set; } = []; - + /// + /// (populated when the job is executed) + /// Any containers added to Fedora as part of this operation + /// public List ContainersAdded { get; set; } = []; + + /// + /// (populated when the job is executed) + /// Any binaries added to Fedora as part of this operation + /// public List FilesAdded { get; set; } = []; + + /// + /// (populated when the job is executed) + /// Any binaries deleted from Fedora as part of this operation + /// (left as Tombstones) + /// public List FilesDeleted { get; set; } = []; + + /// + /// (populated when the job is executed) + /// Any files UPDATED in Fedora as part of this operation + /// (typically, new binary content supplied, but could be other properties) + /// public List FilesPatched { get; set; } = []; + + /// + /// (populated when the job is executed) + /// Any containers deleted from Fedora as part of this operation + /// public List ContainersDeleted { get; set; } = []; - // Must be explicitly set to true to allow an update of an existing ArchivalGroup + /// + /// Must be explicitly set to true to allow an update of an existing ArchivalGroup + /// public bool IsUpdate { get; set; } - + /// + /// When the job execution started + /// public DateTime Start { get; set; } - public DateTime End { get; set; } + /// + /// When the job execution finished + /// + public DateTime End { get; set; } + /// + /// (populated when the job is executed) + /// The version that the Archival Group is now at + /// Should be vNext OCFL, one higher than DiffVersion + /// public ObjectVersion? 
NewVersion { get; set; } - } diff --git a/LeedsExperiment/PreservationApiClient/PreservationService.cs b/LeedsExperiment/PreservationApiClient/PreservationService.cs index abff2a1..0596026 100644 --- a/LeedsExperiment/PreservationApiClient/PreservationService.cs +++ b/LeedsExperiment/PreservationApiClient/PreservationService.cs @@ -66,4 +66,19 @@ public string GetInternalPath(Uri preservationApiUri) var ag = await _httpClient.GetFromJsonAsync(agApi); return ag; } + + public Task Export(string path, string? version) + { + throw new NotImplementedException(); + } + + public Task GetUpdateJob(string path, string source) + { + throw new NotImplementedException(); + } + + public Task Import(ImportJob importJob) + { + throw new NotImplementedException(); + } } diff --git a/LeedsExperiment/SamplesWorker/Worker.cs b/LeedsExperiment/SamplesWorker/Worker.cs index 72657a8..80c415a 100644 --- a/LeedsExperiment/SamplesWorker/Worker.cs +++ b/LeedsExperiment/SamplesWorker/Worker.cs @@ -1,5 +1,6 @@ using Fedora; using Fedora.Abstractions; +using Fedora.Abstractions.Transfer; namespace SamplesWorker { @@ -104,11 +105,13 @@ private async Task OcflV1() // POST the basic container foo var fooDir = new DirectoryInfo(Path.Combine(localPath, "foo")); - var fooContainer = await fedora.CreateContainer(archivalGroup.Location, fooDir.Name, fooDir.Name, transaction); + var cd = new ContainerDirectory { Name = fooDir.Name, Slug = fooDir.Name }; + var fooContainer = await fedora.CreateContainer(archivalGroup.Location, cd, transaction); // POST into foo the binary bar.xml var localBarXml = new FileInfo(Path.Combine(localPath, "foo", "bar.xml")); - var xmlLocation = archivalGroup.GetResourceUri("foo/bar.xml"); + var xmlLocation = archivalGroup.GetResourceUri("foo/bar.xml"); // Path is used here, which we could just supply as-is to BinaryFile... BUT needs archivalGroup to resolve, do we pass in the ArchivalGroup Uri as well? Maybe yes, you're supplying a file in the context of an archival group. 
+ // A factory on ArchivalGroup? But then how do you make a new AG in one v1 transaction? var fedoraBarXml = await fedora.PutBinary(xmlLocation, localBarXml, localBarXml.Name, "text/xml", transaction); await fedora.CommitTransaction(transaction);