Skip to content

Commit

Permalink
Documentation on Preservation API
Browse files Browse the repository at this point in the history
  • Loading branch information
tomcrane committed Jan 15, 2024
1 parent 78a125d commit 0843435
Show file tree
Hide file tree
Showing 10 changed files with 191 additions and 31 deletions.
9 changes: 8 additions & 1 deletion LeedsExperiment/Dashboard/Controllers/BrowseController.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,14 @@ public async Task<IActionResult> IndexAsync(string? path = null)
}
if(resource.ObjectPath != path)
{
return Problem("ObjectPath != path");
if((resource.ObjectPath ?? string.Empty) == string.Empty && (path ?? string.Empty) == string.Empty)
{
// not a problem but rationalise this!
}
else
{
return Problem("ObjectPath != path");
}
}
if(resource.PreservationApiPartOf != null)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ public async Task<IActionResult> ExportStartAsync(
[FromRoute] string path,
[FromQuery] string? version = null)
{
throw new NotImplementedException();
// display a list of what's going to be exported
// add a default destination in the staging bucket
// allow a different destination to be specified (bucket, key root)
Expand All @@ -43,6 +44,7 @@ public async Task<IActionResult> ImportStartAsync(
[FromRoute] string path,
[FromQuery] string? version = null)
{
throw new NotImplementedException();
// work out what can be shared with a create new
// (is it just this with a null path?)

Expand Down
21 changes: 19 additions & 2 deletions LeedsExperiment/Fedora/Abstractions/Transfer/ContainerDirectory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,27 @@ public class ContainerDirectory
/// <summary>
/// The repository path (not a full Uri), will end with Slug
/// Only contains permitted characters (e.g., no spaces)
///
/// This is not required if you supply Slug and a parent
/// </summary>
public required string Path { get; set; }
public string? Path { get; set; }

public string Slug => Path.Split('/')[^1];
// Holds an explicitly assigned slug; only consulted while Path is null or empty.
private string? slug;

/// <summary>
/// The last '/'-separated segment of <see cref="Path"/> when Path is non-empty;
/// otherwise whatever value was last assigned through the setter (possibly null).
/// Assigning a value always stores it, but the getter keeps deriving the result
/// from Path for as long as Path is non-empty.
/// </summary>
public string? Slug
{
    get => string.IsNullOrEmpty(Path) ? slug : Path.Split('/')[^1];
    set => slug = value;
}

/// <summary>
/// The name of the resource in Fedora (dc:title)
Expand Down
5 changes: 3 additions & 2 deletions LeedsExperiment/Fedora/IFedora.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using Fedora.Abstractions;
using Fedora.Abstractions.Transfer;
using Fedora.ApiModel;

namespace Fedora
Expand All @@ -20,8 +21,8 @@ public interface IFedora

Task<ArchivalGroup?> CreateArchivalGroup(Uri parent, string slug, string name, Transaction? transaction = null);
Task<ArchivalGroup?> CreateArchivalGroup(string parentPath, string slug, string name, Transaction? transaction = null);
Task<Container?> CreateContainer(Uri parent, string slug, string name, Transaction? transaction = null);
Task<Container?> CreateContainer(string parentPath, string slug, string name, Transaction? transaction = null);

Task<Container?> CreateContainer(Uri parent, ContainerDirectory containerDirectory, Transaction? transaction = null);

/// <summary>
/// DISALLOW a POST for binaries, for now
Expand Down
30 changes: 24 additions & 6 deletions LeedsExperiment/Preservation/ExportResult.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,38 @@ namespace Preservation;
public class ExportResult
{
/// <summary>
/// For info - the path of the source archival group
/// The Fedora path of the Archival Group
/// </summary>
public required string Path { get; set; }
public required string ArchivalGroupPath { get; set; }


/// <summary>
/// The S3 location (later maybe other locations) to which the object was exported
/// </summary>
public required string Destination { get; set; }

/// <summary>
/// Currently either FileSystem or S3
/// </summary>
public required string StorageType { get; set; }

/// <summary>
/// The version that was exported
/// </summary>
public required ObjectVersion Version { get; set; }

/// <summary>
/// When the export started
/// </summary>
public DateTime Start { get; set; }
public DateTime End { get; set; }

// The root location (S3 Uri, directory path) where the ArchivalGroup has been exported
public required string Source { get; set; }
public required string StorageType { get; set; }
/// <summary>
/// When the export finished
/// </summary>
public DateTime End { get; set; }

/// <summary>
/// A list of all the files exported
/// </summary>
public List<BinaryFile> Files { get; set; } = [];
}
15 changes: 8 additions & 7 deletions LeedsExperiment/Preservation/FedoraWrapper.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using Fedora;
using Fedora.Abstractions;
using Fedora.Abstractions.Transfer;
using Fedora.ApiModel;
using Fedora.Storage;
using Fedora.Vocab;
Expand All @@ -10,6 +11,7 @@
using System.Net.Http.Json;
using System.Text;
using System.Text.Json;
using System.Xml.Linq;

namespace Preservation;

Expand Down Expand Up @@ -97,15 +99,14 @@ public async Task<string> Proxy(string contentType, string path, string? jsonLdM
var parent = GetUri(parentPath);
return await CreateContainerInternal(true, parent, slug, name, transaction) as ArchivalGroup;
}
public async Task<Container?> CreateContainer(Uri parent, string slug, string name, Transaction? transaction = null)
{
return await CreateContainerInternal(false, parent, slug, name, transaction);
}

public async Task<Container?> CreateContainer(string parentPath, string slug, string name, Transaction? transaction = null)
public async Task<Container?> CreateContainer(Uri parent, ContainerDirectory containerDirectory, Transaction? transaction = null)
{
var parent = GetUri(parentPath);
return await CreateContainerInternal(false, parent, slug, name, transaction);
if(containerDirectory.Slug == null)
{
throw new ArgumentNullException(nameof(containerDirectory.Slug));
}
return await CreateContainerInternal(false, parent, containerDirectory.Slug, containerDirectory.Name, transaction);
}

private async Task<Container?> CreateContainerInternal(bool isArchivalGroup, Uri parent, string slug, string name, Transaction? transaction = null)
Expand Down
30 changes: 27 additions & 3 deletions LeedsExperiment/Preservation/IPreservation.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,46 @@ namespace Preservation;
public interface IPreservation
{
// Getting things from Fedora

Task<Resource?> GetResource(string? path);
string GetInternalPath(Uri preservationApiUri);
Task<ArchivalGroup?> GetArchivalGroup(string path, string? version);


// Interacting with a staging area
// ===============================

/// <summary>
/// The Preservation app decides where to put this - in a bucket under a unique key - and then tells you where it is
/// rather than you specifying where you want it put. Do we want to allow that? It would still need to be somewhere in an accessible
/// bucket - better to leave it in the hands of the preservation API to avoid collisions
/// </summary>
/// <param name="path">Repository path / identifier of Archival Group</param>
/// <param name="version">(optional) The version to export; if omitted the HEAD (latest) is exported</param>
/// <returns></returns>
Task<ExportResult> Export(string path, string? version);

/// <summary>
/// Get a diff that can then be executed
/// An ImportJob is a representation of what needs doing to bring the repository ArchivalGroup to the same state
/// as a set of files on disk or S3. It's a wrapper round a diff, that is then "executed" when it is sent to the Import endpoint.
///
/// The reason to split is to allow the operator (a human user, or software) to see the diff - to verify that the job is what
/// was intended or expected.
/// </summary>
/// <param name="path"></param>
/// <param name="source"></param>
/// <returns></returns>
/// <returns>A partially populated ImportJob</returns>
Task<ImportJob> GetUpdateJob(string path, string source);

// Create or update the job obtained above - latter requires isUpdate explicitly
/// <summary>
/// "Execute" the update job obtained above.
/// There is an `isUpdate` flag on ImportJob that must be explicitly set to true if a new version is intended.
/// (to avoid unexpected overwrites).
///
/// The job will fail if, when entering a transaction, the ArchivalGroup is found to be at a later version than
/// the one marked on the ImportJob.
/// </summary>
/// <param name="importJob"></param>
/// <returns>A fully populated Job including the results</returns>
Task<ImportJob> Import(ImportJob importJob);
}
88 changes: 80 additions & 8 deletions LeedsExperiment/Preservation/ImportJob.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,34 @@ namespace Preservation;

public class ImportJob
{
/// <summary>
/// The Fedora path of the Archival Group
/// </summary>
public required string ArchivalGroupPath { get; set; }
// Must be an S3 URI, for now

/// <summary>
/// A filesystem or S3 path for the directory that will be compared to the archival object
/// </summary>
public required string Source { get; set; }

/// <summary>
/// Currently either FileSystem or S3
/// </summary>
public required string StorageType { get; set; }

/// <summary>
/// The Fedora Uri of the ArchivalGroup
/// </summary>
public Uri? ArchivalGroupUri { get; set; }


/// <summary>
/// When the diff calculation began
/// </summary>
public DateTime DiffStart { get; set; }

/// <summary>
/// When the diff calculation finished
/// </summary>
public DateTime DiffEnd { get; set; }

/// <summary>
Expand All @@ -24,34 +43,87 @@ public class ImportJob
/// </summary>
public ObjectVersion? DiffVersion { get; set; }

/// <summary>
/// Fedora containers that need to be created to synchronise the Archival Group object with the source
/// </summary>
public List<ContainerDirectory> ContainersToAdd { get; set; } = [];

/// <summary>
/// Fedora binaries that need to be created to synchronise the Archival Group object with the source
/// </summary>
public List<BinaryFile> FilesToAdd { get; set; } = [];

/// <summary>
/// Fedora binaries that need to be deleted to synchronise the Archival Group object with the source
/// </summary>
public List<BinaryFile> FilesToDelete { get; set; } = [];

/// <summary>
/// Fedora binaries that need to be UPDATED to synchronise the Archival Group object with the source
/// Typically because their checksums don't match
/// </summary>
public List<BinaryFile> FilesToPatch { get; set; } = [];
// FilesToRename?

// FilesToRename? Can we even do that in Fedora?

/// <summary>
/// While any required new containers can be created as files are added (create along path),
/// we may end up with containers that have no files in them; these need to be deleted from Fedora.
/// </summary>
public List<ContainerDirectory> ContainersToDelete { get; set; } = [];


/// <summary>
/// (populated when the job is executed)
/// Any containers added to Fedora as part of this operation
/// </summary>
public List<ContainerDirectory> ContainersAdded { get; set; } = [];

/// <summary>
/// (populated when the job is executed)
/// Any binaries added to Fedora as part of this operation
/// </summary>
public List<BinaryFile> FilesAdded { get; set; } = [];

/// <summary>
/// (populated when the job is executed)
/// Any binaries deleted from Fedora as part of this operation
/// (left as Tombstones)
/// </summary>
public List<BinaryFile> FilesDeleted { get; set; } = [];

/// <summary>
/// (populated when the job is executed)
/// Any files UPDATED in Fedora as part of this operation
/// (typically, new binary content supplied, but could be other properties)
/// </summary>
public List<BinaryFile> FilesPatched { get; set; } = [];

/// <summary>
/// (populated when the job is executed)
/// Any containers deleted from Fedora as part of this operation
/// </summary>
public List<ContainerDirectory> ContainersDeleted { get; set; } = [];

// Must be explicitly set to true to allow an update of an existing ArchivalGroup
/// <summary>
/// Must be explicitly set to true to allow an update of an existing ArchivalGroup
/// </summary>
public bool IsUpdate { get; set; }


/// <summary>
/// When the job execution started
/// </summary>
public DateTime Start { get; set; }
public DateTime End { get; set; }

/// <summary>
/// When the job execution finished
/// </summary>
public DateTime End { get; set; }

/// <summary>
/// (populated when the job is executed)
/// The version that the Archival Group is now at
/// Should be vNext OCFL, one higher than DiffVersion
/// </summary>
public ObjectVersion? NewVersion { get; set; }


}
15 changes: 15 additions & 0 deletions LeedsExperiment/PreservationApiClient/PreservationService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -66,4 +66,19 @@ public string GetInternalPath(Uri preservationApiUri)
var ag = await _httpClient.GetFromJsonAsync<ArchivalGroup>(agApi);
return ag;
}

/// <summary>
/// Placeholder: exporting is not yet implemented in this client.
/// </summary>
/// <param name="path">Currently unused.</param>
/// <param name="version">Currently unused.</param>
/// <exception cref="NotImplementedException">Always thrown, synchronously, on every call.</exception>
public Task<ExportResult> Export(string path, string? version)
    => throw new NotImplementedException();

/// <summary>
/// Placeholder: building an update job is not yet implemented in this client.
/// </summary>
/// <param name="path">Currently unused.</param>
/// <param name="source">Currently unused.</param>
/// <exception cref="NotImplementedException">Always thrown, synchronously, on every call.</exception>
public Task<ImportJob> GetUpdateJob(string path, string source)
    => throw new NotImplementedException();

/// <summary>
/// Placeholder: executing an import job is not yet implemented in this client.
/// </summary>
/// <param name="importJob">Currently unused.</param>
/// <exception cref="NotImplementedException">Always thrown, synchronously, on every call.</exception>
public Task<ImportJob> Import(ImportJob importJob)
    => throw new NotImplementedException();
}
7 changes: 5 additions & 2 deletions LeedsExperiment/SamplesWorker/Worker.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using Fedora;
using Fedora.Abstractions;
using Fedora.Abstractions.Transfer;

namespace SamplesWorker
{
Expand Down Expand Up @@ -104,11 +105,13 @@ private async Task<ArchivalGroup> OcflV1()

// POST the basic container foo
var fooDir = new DirectoryInfo(Path.Combine(localPath, "foo"));
var fooContainer = await fedora.CreateContainer(archivalGroup.Location, fooDir.Name, fooDir.Name, transaction);
var cd = new ContainerDirectory { Name = fooDir.Name, Slug = fooDir.Name };
var fooContainer = await fedora.CreateContainer(archivalGroup.Location, cd, transaction);

// POST into foo the binary bar.xml
var localBarXml = new FileInfo(Path.Combine(localPath, "foo", "bar.xml"));
var xmlLocation = archivalGroup.GetResourceUri("foo/bar.xml");
var xmlLocation = archivalGroup.GetResourceUri("foo/bar.xml"); // Path is used here, which we could just supply as-is to BinaryFile... BUT needs archivalGroup to resolve, do we pass in the ArchivalGroup Uri as well? Maybe yes, you're supplying a file in the context of an archival group.
// A factory on ArchivalGroup? But then how do you make a new AG in one v1 transaction?
var fedoraBarXml = await fedora.PutBinary(xmlLocation, localBarXml, localBarXml.Name, "text/xml", transaction);

await fedora.CommitTransaction(transaction);
Expand Down

0 comments on commit 0843435

Please sign in to comment.