Skip to content

Commit

Permalink
Add Transfer types
Browse files Browse the repository at this point in the history
  • Loading branch information
tomcrane committed Jan 14, 2024
1 parent 9adf7ad commit 78a125d
Show file tree
Hide file tree
Showing 9 changed files with 210 additions and 2 deletions.
40 changes: 40 additions & 0 deletions LeedsExperiment/Dashboard/Controllers/ImportExportController.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,16 @@ public async Task<IActionResult> ExportStartAsync(
[FromRoute] string path,
[FromQuery] string? version = null)
{
// display a list of what's going to be exported
// add a default destination in the staging bucket
// allow a different destination to be specified (bucket, key root)

// POST the job to /processexport (not the file list, that gets recalculated)

// sync wait for response
// in production this will go on a queue and will poll for completion

// display summary completion, link back to ArchivalGroup Browse head
}


Expand All @@ -33,6 +43,36 @@ public async Task<IActionResult> ImportStartAsync(
[FromRoute] string path,
[FromQuery] string? version = null)
{
// work out what can be shared with a create new
// (is it just this with a null path?)

// get a source bucket and root key from the user
// show a list of the contents but don't diff yet

// https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html

// S3 supports SHA256 but not 512
// So we could lean on S3 to compute the checksums at source

// can we set a bucket policy that auto-calculates a checksum for every key when added?
// otherwise we'll need to calculate the checksums ourselves

// ask Leeds about 512 / 256

// In order to calculate a diff we can compare checksums. We could compare sizes first to pick up obvious changes
// but we'd need to checksum everything anyway

// Generate a diff
// files to add
// files to change
// files to rename? - can you even do that...
// files to delete
// This could be optimised in production



}


}
}
44 changes: 44 additions & 0 deletions LeedsExperiment/Fedora/Abstractions/Transfer/BinaryFile.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
namespace Fedora.Abstractions.Transfer;

/// <summary>
/// Describes a single file being transferred between the repository and an
/// external location (disk or S3) — used both when importing new files and
/// when exporting existing ones.
/// </summary>
public class BinaryFile
{
    /// <summary>
    /// The repository path (not a full Uri); always ends with <see cref="Slug"/>.
    /// Only contains permitted characters (e.g., no spaces).
    /// </summary>
    public required string Path { get; set; }

    /// <summary>
    /// An S3 key or a filesystem path — somewhere accessible to the Preservation API,
    /// to import from or export to.
    /// </summary>
    public required string Location { get; set; }

    /// <summary>
    /// The kind of storage <see cref="Location"/> refers to (e.g. "S3" or "FileSystem").
    /// </summary>
    public required string StorageType { get; set; }

    /// <summary>
    /// The final segment of <see cref="Path"/>.
    /// Only contains permitted characters (e.g., no spaces).
    /// </summary>
    public string Slug => Path[(Path.LastIndexOf('/') + 1)..];

    /// <summary>
    /// The name of the resource in Fedora (dc:title).
    /// </summary>
    public required string Name { get; set; }

    /// <summary>
    /// The original / actual name of the file, rather than the path-safe,
    /// reduced-character-set slug.
    /// </summary>
    public required string FileName { get; set; }

    // NB for a filename like readme.txt, Slug, Name and FileName will all be the same.
    // In practice, Name and FileName are going to be the same,
    // but Slug may differ as it always must be in the reduced character set.

    /// <summary>
    /// The media (MIME) type of the file's content.
    /// </summary>
    public required string ContentType { get; set; }

    /// <summary>
    /// A checksum of the file's content, if known.
    /// </summary>
    public string? Digest { get; set; }
}
17 changes: 17 additions & 0 deletions LeedsExperiment/Fedora/Abstractions/Transfer/ContainerDirectory.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
namespace Fedora.Abstractions.Transfer;

/// <summary>
/// Describes a container (directory) involved in a transfer between the
/// repository and an external location.
/// </summary>
public class ContainerDirectory
{
    /// <summary>
    /// The repository path (not a full Uri); always ends with <see cref="Slug"/>.
    /// Only contains permitted characters (e.g., no spaces).
    /// </summary>
    public required string Path { get; set; }

    /// <summary>
    /// The final segment of <see cref="Path"/>.
    /// </summary>
    public string Slug => Path[(Path.LastIndexOf('/') + 1)..];

    /// <summary>
    /// The name of the resource in Fedora (dc:title).
    /// </summary>
    public required string Name { get; set; }
}
8 changes: 8 additions & 0 deletions LeedsExperiment/Fedora/Storage/StorageTypes.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@

namespace Fedora.Storage;

/// <summary>
/// Well-known storage type identifiers, so callers can avoid scattering
/// magic strings when describing where content lives.
/// </summary>
public static class StorageTypes
{
    /// <summary>Content held on a local or mounted file system.</summary>
    public const string FileSystem = "FileSystem";

    /// <summary>Content held in Amazon S3.</summary>
    public const string S3 = "S3";
}
26 changes: 26 additions & 0 deletions LeedsExperiment/Preservation/ExportResult.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@

using Fedora.Abstractions.Transfer;
using Fedora.Storage;

namespace Preservation;

/// <summary>
/// The outcome of exporting an Archival Group from the repository to a
/// staging location.
/// </summary>
public class ExportResult
{
    /// <summary>
    /// For info — the repository path of the source archival group.
    /// </summary>
    public required string Path { get; set; }

    /// <summary>
    /// The version that was exported.
    /// </summary>
    public required ObjectVersion Version { get; set; }

    /// <summary>When the export started.</summary>
    public DateTime Start { get; set; }

    /// <summary>When the export finished.</summary>
    public DateTime End { get; set; }

    /// <summary>
    /// The root location (S3 Uri, directory path) where the Archival Group
    /// has been exported.
    /// </summary>
    public required string Source { get; set; }

    /// <summary>
    /// The kind of storage <see cref="Source"/> refers to (e.g. "S3").
    /// </summary>
    public required string StorageType { get; set; }

    /// <summary>The files included in the export.</summary>
    public List<BinaryFile> Files { get; set; } = new();
}
2 changes: 1 addition & 1 deletion LeedsExperiment/Preservation/FedoraWrapper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,7 @@ private void PopulateOrigins(StorageMap storageMap, Container container)

private static void PopulateOrigin(StorageMap storageMap, Binary binary)
{
if (storageMap.StorageType == "S3")
if (storageMap.StorageType == StorageTypes.S3)
{
// This "s3-ness" needs to be inside an abstracted impl
binary.Origin = $"s3://{storageMap.Root}/{storageMap.ObjectPath}/{storageMap.Hashes[binary.Digest!]}";
Expand Down
16 changes: 16 additions & 0 deletions LeedsExperiment/Preservation/IPreservation.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,23 @@ namespace Preservation;

public interface IPreservation
{
    // Getting things from Fedora

    /// <summary>
    /// Fetch the repository resource at the given path.
    /// </summary>
    Task<Resource?> GetResource(string? path);

    /// <summary>
    /// Translate a Preservation API Uri into the internal repository path it addresses.
    /// </summary>
    string GetInternalPath(Uri preservationApiUri);

    /// <summary>
    /// Fetch the Archival Group at the given path; <paramref name="version"/> selects a
    /// specific version (null presumably means the head version — confirm with implementations).
    /// </summary>
    Task<ArchivalGroup?> GetArchivalGroup(string path, string? version);


    // Interacting with a staging area

    /// <summary>
    /// Export the Archival Group at <paramref name="path"/> (optionally a specific version)
    /// to a staging location, returning what was exported and where.
    /// </summary>
    Task<ExportResult> Export(string path, string? version);

    /// <summary>
    /// Get a diff that can then be executed
    /// </summary>
    /// <param name="path">Repository path of the Archival Group to update.</param>
    /// <param name="source">Staging location holding the new content.</param>
    /// <returns>An <see cref="ImportJob"/> describing the changes required.</returns>
    Task<ImportJob> GetUpdateJob(string path, string source);

    // Create or update the job obtained above - the latter (updating an existing
    // Archival Group) requires the job's IsUpdate flag to be set explicitly.
    Task<ImportJob> Import(ImportJob importJob);
}
57 changes: 57 additions & 0 deletions LeedsExperiment/Preservation/ImportJob.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
using Fedora.Abstractions.Transfer;
using Fedora.Storage;

namespace Preservation;

/// <summary>
/// Describes a set of changes to be applied to an Archival Group in the
/// repository, derived from content in a staging location; also records the
/// outcome once the job has been executed.
/// </summary>
public class ImportJob
{
    /// <summary>Repository path of the Archival Group this job targets.</summary>
    public required string ArchivalGroupPath { get; set; }

    /// <summary>Where the new content is staged. Must be an S3 URI, for now.</summary>
    public required string Source { get; set; }

    /// <summary>The kind of storage <see cref="Source"/> refers to (e.g. "S3").</summary>
    public required string StorageType { get; set; }

    /// <summary>The full repository Uri of the Archival Group, if known.</summary>
    public Uri? ArchivalGroupUri { get; set; }

    /// <summary>When the diff calculation started.</summary>
    public DateTime DiffStart { get; set; }

    /// <summary>When the diff calculation finished.</summary>
    public DateTime DiffEnd { get; set; }

    /// <summary>
    /// What version at HEAD is this diff based on?
    /// When it comes to execute the job, need to make sure it's the same,
    /// and then execute the update in a transaction.
    /// (null if a new object, IsUpdate = false)
    /// </summary>
    public ObjectVersion? DiffVersion { get; set; }

    // The changes the diff determined are required:

    /// <summary>Containers that need to be created.</summary>
    public List<ContainerDirectory> ContainersToAdd { get; set; } = new();

    /// <summary>Files that need to be added.</summary>
    public List<BinaryFile> FilesToAdd { get; set; } = new();

    /// <summary>Files that need to be deleted.</summary>
    public List<BinaryFile> FilesToDelete { get; set; } = new();

    /// <summary>Files whose content needs to be replaced.</summary>
    public List<BinaryFile> FilesToPatch { get; set; } = new();

    // FilesToRename?

    /// <summary>
    /// While any required new containers can be created as files are added (create along path),
    /// we may end up with containers that have no files in them; these need to be deleted from Fedora.
    /// </summary>
    public List<ContainerDirectory> ContainersToDelete { get; set; } = new();

    // The changes actually made when the job was executed:

    public List<ContainerDirectory> ContainersAdded { get; set; } = new();
    public List<BinaryFile> FilesAdded { get; set; } = new();
    public List<BinaryFile> FilesDeleted { get; set; } = new();
    public List<BinaryFile> FilesPatched { get; set; } = new();
    public List<ContainerDirectory> ContainersDeleted { get; set; } = new();

    /// <summary>
    /// Must be explicitly set to true to allow an update of an existing ArchivalGroup.
    /// </summary>
    public bool IsUpdate { get; set; }

    /// <summary>When execution of the job started.</summary>
    public DateTime Start { get; set; }

    /// <summary>When execution of the job finished.</summary>
    public DateTime End { get; set; }

    /// <summary>The object version resulting from executing this job, if any.</summary>
    public ObjectVersion? NewVersion { get; set; }
}
2 changes: 1 addition & 1 deletion LeedsExperiment/Preservation/OcflS3StorageMapper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ public async Task<StorageMap> GetStorageMap(Uri archivalGroupUri, string? versio
ArchivalGroup = archivalGroupUri,
Version = objectVersion,
HeadVersion = headObjectVersion,
StorageType = "S3",
StorageType = StorageTypes.S3,
Root = fedoraAws.Bucket,
ObjectPath = agOrigin!,
AllVersions = inventoryVersions.ToArray(),
Expand Down

0 comments on commit 78a125d

Please sign in to comment.