From 9852de7984e5063008045d43f66d675d1a73f6b8 Mon Sep 17 00:00:00 2001 From: tomcrane Date: Mon, 11 Mar 2024 11:38:29 +0000 Subject: [PATCH] Use sha256 and new Fedora bucket --- .../Preservation.API/Controllers/ImportController.cs | 6 ++++-- .../Preservation.API/appsettings.Production.json | 2 +- LeedsExperiment/Preservation/FedoraWrapper.cs | 6 +++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/LeedsExperiment/Preservation.API/Controllers/ImportController.cs b/LeedsExperiment/Preservation.API/Controllers/ImportController.cs index 6a39ad6..4fbb2fb 100644 --- a/LeedsExperiment/Preservation.API/Controllers/ImportController.cs +++ b/LeedsExperiment/Preservation.API/Controllers/ImportController.cs @@ -363,8 +363,10 @@ private async Task GetImportSource(string source, Uri intendedPare // Unless coming from other information, we *require* that S3 source folders have sha256 hashes in their metadata // so we don't have to do this: + + // TODAY var s3Stream = await s3Client!.GetObjectStreamAsync(obj.BucketName, obj.Key, null); - var sha512Digest = Checksum.Sha512FromStream(s3Stream); + var sha256Digest = Checksum.Sha256FromStream(s3Stream); // (and all our Fedora objects have sha-256) // We can also do an eTag comparison for smaller files // We can also do a size comparison as a sanity check - this can't catch all changes obvs @@ -384,7 +386,7 @@ private async Task GetImportSource(string source, Uri intendedPare Path = sourcePath, StorageType = StorageTypes.S3, ExternalLocation = $"s3://{obj.BucketName}/{obj.Key}", - Digest = sha512Digest, + Digest = sha256Digest, ContentType = GetDefaultContentType(nameAndParentPath.Name) // we may overwrite this later, e.g., from PREMIS data }); } diff --git a/LeedsExperiment/Preservation.API/appsettings.Production.json b/LeedsExperiment/Preservation.API/appsettings.Production.json index ddc856f..63fb4bf 100644 --- a/LeedsExperiment/Preservation.API/appsettings.Production.json +++ b/LeedsExperiment/Preservation.API/appsettings.Production.json @@ -10,7 +10,7 @@ "Region": "eu-west-1" }, "Fedora-AWS-S3": { - "bucket": "uol-expts-fedora-01" + "bucket": "uol-expts-fedora-650" }, "AllowedHosts": "*" } \ No newline at end of file diff --git a/LeedsExperiment/Preservation/FedoraWrapper.cs b/LeedsExperiment/Preservation/FedoraWrapper.cs index 16a79c3..cabe1c1 100644 --- a/LeedsExperiment/Preservation/FedoraWrapper.cs +++ b/LeedsExperiment/Preservation/FedoraWrapper.cs @@ -218,7 +218,7 @@ private async void EnsureChecksum(BinaryFile binaryFile) { case StorageTypes.FileSystem: var fi = new FileInfo(binaryFile.ExternalLocation); - expected = Checksum.Sha512FromFile(fi); + expected = Checksum.Sha256FromFile(fi); break; case StorageTypes.S3: // TODO - get the SHA256 algorithm from AWS directly rather than compute it here @@ -233,7 +233,7 @@ private async void EnsureChecksum(BinaryFile binaryFile) // This would be an efficient way of doing this - but with this naive implementation // we're going to read the object twice var s3Stream = await s3Client!.GetObjectStreamAsync(s3Uri.Bucket, s3Uri.Key, null); - expected = Checksum.Sha512FromStream(s3Stream); + expected = Checksum.Sha256FromStream(s3Stream); // could get a byte array here and then pass it along eventually to MakeBinaryPutOrPost // for now just read it twice. // Later we'll get the sha256 checksum from metadata @@ -321,7 +321,7 @@ private async Task MakeBinaryPutOrPost(HttpMethod httpMethod { var req = MakeHttpRequestMessage(location, httpMethod) .InTransaction(transaction) - .WithDigest(binaryFile.Digest, "sha-512"); // move algorithm choice to config + .WithDigest(binaryFile.Digest, "sha-256"); // move algorithm choice to constant if (httpMethod == HttpMethod.Post) { req.WithSlug(binaryFile.Slug);