From 87ae64faac87622c1da96bc311506dc9fa5b038b Mon Sep 17 00:00:00 2001 From: Simon Date: Fri, 20 Sep 2024 17:40:15 +0200 Subject: [PATCH] Strip given location prefix on unknown file (#5149) Co-authored-by: Simon Dumas --- .../ship/config/FileProcessingConfig.scala | 2 + .../ship/resources/DistributionPatcher.scala | 38 ++++++++++--------- .../nexus/ship/resources/SourcePatcher.scala | 9 ++++- .../ship/config/ShipConfigFixtures.scala | 1 + .../resources/DistributionPatcherSuite.scala | 25 +++++++++++- 5 files changed, 56 insertions(+), 19 deletions(-) diff --git a/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/config/FileProcessingConfig.scala b/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/config/FileProcessingConfig.scala index 5043e5b4ce..392df12816 100644 --- a/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/config/FileProcessingConfig.scala +++ b/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/config/FileProcessingConfig.scala @@ -1,5 +1,6 @@ package ch.epfl.bluebrain.nexus.ship.config +import akka.http.scaladsl.model.Uri import akka.http.scaladsl.model.Uri.Path import ch.epfl.bluebrain.nexus.delta.kernel.http.MediaTypeDetectorConfig import pureconfig.ConfigReader @@ -10,6 +11,7 @@ final case class FileProcessingConfig( importBucket: String, targetBucket: String, prefix: Option[Path], + locationPrefixToStripOpt: Option[Uri], skipFileEvents: Boolean, mediaTypeDetector: MediaTypeDetectorConfig ) diff --git a/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/resources/DistributionPatcher.scala b/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/resources/DistributionPatcher.scala index b551ffc252..420cacebd3 100644 --- a/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/resources/DistributionPatcher.scala +++ b/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/resources/DistributionPatcher.scala @@ -12,10 +12,10 @@ import ch.epfl.bluebrain.nexus.delta.rdf.utils.UriUtils import ch.epfl.bluebrain.nexus.delta.sdk.model.{BaseUri, ResourceUris} import ch.epfl.bluebrain.nexus.delta.sourcing.model.ResourceRef.{Latest, Revision, Tag} import ch.epfl.bluebrain.nexus.delta.sourcing.model.{ProjectRef, ResourceRef} -import ch.epfl.bluebrain.nexus.ship.{IriPatcher, ProjectMapper} import ch.epfl.bluebrain.nexus.ship.resources.DistributionPatcher._ +import ch.epfl.bluebrain.nexus.ship.{IriPatcher, ProjectMapper} import io.circe.optics.JsonPath.root -import io.circe.syntax.{EncoderOps, KeyOps} +import io.circe.syntax.KeyOps import io.circe.{Encoder, Json, JsonObject} final class DistributionPatcher( @@ -23,6 +23,7 @@ final class DistributionPatcher( projectMapper: ProjectMapper, iriPatcher: IriPatcher, targetBase: BaseUri, + locationPrefixToStripOpt: Option[Uri], fetchFileAttributes: (ProjectRef, ResourceRef) => IO[FileAttributes] ) { @@ -60,13 +61,21 @@ final class DistributionPatcher( .andThen(setDigest(attributes.digest)) ) case Left(e) => - logger.error(e)(s"File '$patchedResourceRef' in project '$targetProject' could not be fetched") >> + logger.warn(e)(s"File '$patchedResourceRef' in project '$targetProject' could not be fetched") >> IO.pure(identity) } fileAttributeModifications.map(_.andThen(setContentUrl(newContentUrl.toString()))) } + private def stripLocationOnUnknownFile(json: Json): Json = { + locationPrefixToStripOpt.fold(json) { locationPrefixToStrip => + root.atLocation.location.string.modify { location => + location.replaceFirst(locationPrefixToStrip.toString, "file://") + }(json) + } + } + private def createContentUrl(project: ProjectRef, resourceRef: ResourceRef): Uri = { val withoutVersioning = ResourceUris("files", project, resourceRef.iri).accessUri(targetBase) resourceRef match { @@ -76,17 +85,12 @@ final class DistributionPatcher( } } - private[resources] def single(json: Json): IO[Json] = { - for { - ids <- extractIds(json) - fileBasedModifications <- ids match { - case Some((project, resource)) => modificationsForFile(project, resource) - case None => IO.pure((json: Json) => json) - } - } yield { - toS3Location.andThen(fileBasedModifications)(json) + private[resources] def single(json: Json): IO[Json] = extractIds(json) + .flatMap { + case Some((project, resource)) => modificationsForFile(project, resource).map(_(json)) + case None => IO.pure(stripLocationOnUnknownFile(json)) } - } + .map(toS3Location) private def setContentUrl(newContentUrl: String) = root.contentUrl.string.replace(newContentUrl) private def setLocation(newLocation: String) = (json: Json) => @@ -126,11 +130,11 @@ final class DistributionPatcher( IO.fromEither(UriUtils.uri(string).leftMap(new IllegalArgumentException(_))) implicit private val digestEncoder: Encoder.AsObject[Digest] = Encoder.encodeJsonObject.contramapObject { - case ComputedDigest(algorithm, value) => JsonObject("algorithm" -> algorithm.asJson, "value" -> value.asJson) + case ComputedDigest(algorithm, value) => JsonObject("algorithm" := algorithm, "value" := value) case MultiPartDigest(algorithm, value, numberOfParts) => - JsonObject("algorithm" -> algorithm.asJson, "value" -> value.asJson, "numberOfParts" -> numberOfParts.asJson) - case NotComputedDigest => JsonObject("value" -> "".asJson) - case NoDigest => JsonObject("value" -> "".asJson) + JsonObject("algorithm" := algorithm, "value" := value, "numberOfParts" := numberOfParts) + case NotComputedDigest => JsonObject("value" := "") + case NoDigest => JsonObject("value" := "") } } diff --git a/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/resources/SourcePatcher.scala b/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/resources/SourcePatcher.scala index cb039d4766..b90ea2f46a 100644 --- a/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/resources/SourcePatcher.scala +++ b/ship/src/main/scala/ch/epfl/bluebrain/nexus/ship/resources/SourcePatcher.scala @@ -59,7 +59,14 @@ object SourcePatcher { }.map(_.attributes) val distributionPatcher = - new DistributionPatcher(fileSelfParser, projectMapper, iriPatcher, targetBase, fetchFileAttributes) + new DistributionPatcher( + fileSelfParser, + projectMapper, + iriPatcher, + targetBase, + config.files.locationPrefixToStripOpt, + fetchFileAttributes + ) new SourcePatcher(distributionPatcher, iriPatcher) } diff --git a/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/config/ShipConfigFixtures.scala b/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/config/ShipConfigFixtures.scala index 93564fedd2..e07ec064fb 100644 --- a/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/config/ShipConfigFixtures.scala +++ b/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/config/ShipConfigFixtures.scala @@ -63,6 +63,7 @@ trait ShipConfigFixtures extends ConfigFixtures with StorageFixtures with Classp importBucket, targetBucket, Some(Uri.Path("/prefix")), + None, skipFileEvents = false, MediaTypeDetectorConfig( "nwb" -> MediaType.applicationBinary("nwb", NotCompressible) diff --git a/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/resources/DistributionPatcherSuite.scala b/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/resources/DistributionPatcherSuite.scala index 9290e85a58..28868bc983 100644 --- a/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/resources/DistributionPatcherSuite.scala +++ b/ship/src/test/scala/ch/epfl/bluebrain/nexus/ship/resources/DistributionPatcherSuite.scala @@ -89,7 +89,14 @@ class DistributionPatcherSuite extends NexusSuite { private val projectMapping = Map(projectWithMapping -> mappedProject) private val iriPatcher = IriPatcher(originalPrefix, targetPrefix, projectMapping) private val patcher = - new DistributionPatcher(fileSelf, ProjectMapper(projectMapping), iriPatcher, destinationBaseUri, fileResolver) + new DistributionPatcher( + fileSelf, + ProjectMapper(projectMapping), + iriPatcher, + destinationBaseUri, + Some(uri"file:///location_to_strip"), + fileResolver + ) test("Do nothing on a distribution payload without fields to patch") { val input = json"""{ "anotherField": "XXX" }""" @@ -272,6 +279,22 @@ class DistributionPatcherSuite extends NexusSuite { patcher.patchAll(input).assertEquals(expected) } + test("Patch and strip the distribution location when it matches the given prefix") { + val input = + json"""{ + "distribution": { + "atLocation": { + "location": "file:///location_to_strip/project/a/b/c/d/file.txt" + } + } + }""" + + patcher + .patchAll(input) + .map(distributionLocation) + .assertEquals("file:///project/a/b/c/d/file.txt") + } + private def distributionContentSize(json: Json): JsonObject = { json.hcursor .downField("distribution")