Skip to content

Commit

Permalink
Patch content type during import (#5090)
Browse files Browse the repository at this point in the history
Co-authored-by: Simon Dumas <simon.dumas@epfl.ch>
  • Loading branch information
imsdu and Simon Dumas authored Aug 9, 2024
1 parent db25b93 commit cc7af98
Show file tree
Hide file tree
Showing 7 changed files with 98 additions and 15 deletions.
6 changes: 6 additions & 0 deletions ship/src/main/resources/ship-default.conf
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,12 @@ ship {
target-bucket = "nexus-delta-production"
# To skip file events to make the batch run faster and focus on other events
skip-file-events = false
# Allows to define default media types for the given file extensions
media-type-detector {
extensions {
#extension = "application/custom"
}
}
}
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package ch.epfl.bluebrain.nexus.ship.config

import akka.http.scaladsl.model.Uri.Path
import ch.epfl.bluebrain.nexus.delta.kernel.http.MediaTypeDetectorConfig
import pureconfig.ConfigReader
import ch.epfl.bluebrain.nexus.delta.sdk.instances._
import pureconfig.generic.semiauto.deriveReader
Expand All @@ -9,7 +10,8 @@ final case class FileProcessingConfig(
importBucket: String,
targetBucket: String,
prefix: Option[Path],
skipFileEvents: Boolean
skipFileEvents: Boolean,
mediaTypeDetector: MediaTypeDetectorConfig
)

object FileProcessingConfig {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
package ch.epfl.bluebrain.nexus.ship.files

import akka.http.scaladsl.model.{ContentType, HttpCharsets}
import cats.effect.IO
import ch.epfl.bluebrain.nexus.delta.kernel.Logger
import ch.epfl.bluebrain.nexus.delta.kernel.http.MediaTypeDetectorConfig
import ch.epfl.bluebrain.nexus.delta.kernel.utils.FileUtils
import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.Files
import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.Files.definition
import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.model.FileCommand.CancelEvent
import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.model.FileEvent._
import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.model.FileRejection.{FileNotFound, IncorrectRev, ResourceAlreadyExists}
import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.model.{FileAttributes, FileCustomMetadata, FileEvent, FileId, FileLinkRequest}
import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.model._
import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.operations.s3.client.S3StorageClient
import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.{FetchStorage, StorageResource}
import ch.epfl.bluebrain.nexus.delta.rdf.jsonld.api.JsonLdApi
Expand All @@ -22,7 +25,7 @@ import ch.epfl.bluebrain.nexus.ship._
import ch.epfl.bluebrain.nexus.ship.acls.AclWiring.alwaysAuthorize
import ch.epfl.bluebrain.nexus.ship.config.InputConfig
import ch.epfl.bluebrain.nexus.ship.files.FileCopier.CopyResult.{CopySkipped, CopySuccess}
import ch.epfl.bluebrain.nexus.ship.files.FileProcessor.logger
import ch.epfl.bluebrain.nexus.ship.files.FileProcessor.{logger, patchMediaType}
import ch.epfl.bluebrain.nexus.ship.files.FileWiring._
import ch.epfl.bluebrain.nexus.ship.storages.StorageWiring
import io.circe.Decoder
Expand All @@ -32,7 +35,8 @@ class FileProcessor private (
projectMapper: ProjectMapper,
fileCopier: FileCopier,
clock: EventClock
) extends EventProcessor[FileEvent] {
)(implicit mediaTypeDetector: MediaTypeDetectorConfig)
extends EventProcessor[FileEvent] {

override def resourceType: EntityType = Files.entityType

Expand Down Expand Up @@ -67,7 +71,8 @@ class FileProcessor private (
val customMetadata = Some(getCustomMetadata(attrs))
fileCopier.copyFile(e.project, attrs).flatMap {
case CopySuccess(newPath) =>
val linkRequest = FileLinkRequest(newPath, attrs.mediaType, customMetadata)
val newMediaType = patchMediaType(attrs.filename, attrs.mediaType)
val linkRequest = FileLinkRequest(newPath, newMediaType, customMetadata)
files
.linkFile(Some(event.id), project, None, linkRequest, e.tag)
.as(ImportStatus.Success)
Expand All @@ -78,7 +83,8 @@ class FileProcessor private (
val customMetadata = Some(getCustomMetadata(attrs))
fileCopier.copyFile(e.project, attrs).flatMap {
case CopySuccess(newPath) =>
val linkRequest = FileLinkRequest(newPath, attrs.mediaType, customMetadata)
val newMediaType = patchMediaType(attrs.filename, attrs.mediaType)
val linkRequest = FileLinkRequest(newPath, newMediaType, customMetadata)
files
.updateLinkedFile(fileId, None, cRev, linkRequest, e.tag)
.as(ImportStatus.Success)
Expand Down Expand Up @@ -117,6 +123,16 @@ object FileProcessor {

private val logger = Logger[FileProcessor]

def patchMediaType(
filename: String,
original: Option[ContentType]
)(implicit mediaTypeDetector: MediaTypeDetectorConfig): Option[ContentType] =
FileUtils
.extension(filename)
.flatMap(mediaTypeDetector.find)
.map(ContentType(_, () => HttpCharsets.`UTF-8`))
.orElse(original)

private val noop = new EventProcessor[FileEvent] {
override def resourceType: EntityType = Files.entityType

Expand Down Expand Up @@ -157,7 +173,7 @@ object FileProcessor {
linkOperationOnly(s3Client)
)(FailingUUID)

new FileProcessor(files, projectMapper, fileCopier, clock)
new FileProcessor(files, projectMapper, fileCopier, clock)(config.files.mediaTypeDetector)
}

}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package ch.epfl.bluebrain.nexus.ship.resources

import akka.http.scaladsl.model.Uri
import akka.http.scaladsl.model.{ContentType, Uri}
import cats.effect.IO
import cats.syntax.all._
import ch.epfl.bluebrain.nexus.delta.kernel.Logger
Expand Down Expand Up @@ -56,6 +56,7 @@ final class DistributionPatcher(
IO.pure(
setLocation(attributes.location.toString())
.andThen(setContentSize(attributes.bytes))
.andThen(setEncodingFormat(attributes.mediaType))
.andThen(setDigest(attributes.digest))
)
case Left(e) =>
Expand Down Expand Up @@ -87,12 +88,17 @@ final class DistributionPatcher(
}
}

private def setContentUrl(newContentUrl: String) = root.contentUrl.string.replace(newContentUrl)
private def setLocation(newLocation: String) = (json: Json) =>
private def setContentUrl(newContentUrl: String) = root.contentUrl.string.replace(newContentUrl)
private def setLocation(newLocation: String) = (json: Json) =>
json.deepMerge(Json.obj("atLocation" := Json.obj("location" := newLocation)))
private def setContentSize(newSize: Long) = (json: Json) =>
private def setContentSize(newSize: Long) = (json: Json) =>
json.deepMerge(Json.obj("contentSize" := Json.obj("unitCode" := "bytes", "value" := newSize)))
private def setDigest(digest: Digest) = (json: Json) => json.deepMerge(Json.obj("digest" := digest))
private def setEncodingFormat(contentType: Option[ContentType]) = (json: Json) =>
contentType.fold(json) { ct =>
json.deepMerge(Json.obj("encodingFormat" := ct.mediaType.value))
}

private def setDigest(digest: Digest) = (json: Json) => json.deepMerge(Json.obj("digest" := digest))

private def toS3Location: Json => Json = root.atLocation.store.json.replace(targetStorage)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package ch.epfl.bluebrain.nexus.ship.config

import akka.http.scaladsl.model.Uri
import ch.epfl.bluebrain.nexus.delta.kernel.Secret
import ch.epfl.bluebrain.nexus.delta.kernel.http.MediaTypeDetectorConfig
import ch.epfl.bluebrain.nexus.delta.plugins.storage.files
import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.StoragesConfig.S3StorageConfig
import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.{permissions, StorageFixtures, StoragesConfig}
Expand Down Expand Up @@ -61,7 +62,8 @@ trait ShipConfigFixtures extends ConfigFixtures with StorageFixtures with Classp
importBucket,
targetBucket,
Some(Uri.Path("/prefix")),
skipFileEvents = false
skipFileEvents = false,
MediaTypeDetectorConfig()
),
IriPatcherConfig(enabled = false, iri"https://bbp.epfl.ch/", iri"https:/openbrainplatform.com/"),
Set.empty
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package ch.epfl.bluebrain.nexus.ship.files

import akka.http.scaladsl.model.{ContentTypes, MediaTypes}
import ch.epfl.bluebrain.nexus.delta.kernel.http.MediaTypeDetectorConfig
import ch.epfl.bluebrain.nexus.testkit.mu.NexusSuite

class FileProcessorSuite extends NexusSuite {

implicit private val mediaTypeDetector: MediaTypeDetectorConfig = MediaTypeDetectorConfig(
"json" -> MediaTypes.`application/json`,
"pdf" -> MediaTypes.`application/pdf`
)

test("Return a new content type matching the config") {
assertEquals(
FileProcessor.patchMediaType("file.json", None),
Some(ContentTypes.`application/json`)
)
}

test("Return the original content type") {
assertEquals(
FileProcessor.patchMediaType("file.", Some(ContentTypes.`text/csv(UTF-8)`)),
Some(ContentTypes.`text/csv(UTF-8)`)
)
}

}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package ch.epfl.bluebrain.nexus.ship.resources

import akka.http.scaladsl.model.Uri
import akka.http.scaladsl.model.{ContentTypes, Uri}
import cats.effect.IO
import ch.epfl.bluebrain.nexus.delta.plugins.storage.FileSelf
import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.model.FileAttributes.FileAttributesOrigin
Expand Down Expand Up @@ -72,7 +72,7 @@ class DistributionPatcherSuite extends NexusSuite {
location,
path,
"file.txt",
None,
Some(ContentTypes.`text/plain(UTF-8)`),
Map.empty,
None,
None,
Expand Down Expand Up @@ -197,6 +197,21 @@ class DistributionPatcherSuite extends NexusSuite {
.assertEquals("/actual/path/file.txt")
}

test("Patch a encoding format based on what the file says") {
val input =
json"""{
"distribution": {
"contentUrl": "${sourceFileSelf(projectWithMapping, resource1)}",
"encodingFormat": "text/csv"
}
}"""

patcher
.patchAll(input)
.map(distributionEncodingFormat)
.assertEquals("text/plain")
}

test("Patch a file location based on what the resource says when no existing location present") {
val input =
json"""{
Expand Down Expand Up @@ -290,4 +305,12 @@ class DistributionPatcherSuite extends NexusSuite {
.getOrElse(fail("contentUrl was not present"))
}

private def distributionEncodingFormat(json: Json): String = {
json.hcursor
.downField("distribution")
.downField("encodingFormat")
.as[String]
.getOrElse(fail("encodingFormat was not present"))
}

}

0 comments on commit cc7af98

Please sign in to comment.