-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
e570981
commit 29023d6
Showing
7 changed files
with
175 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
29 changes: 29 additions & 0 deletions
29
...ework/arcane-framework/src/main/scala/services/storage/models/azure/AdlsStoragePath.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
package com.sneaksanddata.arcane.framework | ||
package services.storage.models.azure | ||
|
||
import services.storage.models.base.BlobPath | ||
|
||
import scala.annotation.targetName | ||
import scala.util.{Failure, Success, Try} | ||
import scala.util.matching.Regex | ||
|
||
/**
 * Address of a blob, or of a blob prefix, inside a container of an Azure storage account.
 *
 * @param accountName Name of the storage account.
 * @param container   Name of the container inside the account.
 * @param blobPrefix  Blob name or prefix relative to the container; may be empty.
 */
final case class AdlsStoragePath(accountName: String, container: String, blobPrefix: String) extends BlobPath:
  /**
   * Renders this path as an `abfss://` URI.
   *
   * @return The path in the form `abfss://container@account.dfs.core.windows.net/prefix`.
   */
  def toHdfsPath: String = s"abfss://$container@$accountName.dfs.core.windows.net/$blobPrefix"

  /**
   * Appends a key segment to the blob prefix of this path.
   *
   * An empty prefix is replaced by the segment as-is; otherwise the segment is attached with a
   * single `/` separator. No normalisation of slashes inside `part` is performed.
   *
   * @param part Blob prefix part to join
   * @return A copy of this path pointing at the extended prefix.
   */
  @targetName("plus")
  def +(part: String): AdlsStoragePath =
    val extendedPrefix = blobPrefix match
      case ""       => part
      case existing => s"$existing/$part"
    copy(blobPrefix = extendedPrefix)
|
||
/**
 * Factory for [[AdlsStoragePath]] values parsed from `abfss://` URIs.
 */
object AdlsStoragePath:
  // Compiled once at object initialisation instead of on every call to `apply`.
  // Capture groups: 1 = container, 2 = account name, 3 = blob prefix.
  private val matchRegex: Regex = "^abfss:\\/\\/([^@]+)@([^\\.]+)\\.dfs\\.core\\.windows\\.net\\/(.*)$".r

  /**
   * Parses an `abfss://container@account.dfs.core.windows.net/path` URI.
   *
   * A single trailing `/` is removed from the parsed blob prefix.
   *
   * @param hdfsPath The URI to parse.
   * @return `Success` with the parsed path, or `Failure` wrapping an `IllegalArgumentException`
   *         when the input is `null` or does not match the expected format.
   */
  def apply(hdfsPath: String): Try[AdlsStoragePath] =
    // Option(...) routes a null input to the Failure branch instead of throwing from the matcher.
    Option(hdfsPath).flatMap(matchRegex.findFirstMatchIn) match
      case Some(matched) =>
        Success(new AdlsStoragePath(
          accountName = matched.group(2),
          container = matched.group(1),
          blobPrefix = matched.group(3).stripSuffix("/")
        ))
      case None =>
        Failure(IllegalArgumentException(s"An AdlsStoragePath must be in the format abfss://container@account.dfs.core.windows.net/path/to/file, but was: $hdfsPath"))
|
64 changes: 64 additions & 0 deletions
64
...rcane-framework/src/main/scala/services/storage/models/azure/AzureBlobStorageReader.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
package com.sneaksanddata.arcane.framework | ||
package services.storage.models.azure | ||
|
||
import services.storage.base.BlobStorageReader | ||
|
||
import com.azure.identity.{DefaultAzureCredential, DefaultAzureCredentialBuilder} | ||
import com.azure.storage.blob.{BlobAsyncClient, BlobClient, BlobContainerAsyncClient, BlobContainerClient, BlobServiceClientBuilder} | ||
import services.storage.models.base.StoredBlob | ||
import services.storage.models.azure.AzureModelConversions.given | ||
|
||
import scala.jdk.CollectionConverters.* | ||
import scala.language.implicitConversions | ||
import com.azure.storage.blob.models.ListBlobsOptions | ||
import com.azure.storage.common.policy.{RequestRetryOptions, RetryPolicyType} | ||
|
||
import java.time.Duration | ||
import scala.annotation.tailrec | ||
import scala.concurrent.Future | ||
|
||
/**
 * [[BlobStorageReader]] implementation for Azure Blob Storage, addressed by [[AdlsStoragePath]].
 *
 * The underlying service client is built lazily with credentials from `DefaultAzureCredentialBuilder`
 * and an exponential HTTP retry policy.
 *
 * NOTE(review): the service client builder is given no endpoint and `AdlsStoragePath.accountName`
 * is never read in this class — confirm how the target storage account is resolved.
 */
final class AzureBlobStorageReader extends BlobStorageReader[AdlsStoragePath]:
  private val httpMaxRetries = 3
  private val httpRetryTimeout = Duration.ofSeconds(60)
  private val httpMinRetryDelay = Duration.ofMillis(500)
  private val httpMaxRetryDelay = Duration.ofSeconds(3)

  private lazy val defaultCredential = new DefaultAzureCredentialBuilder().build()
  private lazy val serviceClient = new BlobServiceClientBuilder()
    .credential(defaultCredential)
    .retryOptions(RequestRetryOptions(RetryPolicyType.EXPONENTIAL, httpMaxRetries, httpRetryTimeout.toSeconds.toInt, httpMinRetryDelay.toMillis, httpMaxRetryDelay.toMillis, null))
    .buildClient()
  // Timeout applied to each blob listing request.
  private val defaultTimeout = Duration.ofSeconds(30)
  implicit val ec: scala.concurrent.ExecutionContext = scala.concurrent.ExecutionContext.global

  private def getBlobClient(blobPath: AdlsStoragePath): BlobClient =
    getBlobContainerClient(blobPath).getBlobClient(blobPath.blobPrefix)

  private def getBlobContainerClient(blobPath: AdlsStoragePath): BlobContainerClient =
    serviceClient.getBlobContainerClient(blobPath.container)

  /**
   * Downloads a blob and deserializes its content.
   *
   * @param blobPath     Path of the blob to download.
   * @param deserializer Function turning the raw blob bytes into the result.
   * @tparam Result Type produced by the deserializer.
   * @return A future completed with the deserialized content, or failed with whatever the
   *         download or the deserializer throws.
   */
  def getBlobContent[Result](blobPath: AdlsStoragePath, deserializer: Array[Byte] => Result): Future[Result] =
    val client = getBlobClient(blobPath)
    // The synchronous download blocks its thread; mark it so the execution context can compensate.
    Future(scala.concurrent.blocking(deserializer(client.downloadContent().toBytes)))

  /**
   * Lists the blobs whose names start with the prefix of the given path.
   *
   * All pages are fetched before this method returns; the result is only wrapped in a `LazyList`.
   *
   * @param blobPath Path whose container and blob prefix select the blobs to list.
   * @return The blobs found, in the order the service returned them.
   */
  def listBlobs(blobPath: AdlsStoragePath): LazyList[StoredBlob] =
    val client = getBlobContainerClient(blobPath)
    val listOptions = new ListBlobsOptions().setPrefix(blobPath.blobPrefix)

    @tailrec
    def getPage(pageToken: Option[String], result: Vector[StoredBlob]): Vector[StoredBlob] =
      val pages = client.listBlobs(listOptions, pageToken.orNull, defaultTimeout)
        .iterableByPage()
        .iterator()

      // Guard against an empty page iterator instead of failing with NoSuchElementException.
      if !pages.hasNext then result
      else
        val page = pages.next()
        val pageData = page.getValue.asScala.map(implicitly)
        val collected = result ++ pageData

        // A null continuation token is treated the same as an empty one: there are no further pages.
        Option(page.getContinuationToken).filter(_.nonEmpty) match
          case Some(nextToken) => getPage(Some(nextToken), collected)
          case None            => collected

    LazyList.from(getPage(None, Vector.empty))
|
||
|
20 changes: 20 additions & 0 deletions
20
...arcane-framework/src/main/scala/services/storage/models/azure/AzureModelConversions.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
package com.sneaksanddata.arcane.framework | ||
package services.storage.models.azure | ||
|
||
import com.azure.storage.blob.models.BlobItem | ||
import services.storage.models.base.StoredBlob | ||
|
||
import scala.jdk.CollectionConverters.* | ||
|
||
/**
 * Conversions from Azure SDK models to the storage-agnostic models of this framework.
 */
object AzureModelConversions:
  /**
   * Converts an Azure `BlobItem` into a [[StoredBlob]].
   *
   * Properties the SDK reports as `null` are mapped to `None` (or to an empty map for metadata)
   * instead of being dereferenced.
   */
  given Conversion[BlobItem, StoredBlob] with
    override def apply(blobItem: BlobItem): StoredBlob =
      val properties = blobItem.getProperties
      StoredBlob(
        name = blobItem.getName,
        // NOTE(review): createdOn is mandatory on StoredBlob, so a null creation time still fails here — confirm the service always returns it.
        createdOn = properties.getCreationTime.toEpochSecond,
        // A null metadata map becomes an empty map rather than failing in `.toMap`.
        metadata = Option(blobItem.getMetadata).fold(Map.empty[String, String])(_.asScala.toMap),
        // Keeps the existing byte-to-char rendering of the MD5 digest; only the null check is new.
        contentHash = Option(properties.getContentMd5).map(_.map(_.toChar).mkString),
        contentEncoding = Option(properties.getContentEncoding),
        contentType = Option(properties.getContentType),
        // Unbox explicitly after the null check; an implicit unboxing of null would throw.
        contentLength = Option(properties.getContentLength).map(_.longValue),
        lastModified = Option(properties.getLastModified).map(_.toEpochSecond)
      )
46 changes: 46 additions & 0 deletions
46
framework/arcane-framework/src/main/scala/services/storage/models/base/StoredBlob.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
package com.sneaksanddata.arcane.framework | ||
package services.storage.models.base | ||
|
||
/** | ||
* Blob object metadata. | ||
*/ | ||
/**
 * Storage-agnostic description of a stored blob.
 *
 * @param metadata        Additional key/value metadata attached to the object; empty by default.
 * @param contentHash     Hash sum of the content, when available.
 * @param contentEncoding Content encoding, for example utf-8.
 * @param contentType     Content type, for example text/plain.
 * @param contentLength   Content length in bytes.
 * @param name            Blob filename. May contain the full path, depending on the actual storage.
 * @param lastModified    Last modified timestamp.
 * @param createdOn       Created on timestamp.
 */
case class StoredBlob(
  metadata: Map[String, String] = Map.empty,
  contentHash: Option[String] = None,
  contentEncoding: Option[String] = None,
  contentType: Option[String] = None,
  contentLength: Option[Long] = None,
  name: String,
  lastModified: Option[Long] = None,
  createdOn: Long
)