-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
CDM (Synapse Link for Dynamics) plugin (#118)
- Loading branch information
1 parent
29023d6
commit 3a72b49
Showing
13 changed files
with
1,007 additions
and
47 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
59 changes: 59 additions & 0 deletions
59
framework/arcane-framework/src/main/scala/services/cdm/CdmTable.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
package com.sneaksanddata.arcane.framework | ||
package services.cdm | ||
|
||
import models.cdm.CSVParser.replaceQuotedNewlines | ||
import models.cdm.{SimpleCdmEntity, given} | ||
import models.{ArcaneSchema, DataRow} | ||
import services.storage.models.azure.{AdlsStoragePath, AzureBlobStorageReader} | ||
|
||
import java.time.{OffsetDateTime, ZoneOffset} | ||
import scala.concurrent.Future | ||
|
||
/**
 * Reader for a single CDM (Synapse Link for Dynamics) table stored under an ADLS path.
 *
 * @param name        Name of the table; appended to every listed folder name when looking up blobs
 * @param storagePath Root storage path that all prefixes and blob names are resolved against
 * @param entityModel CDM entity definition the row schema is derived from
 * @param reader      Storage reader used to list prefixes, list blobs and download blob content
 */
class CdmTable(name: String, storagePath: AdlsStoragePath, entityModel: SimpleCdmEntity, reader: AzureBlobStorageReader):
  // NOTE(review): the global execution context is hard-wired here; consider letting the caller supply one.
  implicit val ec: scala.concurrent.ExecutionContext = scala.concurrent.ExecutionContext.global
  private val defaultFromYears: Int = 5
  // Schema is obtained from the entity model through an implicit conversion brought in by `models.cdm.given`.
  private val schema: ArcaneSchema = implicitly(entityModel)

  /**
   * Build the hourly folder-name prefixes that cover the requested time window,
   * so that blobs can be pre-filtered by top-level virtual directory.
   *
   * One prefix is produced for every clock hour that overlaps `[startDate, endDate)`, including the
   * hour that contains the end of the window. An empty or inverted window yields no prefixes.
   *
   * @param startDate Baseline date to start search from. Defaults to `defaultFromYears` years before the end moment
   * @param endDate   Moment to stop at. Defaults to the current UTC time
   * @return A list of yyyy-MM-ddTHH prefixes to apply as filters
   */
  private def getListPrefixes(startDate: Option[OffsetDateTime], endDate: Option[OffsetDateTime] = None): IndexedSeq[String] =
    val currentMoment = endDate.getOrElse(OffsetDateTime.now(ZoneOffset.UTC))
    val startMoment = startDate.getOrElse(currentMoment.minusYears(defaultFromYears))
    if startMoment.toEpochSecond >= currentMoment.toEpochSecond then IndexedSeq.empty
    else
      // Step on hour boundaries: starting from an unaligned moment (e.g. 10:30) would make the last step
      // overshoot the end moment and silently drop the final, partially covered hour.
      val firstHour = startMoment.truncatedTo(java.time.temporal.ChronoUnit.HOURS)
      Iterator.iterate(firstHour)(_.plusHours(1))
        .takeWhile(_.toEpochSecond < currentMoment.toEpochSecond)
        .map { moment =>
          f"${moment.getYear}-${moment.getMonthValue}%02d-${moment.getDayOfMonth}%02dT${moment.getHour}%02d"
        }
        .toIndexedSeq

  /**
   * Read a table snapshot, taking optional start time. Lowest precision available is 1 hour
   * @param startDate Folders from Synapse export to include in the snapshot, based on the start date provided. If not provided, ALL folders from now - defaultFromYears will be included
   * @param endDate Date to stop at when looking for prefixes. In production use None for this value to always look data up to current moment.
   * @return A stream of rows for this table
   */
  def snapshot(startDate: Option[OffsetDateTime] = None, endDate: Option[OffsetDateTime] = None): Future[LazyList[DataRow]] =
    // list all matching blobs
    Future.sequence(getListPrefixes(startDate, endDate)
      // expand every hourly prefix into the folders that actually exist under it
      .flatMap(prefix => reader.listPrefixes(storagePath + prefix))
      // list the blobs of this table inside each of those folders
      .flatMap(prefix => reader.listBlobs(storagePath + prefix.name + name))
      // exclude any files other than CSV
      .collect {
        case blob if blob.name.endsWith(".csv") => reader.getBlobContent(storagePath + blob.name)
      })
      // line breaks inside quoted values are handled by replaceQuotedNewlines before splitting into lines;
      // each line is then converted to a DataRow via the implicit (line, schema) conversion
      .map(_.flatMap(content => replaceQuotedNewlines(content).split('\n').map(implicitly[DataRow](_, schema))))
      .map(LazyList.from)
|
||
object CdmTable:
  /**
   * Create a CdmTable from its settings.
   *
   * The root path from the settings is converted to an AdlsStoragePath and unwrapped with `.get`,
   * so presumably this throws when the path cannot be parsed - TODO confirm against AdlsStoragePath.
   *
   * @param settings    Table name and root path
   * @param entityModel CDM entity definition for the table
   * @param reader      Storage reader the table will use
   * @return A CdmTable bound to the given settings
   */
  def apply(settings: CdmTableSettings, entityModel: SimpleCdmEntity, reader: AzureBlobStorageReader): CdmTable =
    val tableRoot = AdlsStoragePath(settings.rootPath).get
    new CdmTable(settings.name, tableRoot, entityModel, reader)
|
9 changes: 9 additions & 0 deletions
9
framework/arcane-framework/src/main/scala/services/cdm/CdmTableSettings.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
package com.sneaksanddata.arcane.framework | ||
package services.cdm | ||
|
||
/**
 * Configuration needed to locate a CdmTable.
 *
 * @param name     Name of the table
 * @param rootPath HDFS-style path that includes table blob prefix, for example
 *                 abfss://container@account.dfs.core.windows.net/path/to/table
 */
case class CdmTableSettings(
  name: String,
  rootPath: String
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
20 changes: 20 additions & 0 deletions
20
...amework/src/main/scala/services/storage/models/azure/AzureBlobStorageReaderSettings.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
package com.sneaksanddata.arcane.framework | ||
package services.storage.models.azure | ||
|
||
import java.time.Duration | ||
|
||
/**
 * Settings for the Azure blob storage reader.
 *
 * @param httpMaxRetries    Value for the maximum number of HTTP retries
 * @param httpRetryTimeout  Value for the HTTP retry timeout
 * @param httpMinRetryDelay Value for the minimum delay between HTTP retries
 * @param httpMaxRetryDelay Value for the maximum delay between HTTP retries
 * @param maxResultsPerPage Value for the maximum number of results per page
 */
case class AzureBlobStorageReaderSettings(
  httpMaxRetries: Int,
  httpRetryTimeout: Duration,
  httpMinRetryDelay: Duration,
  httpMaxRetryDelay: Duration,
  maxResultsPerPage: Int
)

object AzureBlobStorageReaderSettings:
  /**
   * Build settings, falling back to the defaults below for any value that is not supplied:
   * 3 retries, 60 s retry timeout, 500 ms minimum delay, 3 s maximum delay, 5000 results per page.
   */
  def apply(
    httpMaxRetries: Int = 3,
    httpRetryTimeout: Duration = Duration.ofSeconds(60),
    httpMinRetryDelay: Duration = Duration.ofMillis(500),
    httpMaxRetryDelay: Duration = Duration.ofSeconds(3),
    maxResultsPerPage: Int = 5000
  ): AzureBlobStorageReaderSettings =
    new AzureBlobStorageReaderSettings(
      httpMaxRetries,
      httpRetryTimeout,
      httpMinRetryDelay,
      httpMaxRetryDelay,
      maxResultsPerPage
    )
Oops, something went wrong.