-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
29023d6
commit c3fd952
Showing
3 changed files
with
64 additions
and
2 deletions.
There are no files selected for viewing
55 changes: 55 additions & 0 deletions
55
framework/arcane-framework/src/main/scala/services/cdm/CdmTable.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
package com.sneaksanddata.arcane.framework | ||
package services.cdm | ||
|
||
import models.cdm.CSVParser.isComplete | ||
import models.cdm.{SimpleCdmEntity, given} | ||
import models.{ArcaneSchema, DataRow} | ||
import services.storage.models.azure.{AdlsStoragePath, AzureBlobStorageReader} | ||
|
||
import java.time.{OffsetDateTime, ZoneOffset} | ||
import scala.concurrent.Future | ||
|
||
/**
 * Reads rows of a single CDM entity (table) from CSV blobs located under a storage path.
 *
 * @param name        Name of the table. NOTE(review): not referenced by any member of this class.
 * @param storagePath Root storage path; listing prefixes and blob names are appended to it.
 * @param entityModel CDM entity definition from which the row schema is derived.
 * @param reader      Reader used to list blobs and to fetch their content.
 */
class CdmTable(name: String, storagePath: AdlsStoragePath, entityModel: SimpleCdmEntity, reader: AzureBlobStorageReader):
  // NOTE(review): library code normally accepts the execution context from the caller instead of
  // pinning the global one; kept as-is because this is a public member callers may rely on.
  implicit val ec: scala.concurrent.ExecutionContext = scala.concurrent.ExecutionContext.global
  private val defaultFromYears: Int = 5
  private val schema: ArcaneSchema = implicitly(entityModel)

  /**
   * Builds the blob listing prefixes (`yyyy-MM-`) for every month of every calendar year
   * between (now - fromYears) and now, both years inclusive. All twelve months are produced
   * for the first and the last year as well.
   */
  private def getListPrefixes(fromYears: Option[Int]): IndexedSeq[String] =
    val currentMoment = OffsetDateTime.now(ZoneOffset.UTC)
    val fromMoment = currentMoment.minusYears(fromYears.getOrElse(defaultFromYears))
    for
      year <- fromMoment.getYear to currentMoment.getYear
      month <- 1 to 12
    yield f"$year-$month%02d-"

  /**
   * Splits blob content into logical records. A record may span several physical lines;
   * lines are accumulated until `isComplete` accepts the accumulated text.
   */
  private def splitRecords(content: String): Seq[String] =
    val (records, pending) = content.split('\n').foldLeft((Vector.empty[String], "")) {
      case ((done, buffer), line) =>
        // An empty line outside of a pending record carries no data (e.g. an empty blob).
        if buffer.isEmpty && line.isEmpty then (done, buffer)
        else
          // Decide on buffer + current line, so the current line is never discarded.
          val candidate = buffer + line
          if isComplete(candidate) then (done :+ candidate, "") else (done, candidate)
    }
    // Keep a trailing unfinished record so that truncated input surfaces downstream
    // instead of being dropped silently.
    if pending.isEmpty then records else records :+ pending

  /**
   * Read a table snapshot, taking optional start time.
   * @param fromYears Number of years back from the current UTC moment whose Synapse export folders are
   *                  included in the snapshot. If not provided, the last 5 years are included.
   * @return A stream of rows for this table
   */
  def snapshot(fromYears: Option[Int]): Future[LazyList[DataRow]] =
    // list all matching blobs and fetch their content
    // NOTE(review): content elements are widened one by one to Char, which is only lossless
    // for single-byte text — confirm the encoding of the exported files.
    Future.sequence(getListPrefixes(fromYears)
        .flatMap(prefix => reader.listBlobs(storagePath + prefix))
        .map(blob => reader.getBlobContent(storagePath + blob.name, _.map(_.toChar).mkString)))
      .map(_.flatMap(content => splitRecords(content).map(implicitly[DataRow](_, schema))))
      .map(LazyList.from)
|
||
object CdmTable:
  /**
   * Creates a [[CdmTable]] from table settings.
   *
   * @param settings    Table settings; `name` becomes the table name and `rootPath` is parsed into the storage path.
   * @param entityModel CDM entity definition for the table.
   * @param reader      Reader used by the table to access blob storage.
   * @return A new table instance.
   */
  def apply(settings: CdmTableSettings, entityModel: SimpleCdmEntity, reader: AzureBlobStorageReader): CdmTable =
    // NOTE(review): `.get` presumably throws when the root path cannot be parsed — confirm against AdlsStoragePath.
    val rootStoragePath = AdlsStoragePath(settings.rootPath).get
    new CdmTable(settings.name, rootStoragePath, entityModel, reader)
4 changes: 4 additions & 0 deletions
4
framework/arcane-framework/src/main/scala/services/cdm/CdmTableSettings.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
package com.sneaksanddata.arcane.framework | ||
package services.cdm | ||
|
||
/**
 * Settings that identify a CDM table.
 *
 * @param name     Name of the table.
 * @param rootPath Root storage location of the table, in string form.
 */
case class CdmTableSettings(
  name: String,
  rootPath: String
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters