-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Integrate Iceberg Writer with StreamGraphBuilder (#103)
- Loading branch information
Showing
14 changed files
with
307 additions
and
87 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
95 changes: 95 additions & 0 deletions
95
framework/arcane-framework/src/main/scala/services/streaming/IcebergConsumer.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
package com.sneaksanddata.arcane.framework | ||
package services.streaming | ||
|
||
import models.app.StreamContext | ||
import models.{ArcaneSchema, DataRow} | ||
import services.base.SchemaProvider | ||
import services.lakehouse.{CatalogWriter, given_Conversion_ArcaneSchema_Schema} | ||
import services.streaming.IcebergConsumer.getTableName | ||
import services.streaming.base.BatchConsumer | ||
|
||
import org.apache.iceberg.rest.RESTCatalog | ||
import org.apache.iceberg.{Schema, Table} | ||
import org.slf4j.{Logger, LoggerFactory} | ||
import zio.stream.{ZPipeline, ZSink} | ||
import zio.{Chunk, Task, ZIO, ZLayer} | ||
|
||
import java.time.format.DateTimeFormatter | ||
import java.time.{ZoneOffset, ZonedDateTime} | ||
|
||
/** | ||
* A consumer that writes the data to the staging table. | ||
* | ||
* @param streamContext The stream context. | ||
* @param catalogWriter The catalog writer. | ||
* @param schemaProvider The schema provider. | ||
*/ | ||
/**
 * A consumer that writes each incoming batch of rows to its own staging table
 * in the Iceberg catalog, then logs the produced table.
 *
 * @param streamContext  The stream context (supplies the stream id embedded in staging table names).
 * @param catalogWriter  The catalog writer used to create and populate the staging tables.
 * @param schemaProvider The provider of the Arcane schema the rows are written with.
 */
class IcebergConsumer(streamContext: StreamContext,
                      catalogWriter: CatalogWriter[RESTCatalog, Table, Schema],
                      schemaProvider: SchemaProvider[ArcaneSchema]) extends BatchConsumer[Chunk[DataRow]]:

  private val logger: Logger = LoggerFactory.getLogger(classOf[IcebergConsumer])

  /**
   * Returns the sink that consumes the batch.
   *
   * Each chunk is written to a freshly named staging table, and every table
   * produced is logged.
   *
   * @return ZSink (stream sink for the stream graph).
   */
  def consume: ZSink[Any, Throwable, Chunk[DataRow], Any, Unit] =
    writeStagingTable >>> logResults

  // Logs every staging table emitted upstream. The logging call is suspended in
  // ZIO.succeed so the side effect runs when the effect is executed, not eagerly
  // while the effect value is being constructed (the original ran logger.info
  // outside the returned effect).
  private def logResults: ZSink[Any, Throwable, Table, Nothing, Unit] = ZSink.foreach { e =>
    ZIO.succeed(logger.info(s"Received the table ${e.name()} from the streaming source"))
  }

  // Numbers incoming chunks starting from 0 and pairs each chunk with a staging
  // table name derived from the stream id, the current UTC time and the batch number.
  private def writeStagingTable: ZPipeline[Any, Throwable, Chunk[DataRow], Table] = ZPipeline[Chunk[DataRow]]()
    .mapAccum(0L) { (acc, chunk) => (acc + 1, (chunk, acc.getTableName(streamContext.streamId))) }
    .mapZIO({
      case (rows, tableName) => writeWithWriter(rows, tableName)
    })

  // Resolves the target schema, then writes the rows to a new table with the given name.
  // NOTE(review): getSchema is re-fetched for every batch — confirm the provider caches it.
  private def writeWithWriter(rows: Chunk[DataRow], name: String): Task[Table] =
    for
      schema <- ZIO.fromFuture(implicit ec => schemaProvider.getSchema)
      table <- ZIO.fromFuture(implicit ec => catalogWriter.write(rows, name, schema))
    yield table
|
||
/**
 * Companion of [[IcebergConsumer]]: staging-table naming helpers and ZIO wiring.
 */
object IcebergConsumer:

  /** Timestamp pattern embedded into staging table names. */
  val formatter: DateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd_HH-mm-ss")

  /**
   * Builds a staging table name of the form
   * `<streamId>-<UTC timestamp>-<batchNumber>` for this batch number.
   */
  extension (batchNumber: Long) def getTableName(streamId: String): String =
    val stamp = ZonedDateTime.now(ZoneOffset.UTC).format(formatter)
    s"$streamId-$stamp-$batchNumber"

  /**
   * Factory method to create IcebergConsumer
   *
   * @param streamContext The stream context.
   * @param catalogWriter The catalog writer.
   * @param schemaProvider The schema provider.
   * @return The initialized IcebergConsumer instance
   */
  def apply(streamContext: StreamContext,
            catalogWriter: CatalogWriter[RESTCatalog, Table, Schema],
            schemaProvider: SchemaProvider[ArcaneSchema]): IcebergConsumer =
    new IcebergConsumer(streamContext, catalogWriter, schemaProvider)

  /**
   * The required environment for the IcebergConsumer.
   */
  type Environment = SchemaProvider[ArcaneSchema] & CatalogWriter[RESTCatalog, Table, Schema] & StreamContext

  /**
   * The ZLayer that creates the IcebergConsumer.
   */
  val layer: ZLayer[Environment, Nothing, IcebergConsumer] =
    ZLayer {
      for
        schemaProvider <- ZIO.service[SchemaProvider[ArcaneSchema]]
        catalogWriter <- ZIO.service[CatalogWriter[RESTCatalog, Table, Schema]]
        streamContext <- ZIO.service[StreamContext]
      yield new IcebergConsumer(streamContext, catalogWriter, schemaProvider)
    }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
17 changes: 17 additions & 0 deletions
17
framework/arcane-framework/src/main/scala/services/streaming/base/BatchConsumer.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
package com.sneaksanddata.arcane.framework | ||
package services.streaming.base | ||
|
||
import zio.stream.ZSink | ||
|
||
/** | ||
* A trait that represents a grouped data batch consumer. | ||
* @tparam ConsumableBatch The type of the consumable batch. | ||
*/ | ||
/**
 * A trait that represents a grouped data batch consumer.
 *
 * An implementation terminates a streaming graph by providing the sink that
 * receives fully formed batches.
 *
 * @tparam ConsumableBatch The type of the consumable batch.
 */
trait BatchConsumer[ConsumableBatch]:

  /**
   * Returns the sink that consumes the batch.
   *
   * @return ZSink (stream sink for the stream graph).
   */
  def consume: ZSink[Any, Throwable, ConsumableBatch, Any, Unit]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.