-
Notifications
You must be signed in to change notification settings - Fork 74
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Partition export file to have a maximum number of events (#5066)
* Partition export file to have a maximum number of events * Revert change on temp directory fixture --------- Co-authored-by: Simon Dumas <simon.dumas@epfl.ch>
- Loading branch information
Showing
9 changed files
with
180 additions
and
51 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
75 changes: 75 additions & 0 deletions
75
...l/src/main/scala/ch/epfl/bluebrain/nexus/delta/sourcing/stream/utils/StreamingUtils.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
package ch.epfl.bluebrain.nexus.delta.sourcing.stream.utils | ||
|
||
import cats.effect.{IO, Resource} | ||
import cats.effect.std.Hotswap | ||
import fs2.io.file.{FileHandle, Files, Flag, Flags, Path, WriteCursor} | ||
import fs2.{text, Pipe, Pull, Stream} | ||
|
||
object StreamingUtils {

  // Flags used when opening each output file
  private val flags = Flags.Write

  private val lineSeparator = "\n"

  // Terminates the last line of every written batch
  private val newLine = Stream.emit(lineSeparator)

  /**
    * Reads the file at the given path as UTF-8 lines, dropping the empty ones
    *
    * @param path
    *   the file to read
    */
  def readLines(path: Path): Stream[IO, String] =
    Files[IO].readUtf8Lines(path).filter(_.nonEmpty)

  /**
    * Writes all data to a sequence of files, each limited to a maximum number of lines
    *
    * Adapted from fs2.io.file.Files.writeRotate (which is not preserving lines)
    *
    * The first file is opened before any element is pulled, so an empty input still produces one (empty) file. A
    * subsequent file is only opened once there is at least one more line to write, so that an input whose size is an
    * exact multiple of the limit does not leave a trailing empty file.
    *
    * @param computePath
    *   to compute the path of the first file and the subsequent ones
    * @param limit
    *   maximum number of lines
    */
  def writeRotate(computePath: IO[Path], limit: Int): Pipe[IO, String, Nothing] = {
    // Evaluates computePath and opens the resulting file for writing
    def openNewFile: Resource[IO, FileHandle[IO]] =
      Resource
        .eval(computePath)
        .flatMap(p => Files[IO].open(p, flags.addIfAbsent(Flag.Write)))

    // The cursor position follows the presence of the append flag in `flags`
    def newCursor(file: FileHandle[IO]): IO[WriteCursor[IO]] =
      Files[IO].writeCursorFromFileHandle(file, flags.contains(Flag.Append))

    // Swaps the current file handle for a new one and returns a cursor on it
    def rotate(fileHotswap: Hotswap[IO, FileHandle[IO]]): IO[WriteCursor[IO]] =
      fileHotswap.swap(openNewFile).flatMap(newCursor)

    // acc is the number of lines already written in the current file
    def go(
        fileHotswap: Hotswap[IO, FileHandle[IO]],
        cursor: WriteCursor[IO],
        acc: Int,
        s: Stream[IO, String]
    ): Pull[IO, Unit, Unit] =
      // Never pull more lines than the current file can still accept
      s.pull.unconsLimit(limit - acc).flatMap {
        case Some((hd, tl)) =>
          val newAcc    = acc + hd.size
          // Every line of the batch, including the last one, ends with the line separator
          val hdAsBytes =
            Stream.chunk(hd).intersperse(lineSeparator).append(newLine).through(text.utf8.encode)
          cursor.writeAll(hdAsBytes).flatMap { updatedCursor =>
            if (newAcc >= limit)
              // The current file is full: rotate only if there is more to write
              tl.pull.peek.flatMap {
                case Some((_, remaining)) =>
                  Pull.eval(rotate(fileHotswap)).flatMap { freshCursor =>
                    go(fileHotswap, freshCursor, 0, remaining)
                  }
                case None                 => Pull.done
              }
            else
              go(fileHotswap, updatedCursor, newAcc, tl)
          }
        case None           => Pull.done
      }

    in =>
      Stream
        .resource(Hotswap(openNewFile))
        .flatMap { case (fileHotswap, fileHandle) =>
          Stream.eval(newCursor(fileHandle)).flatMap { cursor =>
            go(fileHotswap, cursor, 0, in).stream.drain
          }
        }
  }

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
34 changes: 34 additions & 0 deletions
34
.../test/scala/ch/epfl/bluebrain/nexus/delta/sourcing/stream/utils/StreamingUtilsSuite.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
package ch.epfl.bluebrain.nexus.delta.sourcing.stream.utils | ||
|
||
import cats.effect.IO | ||
import ch.epfl.bluebrain.nexus.testkit.file.TempDirectory | ||
import ch.epfl.bluebrain.nexus.testkit.mu.NexusSuite | ||
import fs2.Stream | ||
import fs2.io.file.Files | ||
import munit.AnyFixture | ||
|
||
class StreamingUtilsSuite extends NexusSuite with TempDirectory.Fixture {

  override def munitFixtures: Seq[AnyFixture[_]] = List(tempDirectory)

  // Directory where the rotated files get written
  private lazy val exportDirectory = tempDirectory()

  private val limitPerFile = 3
  private val lines        = Stream.emits(List("A", "B", "C", "D", "E"))

  test(s"Write stream of lines in a file rotating every $limitPerFile lines") {
    // Path of the n-th file produced by the rotation
    def partFile(index: Int) = exportDirectory / s"part-$index.txt"

    for {
      fileCounter <- IO.ref(0)
      // Each evaluation increments the counter and points to the next file
      nextPath     = fileCounter.updateAndGet(_ + 1).map(partFile)
      _           <- lines.through(StreamingUtils.writeRotate(nextPath, limitPerFile)).compile.drain
      // Five lines with a limit of three must be spread over two files
      _           <- Files[IO].list(exportDirectory).assertSize(2)
      _           <- Files[IO].readUtf8Lines(partFile(1)).assert("A", "B", "C")
      _           <- Files[IO].readUtf8Lines(partFile(2)).assert("D", "E")
    } yield ()
  }

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters