Skip to content

Commit

Permalink
Better relation embedder cli (#2788)
Browse files Browse the repository at this point in the history
* start creating CLIMain

* make a local CLIMain work

* enable CLI to read from live database

* make pipeline date an env var

* make pipeline date an argument

* Apply auto-formatting rules

* remove debugging code

* de-akkafy reading from stdin

* better handling of dodgy input

* try removing internal model from batcher

* improve StdInNDJSON interface

* improve commentary

* report parse failure

---------

Co-authored-by: Github on behalf of Wellcome Collection <wellcomedigitalplatform@wellcome.ac.uk>
  • Loading branch information
paul-butcher and weco-bot authored Dec 16, 2024
1 parent 3e0a466 commit 2622c78
Showing 1 changed file with 31 additions and 5 deletions.
Original file line number Diff line number Diff line change
@@ -1,15 +1,41 @@
package weco.pipeline.relation_embedder.lib
import grizzled.slf4j.Logging
import weco.json.JsonUtil._
import weco.pipeline.relation_embedder.models.Batch

import scala.io.Source.stdin
import scala.util.{Failure, Success, Try}

/** Trait to deal with Newline Delimited JSON being provided on STDIN.
  *
  * Each line read from STDIN is passed to jsonToInstance (provided by the
  * extending class). Lines that convert successfully populate the instances
  * Iterator; lines that fail are reported through the logger at error level
  * and left out, so one bad line does not stop the rest from being read.
  *
  * @tparam T
  *   the type each line of input is converted to
  */
trait StdInNDJSON[T] extends Logging {

  /** Convert a single line of input into a T.
    *
    * @param str
    *   one line as read from STDIN
    * @return
    *   a Success holding the instance, or a Failure describing why the line
    *   could not be converted
    */
  protected def jsonToInstance(str: String): Try[T]

  // getLines() returns an Iterator, so lines are only pulled from STDIN as
  // the instances Iterator is consumed.
  private val stdInStrings: Iterator[String] = stdin.getLines()

  /** Apply jsonToInstance to one line, logging the failure message and
    * yielding None when the conversion fails.
    */
  private def toInstance(jsonString: String): Option[T] =
    jsonToInstance(jsonString) match {
      case Failure(exception) =>
        error(exception.getMessage)
        None
      case Success(value) => Some(value)
    }

  /** The successfully converted instances, in the order their lines arrive. */
  protected val instances: Iterator[T] =
    stdInStrings
      .flatMap(
        toInstance
      )

}

/** Reads Batch instances from Newline Delimited JSON provided on STDIN.
  *
  * Conversion of each line is delegated to fromJson[Batch]; handling of lines
  * that fail to convert is inherited from StdInNDJSON.
  */
trait StdInBatches extends StdInNDJSON[Batch] {

  /** Convert one line of input into a Batch.
    *
    * @param jsonString
    *   one line as read from STDIN
    * @return
    *   the result of fromJson[Batch] for that line
    */
  def jsonToInstance(jsonString: String): Try[Batch] =
    fromJson[Batch](jsonString)

  /** The Batches read from STDIN; this is the inherited instances Iterator. */
  protected val batches: Iterator[Batch] = instances
}

0 comments on commit 2622c78

Please sign in to comment.