diff --git a/.github/workflows/ci-scala.yaml b/.github/workflows/ci-scala.yaml
new file mode 100644
index 0000000..ec021d3
--- /dev/null
+++ b/.github/workflows/ci-scala.yaml
@@ -0,0 +1,26 @@
+name: Scala CI
+
+on:
+  push:
+    branches: [ "main" ]
+  pull_request: {}
+  workflow_dispatch: {}
+
+permissions:
+  contents: read
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v3
+    - name: Set up JDK 11
+      uses: actions/setup-java@v3
+      with:
+        java-version: '11'
+        distribution: 'temurin'
+        cache: 'sbt'
+    - name: Run tests
+      run: sbt test
diff --git a/build.sbt b/build.sbt
index 0432905..3425d61 100644
--- a/build.sbt
+++ b/build.sbt
@@ -1,4 +1,9 @@
+lazy val projectVersion = "0.0.1"
+
 lazy val project = Project("lakefs-spark-extension", file("./src/"))
+  .settings(
+    version := projectVersion,
+  )
 
 semanticdbEnabled := true
 semanticdbVersion := scalafixSemanticdb.revision
@@ -7,6 +12,8 @@ scalacOptions += "-Ywarn-unused-import"
 libraryDependencies ++= Seq(
   "io.lakefs" % "api-client" % "0.91.0",
   "org.apache.spark" %% "spark-sql" % "3.2.4" % "provided",
+  "org.apache.spark" %% "spark-hive" % "3.2.4" % "test",
+  "joda-time" % "joda-time" % "2.12.5" % "test",
   // TODO(ariels): Wrap api-client calls in:
   "dev.failsafe" % "failsafe" % "3.2.4",
 
@@ -14,6 +21,21 @@ libraryDependencies ++= Seq(
   "org.scalatestplus" %% "scalacheck-1-17" % "3.2.16.0" % "test",
 )
 
+val nexus = "https://s01.oss.sonatype.org/"
+lazy val publishSettings = Seq(
+  publishTo := {
+    if (isSnapshot.value) Some("snapshots" at nexus + "content/repositories/snapshots")
+    else Some("releases" at nexus + "service/local/staging/deploy/maven2")
+  },
+  // Remove all additional repository other than Maven Central from POM
+  pomIncludeRepository := { _ => false },
+  credentials ++= Seq(
+    Credentials(Path.userHome / ".sbt" / "credentials"),
+    Credentials(Path.userHome / ".sbt" / "sonatype_credentials")
+  )
+)
+
+ThisBuild / isSnapshot := false
 ThisBuild / versionScheme := Some("early-semver")
 ThisBuild / organization := "io.lakefs"
 ThisBuild / organizationName := "Treeverse Labs"
diff --git a/project/build.properties b/project/build.properties
new file mode 100644
index 0000000..52413ab
--- /dev/null
+++ b/project/build.properties
@@ -0,0 +1 @@
+sbt.version=1.9.3
diff --git a/project/pgp-sign.sbt b/project/pgp-sign.sbt
new file mode 100644
index 0000000..678cac5
--- /dev/null
+++ b/project/pgp-sign.sbt
@@ -0,0 +1,6 @@
+credentials += Credentials(
+  "GnuPG Key ID",
+  "gpg",
+  "F638279A7281EE8EAE58C4B98D3CFE7E7D0262DA", // key identifier
+  "ignored" // this field is ignored; passwords are supplied by pinentry
+)
diff --git a/project/plugins.sbt b/project/plugins.sbt
new file mode 100644
index 0000000..82afe94
--- /dev/null
+++ b/project/plugins.sbt
@@ -0,0 +1,2 @@
+addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.11.0")
+addSbtPlugin("com.github.sbt" % "sbt-pgp" % "2.1.2")
diff --git a/src/test/scala/io/lakefs/iceberg/extension/ExtensionSpec.scala b/src/test/scala/io/lakefs/iceberg/extension/ExtensionSpec.scala
new file mode 100644
index 0000000..7864f7e
--- /dev/null
+++ b/src/test/scala/io/lakefs/iceberg/extension/ExtensionSpec.scala
@@ -0,0 +1,36 @@
+package io.lakefs.iceberg.extension
+
+import org.scalatest.funspec.AnyFunSpec
+import org.scalatest.matchers.should
+
+import org.apache.spark.sql.Row
+
+class ExtensionSpec extends AnyFunSpec
+    with SparkSessionWithExtensionSetup[LakeFSSparkSessionExtensions]
+    with should.Matchers {
+
+  val _ = new LakeFSSparkSessionExtensions()
+
+  describe("Extension") {
+    it("should SELECT") {
+      withSparkSession(spark => {
+        import spark.implicits._
+
+        spark.sql("CREATE DATABASE first")
+        val df = Seq(("a", 1), ("b", 2), ("c", 3)).toDF
+        df.writeTo("spark_catalog.first.table").create()
+
+        spark.sql("CREATE DATABASE second")
+        val df2 = Seq(("a", 1), ("xyzzy", 2), ("c", 3), ("d", 4)).toDF
+        df2.writeTo("spark_catalog.second.table").create()
+
+        val diff = spark.sql("SELECT * FROM schema_diff('spark_catalog', 'first', 'second', 'table')")
+          .collect()
+          .toSet
+        diff should equal(Set(Row("-", "b", 2), Row("+", "xyzzy", 2), Row("+", "d", 4)))
+      })
+    }
+
+    // TODO(ariels): Test SQL identifier quotation.
+  }
+}
diff --git a/src/test/scala/io/lakefs/iceberg/extension/SparkSessionSetup.scala b/src/test/scala/io/lakefs/iceberg/extension/SparkSessionSetup.scala
new file mode 100644
index 0000000..8f9dce1
--- /dev/null
+++ b/src/test/scala/io/lakefs/iceberg/extension/SparkSessionSetup.scala
@@ -0,0 +1,34 @@
+package io.lakefs.iceberg.extension
+
+import java.nio.file.Files
+import org.apache.commons.io.FileUtils
+import scala.reflect.ClassTag
+
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.SparkConf
+
+trait SparkSessionWithExtensionSetup[E] {
+  def withSparkSession(testMethod: (SparkSession) => Any)(implicit tag: ClassTag[E]) {
+    val tmpDir = Files.createTempDirectory("sparktest").toString
+    val conf = new SparkConf()
+      .setMaster("local")
+      .setAppName("Spark test")
+      .set("spark.sql.extensions", tag.runtimeClass.getCanonicalName)
+      .set("spark.sql.warehouse.dir", tmpDir)
+    val spark = new SparkSession.Builder()
+      .appName("extension-test")
+      .config(conf)
+      .enableHiveSupport()
+      .getOrCreate
+    try {
+      testMethod(spark)
+    } finally {
+      // Clean up catalog dir
+      FileUtils.deleteDirectory(new java.io.File(tmpDir))
+      // local metastore_db always created in current working directory, and
+      // current working directory cannot be changed in Java.
+      FileUtils.deleteDirectory(new java.io.File("./metastore_db"))
+      spark.close()
+    }
+  }
+}