diff --git a/README.md b/README.md
index d8247e7f..f4708f60 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 Spark ClickHouse Connector
 ===
-[![Maven Central](https://maven-badges.herokuapp.com/maven-central/com.github.housepower/clickhouse-spark-runtime-3.3_2.12/badge.svg)](https://search.maven.org/search?q=clickhouse-spark-runtime)
+[![Maven Central](https://maven-badges.herokuapp.com/maven-central/com.clickhouse.spark/clickhouse-spark-runtime-3.5_2.12/badge.svg)](https://search.maven.org/search?q=clickhouse-spark-runtime)
 [![License](https://img.shields.io/github/license/clickhouse/spark-clickhouse-connector)](https://github.com/ClickHouse/spark-clickhouse-connector/blob/main/LICENSE)

 Build on Apache Spark DataSourceV2 API.
@@ -24,11 +24,14 @@ Notes:
 2. Since 0.6.0, HTTP becomes the default protocol.
 3. Since 0.7.0, gRPC is deprecated and not recommended, it may be removed in the future.
 4. Since 0.8.0, gRPC is removed.
+5. Since 0.8.0, the project groupId is renamed from `com.github.housepower` to `com.clickhouse.spark`, and
+   the class `xenon.clickhouse.ClickHouseCatalog` is renamed to `com.clickhouse.spark.ClickHouseCatalog`.

 ## Compatible Matrix

 | Version | Compatible Spark Versions | ClickHouse JDBC version |
 |---------|---------------------------|-------------------------|
+| 0.8.1   | Spark 3.3, 3.4, 3.5       | 0.6.3                   |
 | 0.8.0   | Spark 3.3, 3.4, 3.5       | 0.6.3                   |
 | 0.7.3   | Spark 3.3, 3.4            | 0.4.6                   |
 | 0.6.0   | Spark 3.3                 | 0.3.2-patch11           |
diff --git a/clickhouse-core/src/main/scala/com/clickhouse/spark/client/NodeClient.scala b/clickhouse-core/src/main/scala/com/clickhouse/spark/client/NodeClient.scala
index 0d777d95..81c485a6 100644
--- a/clickhouse-core/src/main/scala/com/clickhouse/spark/client/NodeClient.scala
+++ b/clickhouse-core/src/main/scala/com/clickhouse/spark/client/NodeClient.scala
@@ -45,12 +45,16 @@ class NodeClient(val nodeSpec: NodeSpec) extends AutoCloseable with Logging {
   private lazy val userAgent = {
     val title = getClass.getPackage.getImplementationTitle
     val version = getClass.getPackage.getImplementationVersion
-    if (version != null) {
+    if (version != null && title != null) {
       val versions = version.split("_")
-      val sparkVersion = versions(0)
-      val scalaVersion = versions(1)
-      val connectorVersion = versions(2)
-      s"${title}/${connectorVersion} (fv:spark/${sparkVersion}, lv:scala/${scalaVersion})"
+      if (versions.length < 3) {
+        "Spark-ClickHouse-Connector"
+      } else {
+        val sparkVersion = versions(0)
+        val scalaVersion = versions(1)
+        val connectorVersion = versions(2)
+        s"${title}/${connectorVersion} (fv:spark/${sparkVersion}, lv:scala/${scalaVersion})"
+      }
     } else {
       "Spark-ClickHouse-Connector"
     }
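Note on the `userAgent` change above: the extra guards avoid emitting a literal `null` title or throwing `ArrayIndexOutOfBoundsException` when the jar manifest has no `Implementation-Title`, or when `Implementation-Version` does not follow the expected `<spark>_<scala>_<connector>` shape. A standalone sketch of the same logic; the manifest values below are made up for illustration, not taken from a real build:

```scala
def userAgent(title: String, version: String): String =
  if (version != null && title != null) {
    val versions = version.split("_")
    if (versions.length < 3) "Spark-ClickHouse-Connector"
    else s"$title/${versions(2)} (fv:spark/${versions(0)}, lv:scala/${versions(1)})"
  } else {
    "Spark-ClickHouse-Connector"
  }

// Illustrative manifest values only.
userAgent("clickhouse-spark-runtime-3.5_2.12", "3.5_2.12_0.8.1")
// => "clickhouse-spark-runtime-3.5_2.12/0.8.1 (fv:spark/3.5, lv:scala/2.12)"
userAgent("clickhouse-spark-runtime-3.5_2.12", "0.8.1") // => "Spark-ClickHouse-Connector"
userAgent(null, "3.5_2.12_0.8.1")                       // => "Spark-ClickHouse-Connector"
```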
diff --git a/clickhouse-core/src/main/scala/com/clickhouse/spark/parse/AstVisitor.scala b/clickhouse-core/src/main/scala/com/clickhouse/spark/parse/AstVisitor.scala
index f4f4f8fc..dc1a7331 100644
--- a/clickhouse-core/src/main/scala/com/clickhouse/spark/parse/AstVisitor.scala
+++ b/clickhouse-core/src/main/scala/com/clickhouse/spark/parse/AstVisitor.scala
@@ -102,7 +102,8 @@ class AstVisitor extends ClickHouseSQLBaseVisitor[AnyRef] with Logging {
       case eg: String if "ReplacingMergeTree" equalsIgnoreCase eg =>
         ReplacingMergeTreeEngineSpec(
           engine_clause = engineExpr,
-          version_column = seqToOption(engineArgs).map(_.asInstanceOf[FieldRef]),
+          version_column = engineArgs.lift(0).map(_.asInstanceOf[FieldRef]),
+          is_deleted_column = engineArgs.lift(1).map(_.asInstanceOf[FieldRef]),
           _sorting_key = tupleIfNeeded(orderByOpt.toList),
           _primary_key = tupleIfNeeded(pkOpt.toList),
           _partition_key = tupleIfNeeded(partOpt.toList),
@@ -127,7 +128,8 @@ class AstVisitor extends ClickHouseSQLBaseVisitor[AnyRef] with Logging {
           engine_clause = engineExpr,
           zk_path = engineArgs.head.asInstanceOf[StringLiteral].value,
           replica_name = engineArgs(1).asInstanceOf[StringLiteral].value,
-          version_column = seqToOption(engineArgs.drop(2)).map(_.asInstanceOf[FieldRef]),
+          version_column = engineArgs.lift(2).map(_.asInstanceOf[FieldRef]),
+          is_deleted_column = engineArgs.lift(3).map(_.asInstanceOf[FieldRef]),
           _sorting_key = tupleIfNeeded(orderByOpt.toList),
           _primary_key = tupleIfNeeded(pkOpt.toList),
           _partition_key = tupleIfNeeded(partOpt.toList),
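Note on the switch from `seqToOption(...)` to `engineArgs.lift(n)` above: `Seq.lift` returns `None` when the index is out of range, so `ReplacingMergeTree()`, `ReplacingMergeTree(ver)`, and `ReplacingMergeTree(ver, is_deleted)` all map onto the two optional columns without explicit length checks. A minimal illustration, with plain strings standing in for the parsed `FieldRef` arguments:

```scala
val engineArgs = Seq("ts") // stand-in for the parsed engine argument list

engineArgs.lift(0) // Some("ts") -> version_column
engineArgs.lift(1) // None       -> is_deleted_column
```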
diff --git a/clickhouse-core/src/main/scala/com/clickhouse/spark/spec/TableEngineSpec.scala b/clickhouse-core/src/main/scala/com/clickhouse/spark/spec/TableEngineSpec.scala
index 13534d76..48f56c3b 100644
--- a/clickhouse-core/src/main/scala/com/clickhouse/spark/spec/TableEngineSpec.scala
+++ b/clickhouse-core/src/main/scala/com/clickhouse/spark/spec/TableEngineSpec.scala
@@ -88,6 +88,7 @@ case class ReplicatedMergeTreeEngineSpec(
 case class ReplacingMergeTreeEngineSpec(
   engine_clause: String,
   version_column: Option[FieldRef] = None,
+  is_deleted_column: Option[FieldRef] = None,
   var _sorting_key: TupleExpr = TupleExpr(List.empty),
   var _primary_key: TupleExpr = TupleExpr(List.empty),
   var _partition_key: TupleExpr = TupleExpr(List.empty),
@@ -109,6 +110,7 @@ case class ReplicatedReplacingMergeTreeEngineSpec(
   zk_path: String,
   replica_name: String,
   version_column: Option[FieldRef] = None,
+  is_deleted_column: Option[FieldRef] = None,
   var _sorting_key: TupleExpr = TupleExpr(List.empty),
   var _primary_key: TupleExpr = TupleExpr(List.empty),
   var _partition_key: TupleExpr = TupleExpr(List.empty),
diff --git a/clickhouse-core/src/test/scala/com/clickhouse/spark/parse/SQLParserSuite.scala b/clickhouse-core/src/test/scala/com/clickhouse/spark/parse/SQLParserSuite.scala
index 181e7f88..bbcdf245 100644
--- a/clickhouse-core/src/test/scala/com/clickhouse/spark/parse/SQLParserSuite.scala
+++ b/clickhouse-core/src/test/scala/com/clickhouse/spark/parse/SQLParserSuite.scala
@@ -83,6 +83,21 @@ class SQLParserSuite extends AnyFunSuite {
     assert(actual === expected)
   }

+  test("parse ReplacingMergeTree - 3") {
+    val ddl = "ReplacingMergeTree(ts, is_deleted) " +
+      "PARTITION BY toYYYYMM(created) ORDER BY id SETTINGS index_granularity = 8192"
+    val actual = parser.parseEngineClause(ddl)
+    val expected = ReplacingMergeTreeEngineSpec(
+      engine_clause = "ReplacingMergeTree(ts, is_deleted)",
+      version_column = Some(FieldRef("ts")),
+      is_deleted_column = Some(FieldRef("is_deleted")),
+      _sorting_key = TupleExpr(FieldRef("id") :: Nil),
+      _partition_key = TupleExpr(List(FuncExpr("toYYYYMM", List(FieldRef("created"))))),
+      _settings = Map("index_granularity" -> "8192")
+    )
+    assert(actual === expected)
+  }
+
   test("parse ReplicatedReplacingMergeTree - 1") {
     val ddl = "ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/wj_report/wj_respondent', '{replica}') " +
       "PARTITION BY toYYYYMM(created) ORDER BY id SETTINGS index_granularity = 8192"
@@ -115,6 +130,25 @@ class SQLParserSuite extends AnyFunSuite {
     assert(actual === expected)
   }

+  test("parse ReplicatedReplacingMergeTree - 3") {
+    val ddl = "ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/wj_report/wj_respondent', '{replica}', " +
+      "ts, is_deleted) PARTITION BY toYYYYMM(created) ORDER BY id SETTINGS index_granularity = 8192"
+    val actual = parser.parseEngineClause(ddl)
+    val expected = ReplicatedReplacingMergeTreeEngineSpec(
+      engine_clause =
+        "ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/wj_report/wj_respondent', '{replica}', " +
+          "ts, is_deleted)",
+      zk_path = "/clickhouse/tables/{shard}/wj_report/wj_respondent",
+      replica_name = "{replica}",
+      version_column = Some(FieldRef("ts")),
+      is_deleted_column = Some(FieldRef("is_deleted")),
+      _sorting_key = TupleExpr(FieldRef("id") :: Nil),
+      _partition_key = TupleExpr(List(FuncExpr("toYYYYMM", List(FieldRef("created"))))),
+      _settings = Map("index_granularity" -> "8192")
+    )
+    assert(actual === expected)
+  }
+
   test("parse Distributed - 1") {
     val ddl = "Distributed('default', 'wj_report', 'wj_respondent_local')"
     val actual = parser.parseEngineClause(ddl)
diff --git a/docker/.env b/docker/.env
index 735cff35..dc6605a9 100644
--- a/docker/.env
+++ b/docker/.env
@@ -13,20 +13,20 @@
 #

 # latest stable version
-PROJECT_VERSION=0.8.1-SNAPSHOT
+PROJECT_VERSION=0.8.1

 CLICKHOUSE_IMAGE=clickhouse/clickhouse-server:23.8
 AWS_JAVA_SDK_VERSION=1.12.367
 CLICKHOUSE_JDBC_VERSION=0.6.3
 HADOOP_VERSION=3.3.6
 HIVE_VERSION=2.3.9
-ICEBERG_VERSION=1.4.3
-KYUUBI_VERSION=1.9.0
+ICEBERG_VERSION=1.6.0
+KYUUBI_VERSION=1.9.2
 KYUUBI_HADOOP_VERSION=3.3.6
 POSTGRES_VERSION=12
 POSTGRES_JDBC_VERSION=42.3.4
 SCALA_BINARY_VERSION=2.12
-SPARK_VERSION=3.4.2
-SPARK_BINARY_VERSION=3.4
+SPARK_VERSION=3.5.2
+SPARK_BINARY_VERSION=3.5
 SPARK_HADOOP_VERSION=3.3.4
 ZOOKEEPER_VERSION=3.6.3
diff --git a/docker/.env-dev b/docker/.env-dev
index 618329b7..90ab0158 100644
--- a/docker/.env-dev
+++ b/docker/.env-dev
@@ -13,20 +13,20 @@
 #

 # current developing version
-PROJECT_VERSION=0.8.1-SNAPSHOT
+PROJECT_VERSION=0.8.1

 CLICKHOUSE_IMAGE=clickhouse/clickhouse-server:23.8
 AWS_JAVA_SDK_VERSION=1.12.367
 CLICKHOUSE_JDBC_VERSION=0.6.3
 HADOOP_VERSION=3.3.6
 HIVE_VERSION=2.3.9
-ICEBERG_VERSION=1.4.3
-KYUUBI_VERSION=1.9.0
+ICEBERG_VERSION=1.6.0
+KYUUBI_VERSION=1.9.2
 KYUUBI_HADOOP_VERSION=3.3.6
 POSTGRES_VERSION=12
 POSTGRES_JDBC_VERSION=42.3.4
 SCALA_BINARY_VERSION=2.12
-SPARK_VERSION=3.4.2
-SPARK_BINARY_VERSION=3.4
+SPARK_VERSION=3.5.2
+SPARK_BINARY_VERSION=3.5
 SPARK_HADOOP_VERSION=3.3.4
 ZOOKEEPER_VERSION=3.6.3
diff --git a/docker/compose-dev.yml b/docker/compose-dev.yml
index ffb6cb27..6646d107 100644
--- a/docker/compose-dev.yml
+++ b/docker/compose-dev.yml
@@ -153,7 +153,7 @@ services:
       - zookeeper

   cloudbeaver:
-    image: dbeaver/cloudbeaver:24.0.2
+    image: dbeaver/cloudbeaver:24.1
     environment:
       CB_LOCAL_HOST_ADDR: 0.0.0.0
       CB_SERVER_NAME: Kyuubi Playground
diff --git a/docker/compose.yml b/docker/compose.yml
index 7b500135..b1dafc33 100644
--- a/docker/compose.yml
+++ b/docker/compose.yml
@@ -152,7 +152,7 @@ services:
       - zookeeper

   cloudbeaver:
-    image: pan3793/cloudbeaver:20240223
+    image: dbeaver/cloudbeaver:24.1
     environment:
       CB_LOCAL_HOST_ADDR: 0.0.0.0
       CB_SERVER_NAME: Kyuubi Playground
diff --git a/docker/image/scc-metastore.Dockerfile b/docker/image/scc-metastore.Dockerfile
index c621c6e0..f9d24b41 100644
--- a/docker/image/scc-metastore.Dockerfile
+++ b/docker/image/scc-metastore.Dockerfile
@@ -24,7 +24,7 @@ ENV HIVE_HOME=/opt/hive
 ENV HIVE_CONF_DIR=/etc/hive/conf

 RUN set -x && \
-    wget -q ${APACHE_MIRROR}/hive/hive-${HIVE_VERSION}/apache-hive-${HIVE_VERSION}-bin.tar.gz && \
+    wget -q https://archive.apache.org/dist/hive/hive-${HIVE_VERSION}/apache-hive-${HIVE_VERSION}-bin.tar.gz && \
     tar -xzf apache-hive-${HIVE_VERSION}-bin.tar.gz -C /opt && \
     ln -s /opt/apache-hive-${HIVE_VERSION}-bin ${HIVE_HOME} && \
     rm apache-hive-${HIVE_VERSION}-bin.tar.gz
diff --git a/docker/image/scc-spark.Dockerfile b/docker/image/scc-spark.Dockerfile
index f4b0159c..5b9b0ac6 100644
--- a/docker/image/scc-spark.Dockerfile
+++ b/docker/image/scc-spark.Dockerfile
@@ -59,4 +59,4 @@ RUN set -x && \
     CLICKHOUSE_JDBC_JAR_NAME=clickhouse-jdbc && \
     wget -q ${MAVEN_MIRROR}/com/clickhouse/${CLICKHOUSE_JDBC_JAR_NAME}/${CLICKHOUSE_JDBC_VERSION}/${CLICKHOUSE_JDBC_JAR_NAME}-${CLICKHOUSE_JDBC_VERSION}-all.jar -P ${SPARK_HOME}/jars && \
     SCC_JAR_NAME=clickhouse-spark-runtime-${SPARK_BINARY_VERSION}_${SCALA_BINARY_VERSION} && \
-    if [ "$(echo ${PROJECT_VERSION} | grep SNAPSHOT)" = "" ]; then wget -q ${MAVEN_MIRROR}/com/github/housepower/${SCC_JAR_NAME}/${PROJECT_VERSION}/${SCC_JAR_NAME}-${PROJECT_VERSION}.jar -P ${SPARK_HOME}/jars; fi
+    if [ "$(echo ${PROJECT_VERSION} | grep SNAPSHOT)" = "" ]; then wget -q ${MAVEN_MIRROR}/com/clickhouse/spark/${SCC_JAR_NAME}/${PROJECT_VERSION}/${SCC_JAR_NAME}-${PROJECT_VERSION}.jar -P ${SPARK_HOME}/jars; fi
diff --git a/docs/developers/04_public_release.md b/docs/developers/04_public_release.md
index d8c2fce8..d9879b77 100644
--- a/docs/developers/04_public_release.md
+++ b/docs/developers/04_public_release.md
@@ -37,7 +37,7 @@ workflow, it is scheduled to be deployed at midnight every day.
 3. Create new tag, e.g. `v0.3.0`, it will trigger the
    [Publish Release](https://github.com/ClickHouse/spark-clickhouse-connector/blob/main/.github/workflows/publish-release.yml) workflow;
 4. Verify, close, and release in [Sonatype Repository](https://oss.sonatype.org/#stagingRepositories)
-5. Announce in [GitHub Release](https://github.com/housepower/spark-clickhouse-connector/releases)
+5. Announce in [GitHub Release](https://github.com/ClickHouse/spark-clickhouse-connector/releases)
 6. Update version in `version.txt` and `docker/.env-dev`, e.g. from `0.3.0` to `0.3.1-SNAPSHOT`;
 7. Update version on main branch in `version.txt` and `docker/.env-dev`, e.g. from `0.3.0-SNAPSHOT` to `0.4.0-SNAPSHOT`;
 8. [Publish Docker image](https://github.com/ClickHouse/spark-clickhouse-connector/tree/main/docker) after jars
diff --git a/docs/index.md b/docs/index.md
index 84d8e650..7705e826 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -46,6 +46,7 @@ For old versions, please refer the compatible matrix.
 | Version | Compatible Spark Versions | ClickHouse JDBC version |
 |---------|---------------------------|-------------------------|
 | main    | Spark 3.3, 3.4, 3.5       | 0.6.3                   |
+| 0.8.0   | Spark 3.3, 3.4, 3.5       | 0.6.3                   |
 | 0.7.3   | Spark 3.3, 3.4            | 0.4.6                   |
 | 0.6.0   | Spark 3.3                 | 0.3.2-patch11           |
 | 0.5.0   | Spark 3.2, 3.3            | 0.3.2-patch11           |
diff --git a/docs/quick_start/01_get_the_library.md b/docs/quick_start/01_get_the_library.md
index d610ab30..31ee05a7 100644
--- a/docs/quick_start/01_get_the_library.md
+++ b/docs/quick_start/01_get_the_library.md
@@ -22,8 +22,8 @@ The name pattern of binary jar is
 ```
 clickhouse-spark-runtime-${spark_binary_version}_${scala_binary_version}-${version}.jar
 ```
-you can find all available released jars under [Maven Central Repository](https://repo1.maven.org/maven2/com/github/housepower)
-and all daily build SNAPSHOT jars under [Sonatype OSS Snapshots Repository](https://oss.sonatype.org/content/repositories/snapshots/com/github/housepower/).
+you can find all available released jars under [Maven Central Repository](https://repo1.maven.org/maven2/com/clickhouse/spark)
+and all daily build SNAPSHOT jars under [Sonatype OSS Snapshots Repository](https://oss.sonatype.org/content/repositories/snapshots/com/clickhouse/spark/).

 ## Import as Dependency

@@ -31,7 +31,7 @@ and all daily build SNAPSHOT jars under [Sonatype OSS Snapshots Repository](http

 ```
 dependencies {
-    implementation("com.github.housepower:clickhouse-spark-runtime-{{ spark_binary_version }}_{{ scala_binary_version }}:{{ stable_version }}")
+    implementation("com.clickhouse.spark:clickhouse-spark-runtime-{{ spark_binary_version }}_{{ scala_binary_version }}:{{ stable_version }}")
     implementation("com.clickhouse:clickhouse-jdbc:{{ clickhouse_jdbc_version }}:all") { transitive = false }
 }
 ```
@@ -48,7 +48,7 @@ repositries {
 ```
 <dependency>
-  <groupId>com.github.housepower</groupId>
+  <groupId>com.clickhouse.spark</groupId>
   <artifactId>clickhouse-spark-runtime-{{ spark_binary_version }}_{{ scala_binary_version }}</artifactId>
   <version>{{ stable_version }}</version>
 </dependency>
 ```
diff --git a/docs/quick_start/02_play_with_spark_sql.md b/docs/quick_start/02_play_with_spark_sql.md
index 7805591a..b1fc59ec 100644
--- a/docs/quick_start/02_play_with_spark_sql.md
+++ b/docs/quick_start/02_play_with_spark_sql.md
@@ -40,7 +40,7 @@ The following argument can be replaced by

 ```
 --repositories https://{maven-cental-mirror or private-nexus-repo} \
---packages com.github.housepower:clickhouse-spark-runtime-{{ spark_binary_version }}_{{ scala_binary_version }}:{{ stable_version }},com.clickhouse:clickhouse-jdbc:{{ clickhouse_jdbc_version }}:all
+--packages com.clickhouse.spark:clickhouse-spark-runtime-{{ spark_binary_version }}_{{ scala_binary_version }}:{{ stable_version }},com.clickhouse:clickhouse-jdbc:{{ clickhouse_jdbc_version }}:all
 ```

 to avoid copying jar to your Spark client node.
diff --git a/docs/quick_start/03_play_with_spark_shell.md b/docs/quick_start/03_play_with_spark_shell.md
index dcd6b1c2..80065d34 100644
--- a/docs/quick_start/03_play_with_spark_shell.md
+++ b/docs/quick_start/03_play_with_spark_shell.md
@@ -37,7 +37,7 @@ The following argument can be replaced by

 ```
 --repositories https://{maven-cental-mirror or private-nexus-repo} \
---packages com.github.housepower:clickhouse-spark-runtime-{{ spark_binary_version }}_{{ scala_binary_version }}:{{ stable_version }},com.clickhouse:clickhouse-jdbc:{{ clickhouse_jdbc_version }}:all
+--packages com.clickhouse.spark:clickhouse-spark-runtime-{{ spark_binary_version }}_{{ scala_binary_version }}:{{ stable_version }},com.clickhouse:clickhouse-jdbc:{{ clickhouse_jdbc_version }}:all
 ```

 to avoid copying jar to your Spark client node.
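For reference, the renamed coordinates and catalog class fit together like this in application code. This is a minimal sketch, assuming the connector's usual catalog options (`host`, `protocol`, `http_port`, `user`, `password`, `database`); the address and credentials below are placeholders, adjust them to your deployment:

```scala
import org.apache.spark.sql.SparkSession

// Register the renamed catalog class (com.clickhouse.spark.ClickHouseCatalog)
// under the catalog name "clickhouse" and point it at a local server.
val spark = SparkSession.builder()
  .appName("clickhouse-quickstart")
  .config("spark.sql.catalog.clickhouse", "com.clickhouse.spark.ClickHouseCatalog")
  .config("spark.sql.catalog.clickhouse.host", "127.0.0.1")
  .config("spark.sql.catalog.clickhouse.protocol", "http")
  .config("spark.sql.catalog.clickhouse.http_port", "8123")
  .config("spark.sql.catalog.clickhouse.user", "default")
  .config("spark.sql.catalog.clickhouse.password", "")
  .config("spark.sql.catalog.clickhouse.database", "default")
  .getOrCreate()

spark.sql("USE clickhouse")        // switch to the ClickHouse catalog
spark.sql("SHOW DATABASES").show() // list ClickHouse databases through Spark
```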
diff --git a/examples/scala/spark-3.5/build.sbt b/examples/scala/spark-3.5/build.sbt
index 668a3d36..794c6aa1 100644
--- a/examples/scala/spark-3.5/build.sbt
+++ b/examples/scala/spark-3.5/build.sbt
@@ -18,11 +18,9 @@ version := "1.0"

 scalaVersion := "2.12.18"

-resolvers += "Maven Repo" at "https://s01.oss.sonatype.org/content/repositories/snapshots/"
-
 libraryDependencies += "org.apache.spark" %% "spark-sql" % "3.5.0"
 libraryDependencies += "org.apache.spark" %% "spark-core" % "3.5.0"
 libraryDependencies += "com.clickhouse" % "clickhouse-jdbc" % "0.6.3" classifier "all"

-libraryDependencies += "com.clickhouse.spark" %% "clickhouse-spark-runtime-3.5" % "0.8.0-SNAPSHOT"
+libraryDependencies += "com.clickhouse.spark" %% "clickhouse-spark-runtime-3.5" % "0.8.0"
diff --git a/gradle.properties b/gradle.properties
index b265eed2..ef0863bc 100644
--- a/gradle.properties
+++ b/gradle.properties
@@ -58,7 +58,7 @@ commons_codec_version=1.16.0
 jakarta_annotation_api_version=1.3.5

 # Test only
-kyuubi_version=1.9.0
+kyuubi_version=1.9.2
 testcontainers_scala_version=0.41.2
 scalatest_version=3.2.16
 flexmark_version=0.62.2
diff --git a/version.txt b/version.txt
index 160a6bb7..6f4eebdf 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-0.8.1-SNAPSHOT
+0.8.1