From 5506fb8863c27fecc8e76057ac66d205f960cb37 Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Thu, 27 Apr 2023 13:07:06 +0800 Subject: [PATCH] Spark: Deprecate gRPC protocol (#233) --- README.md | 1 + docs/configurations/01_catalog_configurations.md | 12 ++++++------ docs/index.md | 2 +- docs/internals/index.md | 2 +- docs/quick_start/02_play_with_spark_sql.md | 2 +- docs/quick_start/03_play_with_spark_shell.md | 8 ++++---- .../scala/xenon/clickhouse/ClickHouseCatalog.scala | 4 ++++ .../scala/xenon/clickhouse/ClickHouseCatalog.scala | 4 ++++ 8 files changed, 22 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index e51af158..90f7958b 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ Notes: [ClickHouse Official Java Client](https://github.com/ClickHouse/clickhouse-jdbc), which brings HTTP protocol support, extends the range of supported versions of ClickHouse Server. 2. Since 0.6.0, HTTP becomes the default protocol. +3. Since 0.7.0, gRPC is deprecated and not recommended, it may be removed in the future. ## Compatible Matrix diff --git a/docs/configurations/01_catalog_configurations.md b/docs/configurations/01_catalog_configurations.md index fb1ee20a..1687502f 100644 --- a/docs/configurations/01_catalog_configurations.md +++ b/docs/configurations/01_catalog_configurations.md @@ -49,16 +49,16 @@ Then you can access ClickHouse table `.` from Spark SQL by usin For ClickHouse cluster, give an unique catalog name for each instances. -Suppose you have two ClickHouse instances, one installed on `10.0.0.1` and exposes gRPC on port `9100` named -clickhouse1, and another installed on `10.0.0.2` and exposes gRPC on port `9100` named clickhouse2. +Suppose you have two ClickHouse instances, one installed on `10.0.0.1` and exposes HTTP on port `8123` named +clickhouse1, and another installed on `10.0.0.2` and exposes HTTP on port `8123` named clickhouse2. Edit `$SPARK_HOME/conf/spark-defaults.conf`. 
``` spark.sql.catalog.clickhouse1 xenon.clickhouse.ClickHouseCatalog spark.sql.catalog.clickhouse1.host 10.0.0.1 -spark.sql.catalog.clickhouse1.protocol grpc -spark.sql.catalog.clickhouse1.grpc_port 9100 +spark.sql.catalog.clickhouse1.protocol http +spark.sql.catalog.clickhouse1.http_port 8123 spark.sql.catalog.clickhouse1.user default spark.sql.catalog.clickhouse1.password spark.sql.catalog.clickhouse1.database default @@ -66,8 +66,8 @@ spark.sql.catalog.clickhouse1.option.async false spark.sql.catalog.clickhouse2 xenon.clickhouse.ClickHouseCatalog spark.sql.catalog.clickhouse2.host 10.0.0.2 -spark.sql.catalog.clickhouse2.protocol grpc -spark.sql.catalog.clickhouse2.grpc_port 9100 +spark.sql.catalog.clickhouse2.protocol http +spark.sql.catalog.clickhouse2.http_port 8123 spark.sql.catalog.clickhouse2.user default spark.sql.catalog.clickhouse2.password spark.sql.catalog.clickhouse2.database default diff --git a/docs/index.md b/docs/index.md index c0927624..5dbaeda5 100644 --- a/docs/index.md +++ b/docs/index.md @@ -32,7 +32,7 @@ Spark ClickHouse Connector is a high performance connector build on top of Spark 3. An available Spark cluster, and Spark version should be 3.3 or above, because we need the interfaces of Spark DataSource V2 added in 3.3.0. 4. Make sure your network policy satisfies the following requirements, both driver and executor of Spark need to access - ClickHouse HTTP/gRPC port. If you are using it to access ClickHouse cluster, ensure the connectivity between driver and + ClickHouse HTTP port. If you are using it to access ClickHouse cluster, ensure the connectivity between driver and executor of Spark and each node of ClickHouse cluster. 
## Notes diff --git a/docs/internals/index.md b/docs/internals/index.md index b6122e0e..12e70f4f 100644 --- a/docs/internals/index.md +++ b/docs/internals/index.md @@ -17,7 +17,7 @@ Overview Design === In high level, Spark ClickHouse Connector is a connector build on top of Spark DataSource V2 and -ClickHouse gRPC protocol. +ClickHouse HTTP protocol.
![Overview](../imgs/scc_overview.drawio.png) diff --git a/docs/quick_start/02_play_with_spark_sql.md b/docs/quick_start/02_play_with_spark_sql.md index a04da05a..fa6e0b57 100644 --- a/docs/quick_start/02_play_with_spark_sql.md +++ b/docs/quick_start/02_play_with_spark_sql.md @@ -26,7 +26,7 @@ $SPARK_HOME/bin/spark-sql \ --conf spark.sql.catalog.clickhouse=xenon.clickhouse.ClickHouseCatalog \ --conf spark.sql.catalog.clickhouse.host=${CLICKHOUSE_HOST:-127.0.0.1} \ --conf spark.sql.catalog.clickhouse.protocol=http \ - --conf spark.sql.catalog.clickhouse.http_port=${CLICKHOUSE_GRPC_PORT:-8123} \ + --conf spark.sql.catalog.clickhouse.http_port=${CLICKHOUSE_HTTP_PORT:-8123} \ --conf spark.sql.catalog.clickhouse.user=${CLICKHOUSE_USER:-default} \ --conf spark.sql.catalog.clickhouse.password=${CLICKHOUSE_PASSWORD:-} \ --conf spark.sql.catalog.clickhouse.database=default \ diff --git a/docs/quick_start/03_play_with_spark_shell.md b/docs/quick_start/03_play_with_spark_shell.md index 36cbf211..9fb56145 100644 --- a/docs/quick_start/03_play_with_spark_shell.md +++ b/docs/quick_start/03_play_with_spark_shell.md @@ -22,8 +22,8 @@ Play with Spark Shell $SPARK_HOME/bin/spark-shell \ --conf spark.sql.catalog.clickhouse=xenon.clickhouse.ClickHouseCatalog \ --conf spark.sql.catalog.clickhouse.host=${CLICKHOUSE_HOST:-127.0.0.1} \ - --conf spark.sql.catalog.clickhouse.protocol=grpc \ - --conf spark.sql.catalog.clickhouse.grpc_port=${CLICKHOUSE_GRPC_PORT:-9100} \ + --conf spark.sql.catalog.clickhouse.protocol=http \ + --conf spark.sql.catalog.clickhouse.http_port=${CLICKHOUSE_HTTP_PORT:-8123} \ --conf spark.sql.catalog.clickhouse.user=${CLICKHOUSE_USER:-default} \ --conf spark.sql.catalog.clickhouse.password=${CLICKHOUSE_PASSWORD:-} \ --conf spark.sql.catalog.clickhouse.database=default \ @@ -104,8 +104,8 @@ Execute ClickHouse native SQL. 
``` scala> val options = Map( | "host" -> "clickhouse", - | "protocol" -> "grpc", - | "grpc_port" -> "9100", + | "protocol" -> "http", + | "http_port" -> "8123", | "user" -> "default", | "password" -> "" | ) diff --git a/spark-3.3/clickhouse-spark/src/main/scala/xenon/clickhouse/ClickHouseCatalog.scala b/spark-3.3/clickhouse-spark/src/main/scala/xenon/clickhouse/ClickHouseCatalog.scala index ec98de4c..f95dd083 100644 --- a/spark-3.3/clickhouse-spark/src/main/scala/xenon/clickhouse/ClickHouseCatalog.scala +++ b/spark-3.3/clickhouse-spark/src/main/scala/xenon/clickhouse/ClickHouseCatalog.scala @@ -14,6 +14,7 @@ package xenon.clickhouse +import com.clickhouse.client.ClickHouseProtocol import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.clickhouse.{ExprUtils, SchemaUtils} import org.apache.spark.sql.connector.catalog._ @@ -64,6 +65,9 @@ class ClickHouseCatalog extends TableCatalog override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = { this.catalogName = name this.nodeSpec = buildNodeSpec(options) + if (nodeSpec.protocol == ClickHouseProtocol.GRPC) { + log.warn("gRPC is deprecated and not recommended since v0.7.0, it may be removed in the future.") + } this.currentDb = nodeSpec.database this.nodeClient = NodeClient(nodeSpec) diff --git a/spark-3.4/clickhouse-spark/src/main/scala/xenon/clickhouse/ClickHouseCatalog.scala b/spark-3.4/clickhouse-spark/src/main/scala/xenon/clickhouse/ClickHouseCatalog.scala index f63fb68d..17a8202c 100644 --- a/spark-3.4/clickhouse-spark/src/main/scala/xenon/clickhouse/ClickHouseCatalog.scala +++ b/spark-3.4/clickhouse-spark/src/main/scala/xenon/clickhouse/ClickHouseCatalog.scala @@ -14,6 +14,7 @@ package xenon.clickhouse +import com.clickhouse.client.ClickHouseProtocol import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.clickhouse.{ExprUtils, SchemaUtils} import org.apache.spark.sql.connector.catalog._ @@ -64,6 +65,9 @@ class ClickHouseCatalog extends TableCatalog 
override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = { this.catalogName = name this.nodeSpec = buildNodeSpec(options) + if (nodeSpec.protocol == ClickHouseProtocol.GRPC) { + log.warn("gRPC is deprecated and not recommended since v0.7.0, it may be removed in the future.") + } this.currentDb = nodeSpec.database this.nodeClient = NodeClient(nodeSpec)