Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SEDONA-636] Fix bugs when the Sedona parser runs alongside other parsers #1706

Merged
merged 1 commit into from
Dec 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ package org.apache.sedona.spark

import org.apache.sedona.common.utils.TelemetryCollector
import org.apache.sedona.core.serde.SedonaKryoRegistrator
import org.apache.sedona.sql.{ParserRegistrator, RasterRegistrator}
import org.apache.sedona.sql.RasterRegistrator
import org.apache.sedona.sql.UDF.UdfRegistrator
import org.apache.sedona.sql.UDT.UdtRegistrator
import org.apache.spark.serializer.KryoSerializer
Expand Down Expand Up @@ -65,9 +65,6 @@ object SedonaContext {
RasterRegistrator.registerAll(sparkSession)
UdtRegistrator.registerAll()
UdfRegistrator.registerAll(sparkSession)
if (sparkSession.conf.get("spark.sedona.enableParserExtension", "true").toBoolean) {
ParserRegistrator.register(sparkSession)
}
freamdx marked this conversation as resolved.
Show resolved Hide resolved
sparkSession
}

Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,24 @@
package org.apache.sedona.sql

import org.apache.sedona.spark.SedonaContext
import org.apache.spark.SparkContext
import org.apache.spark.sql.SparkSessionExtensions
import org.apache.spark.sql.parser.ParserFactory

/**
 * Spark SQL extension entry point for Sedona, registered through the
 * `spark.sql.extensions` configuration. Installs a no-op check rule whose
 * only purpose is to trigger Sedona initialisation for the session, and —
 * when `spark.sedona.enableParserExtension` is true (the default) — injects
 * Sedona's SQL parser in front of the session's delegate parser.
 */
class SedonaSqlExtensions extends (SparkSessionExtensions => Unit) {

  // Read once, lazily, from the active SparkContext's conf. Defaults to
  // "true" so the parser extension stays on unless explicitly disabled.
  // NOTE(review): assumes a SparkContext already exists when the extension
  // is applied — getOrCreate() with no conf would otherwise build a default
  // context; confirm against the session bootstrap path.
  private lazy val parserExtensionEnabled: Boolean =
    SparkContext
      .getOrCreate()
      .getConf
      .get("spark.sedona.enableParserExtension", "true")
      .toBoolean

  def apply(e: SparkSessionExtensions): Unit = {
    // Piggyback on a check rule to run SedonaContext.create for the session;
    // the returned rule itself does nothing.
    e.injectCheckRule { spark =>
      SedonaContext.create(spark)
      _ => ()
    }

    // Only hook the parser when the flag allows it, so Sedona can coexist
    // with other injected parsers when disabled.
    if (parserExtensionEnabled) {
      e.injectParser { case (_, delegate) =>
        ParserFactory.getParser("org.apache.sedona.sql.parser.SedonaSqlParser", delegate)
      }
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,8 @@ class SedonaSqlParser(delegate: ParserInterface) extends SparkSqlParser {
override def parsePlan(sqlText: String): LogicalPlan =
try {
parse(sqlText) { parser =>
parserBuilder.visit(parser.singleStatement()) match {
case plan: LogicalPlan => plan
case _ =>
delegate.parsePlan(sqlText)
}
}
parserBuilder.visit(parser.singleStatement())
}.asInstanceOf[LogicalPlan]
} catch {
case _: Exception =>
delegate.parsePlan(sqlText)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,29 @@ class SQLSyntaxTestScala extends TestBaseScala with TableDrivenPropertyChecks {

it(
"should be able to create a regular table with geometry column should work without a workaround") {
sparkSession.sql("CREATE TABLE T_TEST_EXPLICIT_GEOMETRY (GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY") should be(true)
try {
sparkSession.sql("CREATE TABLE T_TEST_EXPLICIT_GEOMETRY (GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY") should be(true)
sparkSession.sparkContext.getConf.get(keyParserExtension) should be("true")
} catch {
case ex: Exception =>
ex.getClass.getName.endsWith("ParseException") should be(true)
sparkSession.sparkContext.getConf.get(keyParserExtension) should be("false")
}
}

it(
"should be able to create a regular table with regular and geometry column should work without a workaround") {
sparkSession.sql("CREATE TABLE T_TEST_EXPLICIT_GEOMETRY_2 (INT_COL INT, GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY_2") should be(true)
try {
sparkSession.sql(
"CREATE TABLE T_TEST_EXPLICIT_GEOMETRY_2 (INT_COL INT, GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY_2") should be(true)
sparkSession.sparkContext.getConf.get(keyParserExtension) should be("true")
} catch {
case ex: Exception =>
ex.getClass.getName.endsWith("ParseException") should be(true)
sparkSession.sparkContext.getConf.get(keyParserExtension) should be("false")
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,16 @@ import org.apache.sedona.spark.SedonaContext
import org.apache.spark.sql.DataFrame
import org.scalatest.{BeforeAndAfterAll, FunSpec}

import java.util.concurrent.ThreadLocalRandom

trait TestBaseScala extends FunSpec with BeforeAndAfterAll {
Logger.getRootLogger().setLevel(Level.WARN)
Logger.getLogger("org.apache").setLevel(Level.WARN)
Logger.getLogger("com").setLevel(Level.WARN)
Logger.getLogger("akka").setLevel(Level.WARN)
Logger.getLogger("org.apache.sedona.core").setLevel(Level.WARN)

val keyParserExtension = "spark.sedona.enableParserExtension"
val warehouseLocation = System.getProperty("user.dir") + "/target/"
val sparkSession = SedonaContext
.builder()
Expand All @@ -38,6 +41,8 @@ trait TestBaseScala extends FunSpec with BeforeAndAfterAll {
.config("spark.sql.warehouse.dir", warehouseLocation)
.config("sedona.join.autoBroadcastJoinThreshold", "-1")
.config("spark.sql.session.timeZone", "UTC")
.config("spark.sql.extensions", "org.apache.sedona.sql.SedonaSqlExtensions")
.config(keyParserExtension, ThreadLocalRandom.current().nextBoolean())
.getOrCreate()

val sparkSessionMinio = SedonaContext
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,8 @@ class SedonaSqlParser(delegate: ParserInterface) extends SparkSqlParser {
override def parsePlan(sqlText: String): LogicalPlan =
try {
parse(sqlText) { parser =>
parserBuilder.visit(parser.singleStatement()) match {
case plan: LogicalPlan => plan
case _ =>
delegate.parsePlan(sqlText)
}
}
parserBuilder.visit(parser.singleStatement())
}.asInstanceOf[LogicalPlan]
} catch {
case _: Exception =>
delegate.parsePlan(sqlText)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,29 @@ class SQLSyntaxTestScala extends TestBaseScala with TableDrivenPropertyChecks {

it(
"should be able to create a regular table with geometry column should work without a workaround") {
sparkSession.sql("CREATE TABLE T_TEST_EXPLICIT_GEOMETRY (GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY") should be(true)
try {
sparkSession.sql("CREATE TABLE T_TEST_EXPLICIT_GEOMETRY (GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY") should be(true)
sparkSession.sparkContext.getConf.get(keyParserExtension) should be("true")
} catch {
case ex: Exception =>
ex.getClass.getName.endsWith("ParseException") should be(true)
sparkSession.sparkContext.getConf.get(keyParserExtension) should be("false")
}
}

it(
"should be able to create a regular table with regular and geometry column should work without a workaround") {
sparkSession.sql("CREATE TABLE T_TEST_EXPLICIT_GEOMETRY_2 (INT_COL INT, GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY_2") should be(true)
try {
sparkSession.sql(
"CREATE TABLE T_TEST_EXPLICIT_GEOMETRY_2 (INT_COL INT, GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY_2") should be(true)
sparkSession.sparkContext.getConf.get(keyParserExtension) should be("true")
} catch {
case ex: Exception =>
ex.getClass.getName.endsWith("ParseException") should be(true)
sparkSession.sparkContext.getConf.get(keyParserExtension) should be("false")
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,16 @@ import org.apache.sedona.spark.SedonaContext
import org.apache.spark.sql.DataFrame
import org.scalatest.{BeforeAndAfterAll, FunSpec}

import java.util.concurrent.ThreadLocalRandom

trait TestBaseScala extends FunSpec with BeforeAndAfterAll {
Logger.getRootLogger().setLevel(Level.WARN)
Logger.getLogger("org.apache").setLevel(Level.WARN)
Logger.getLogger("com").setLevel(Level.WARN)
Logger.getLogger("akka").setLevel(Level.WARN)
Logger.getLogger("org.apache.sedona.core").setLevel(Level.WARN)

val keyParserExtension = "spark.sedona.enableParserExtension"
val warehouseLocation = System.getProperty("user.dir") + "/target/"
val sparkSession = SedonaContext
.builder()
Expand All @@ -38,6 +41,8 @@ trait TestBaseScala extends FunSpec with BeforeAndAfterAll {
.config("spark.sql.warehouse.dir", warehouseLocation)
// We need to be explicit about broadcasting in tests.
.config("sedona.join.autoBroadcastJoinThreshold", "-1")
.config("spark.sql.extensions", "org.apache.sedona.sql.SedonaSqlExtensions")
.config(keyParserExtension, ThreadLocalRandom.current().nextBoolean())
.getOrCreate()

val sparkSessionMinio = SedonaContext
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,8 @@ class SedonaSqlParser(delegate: ParserInterface) extends SparkSqlParser {
override def parsePlan(sqlText: String): LogicalPlan =
try {
parse(sqlText) { parser =>
parserBuilder.visit(parser.singleStatement()) match {
case plan: LogicalPlan => plan
case _ =>
delegate.parsePlan(sqlText)
}
}
parserBuilder.visit(parser.singleStatement())
}.asInstanceOf[LogicalPlan]
} catch {
case _: Exception =>
delegate.parsePlan(sqlText)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,29 @@ class SQLSyntaxTestScala extends TestBaseScala with TableDrivenPropertyChecks {

it(
"should be able to create a regular table with geometry column should work without a workaround") {
sparkSession.sql("CREATE TABLE T_TEST_EXPLICIT_GEOMETRY (GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY") should be(true)
try {
sparkSession.sql("CREATE TABLE T_TEST_EXPLICIT_GEOMETRY (GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY") should be(true)
sparkSession.sparkContext.getConf.get(keyParserExtension) should be("true")
} catch {
case ex: Exception =>
ex.getClass.getName.endsWith("ParseException") should be(true)
sparkSession.sparkContext.getConf.get(keyParserExtension) should be("false")
}
}

it(
"should be able to create a regular table with regular and geometry column should work without a workaround") {
sparkSession.sql("CREATE TABLE T_TEST_EXPLICIT_GEOMETRY_2 (INT_COL INT, GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY_2") should be(true)
try {
sparkSession.sql(
"CREATE TABLE T_TEST_EXPLICIT_GEOMETRY_2 (INT_COL INT, GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY_2") should be(true)
sparkSession.sparkContext.getConf.get(keyParserExtension) should be("true")
} catch {
case ex: Exception =>
ex.getClass.getName.endsWith("ParseException") should be(true)
sparkSession.sparkContext.getConf.get(keyParserExtension) should be("false")
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,16 @@ import org.apache.sedona.spark.SedonaContext
import org.apache.spark.sql.DataFrame
import org.scalatest.{BeforeAndAfterAll, FunSpec}

import java.util.concurrent.ThreadLocalRandom

trait TestBaseScala extends FunSpec with BeforeAndAfterAll {
Logger.getRootLogger().setLevel(Level.WARN)
Logger.getLogger("org.apache").setLevel(Level.WARN)
Logger.getLogger("com").setLevel(Level.WARN)
Logger.getLogger("akka").setLevel(Level.WARN)
Logger.getLogger("org.apache.sedona.core").setLevel(Level.WARN)

val keyParserExtension = "spark.sedona.enableParserExtension"
val warehouseLocation = System.getProperty("user.dir") + "/target/"
val sparkSession = SedonaContext
.builder()
Expand All @@ -38,6 +41,8 @@ trait TestBaseScala extends FunSpec with BeforeAndAfterAll {
.config("spark.sql.warehouse.dir", warehouseLocation)
// We need to be explicit about broadcasting in tests.
.config("sedona.join.autoBroadcastJoinThreshold", "-1")
.config("spark.sql.extensions", "org.apache.sedona.sql.SedonaSqlExtensions")
.config(keyParserExtension, ThreadLocalRandom.current().nextBoolean())
.getOrCreate()

val sparkSessionMinio = SedonaContext
Expand Down
Loading