Skip to content

Commit

Permalink
[SEDONA-636] Fix some bugs when the Sedona parser runs alongside another parser (#1706)
Browse files Browse the repository at this point in the history
  • Loading branch information
freamdx authored Dec 11, 2024
1 parent b081941 commit 12903e1
Show file tree
Hide file tree
Showing 12 changed files with 90 additions and 94 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ package org.apache.sedona.spark

import org.apache.sedona.common.utils.TelemetryCollector
import org.apache.sedona.core.serde.SedonaKryoRegistrator
import org.apache.sedona.sql.{ParserRegistrator, RasterRegistrator}
import org.apache.sedona.sql.RasterRegistrator
import org.apache.sedona.sql.UDF.UdfRegistrator
import org.apache.sedona.sql.UDT.UdtRegistrator
import org.apache.spark.serializer.KryoSerializer
Expand Down Expand Up @@ -65,9 +65,6 @@ object SedonaContext {
RasterRegistrator.registerAll(sparkSession)
UdtRegistrator.registerAll()
UdfRegistrator.registerAll(sparkSession)
if (sparkSession.conf.get("spark.sedona.enableParserExtension", "true").toBoolean) {
ParserRegistrator.register(sparkSession)
}
sparkSession
}

Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,24 @@
package org.apache.sedona.sql

import org.apache.sedona.spark.SedonaContext
import org.apache.spark.SparkContext
import org.apache.spark.sql.SparkSessionExtensions
import org.apache.spark.sql.parser.ParserFactory

class SedonaSqlExtensions extends (SparkSessionExtensions => Unit) {

  // Whether to inject the Sedona SQL parser, read once (lazily) from the Spark
  // conf key "spark.sedona.enableParserExtension"; defaults to enabled.
  // NOTE(review): SparkContext.getOrCreate() assumes a context exists (or may
  // create one) at first access — confirm this is intended at extension time.
  private lazy val enableParser: Boolean =
    SparkContext
      .getOrCreate()
      .getConf
      .get("spark.sedona.enableParserExtension", "true")
      .toBoolean

  /**
   * Wires Sedona into a Spark session: registers Sedona (types, UDFs, etc.)
   * via a no-op check rule, and optionally injects the Sedona SQL parser.
   */
  def apply(extensions: SparkSessionExtensions): Unit = {
    // Piggy-back on the check-rule hook to run SedonaContext.create once per
    // session; the returned rule itself does nothing.
    extensions.injectCheckRule { session =>
      SedonaContext.create(session)
      _ => ()
    }

    if (enableParser) {
      extensions.injectParser { case (_, delegate) =>
        ParserFactory.getParser("org.apache.sedona.sql.parser.SedonaSqlParser", delegate)
      }
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,8 @@ class SedonaSqlParser(delegate: ParserInterface) extends SparkSqlParser {
override def parsePlan(sqlText: String): LogicalPlan =
try {
parse(sqlText) { parser =>
parserBuilder.visit(parser.singleStatement()) match {
case plan: LogicalPlan => plan
case _ =>
delegate.parsePlan(sqlText)
}
}
parserBuilder.visit(parser.singleStatement())
}.asInstanceOf[LogicalPlan]
} catch {
case _: Exception =>
delegate.parsePlan(sqlText)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,29 @@ class SQLSyntaxTestScala extends TestBaseScala with TableDrivenPropertyChecks {

it(
"should be able to create a regular table with geometry column should work without a workaround") {
sparkSession.sql("CREATE TABLE T_TEST_EXPLICIT_GEOMETRY (GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY") should be(true)
try {
sparkSession.sql("CREATE TABLE T_TEST_EXPLICIT_GEOMETRY (GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY") should be(true)
sparkSession.sparkContext.getConf.get(keyParserExtension) should be("true")
} catch {
case ex: Exception =>
ex.getClass.getName.endsWith("ParseException") should be(true)
sparkSession.sparkContext.getConf.get(keyParserExtension) should be("false")
}
}

it(
"should be able to create a regular table with regular and geometry column should work without a workaround") {
sparkSession.sql("CREATE TABLE T_TEST_EXPLICIT_GEOMETRY_2 (INT_COL INT, GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY_2") should be(true)
try {
sparkSession.sql(
"CREATE TABLE T_TEST_EXPLICIT_GEOMETRY_2 (INT_COL INT, GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY_2") should be(true)
sparkSession.sparkContext.getConf.get(keyParserExtension) should be("true")
} catch {
case ex: Exception =>
ex.getClass.getName.endsWith("ParseException") should be(true)
sparkSession.sparkContext.getConf.get(keyParserExtension) should be("false")
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,16 @@ import org.apache.sedona.spark.SedonaContext
import org.apache.spark.sql.DataFrame
import org.scalatest.{BeforeAndAfterAll, FunSpec}

import java.util.concurrent.ThreadLocalRandom

trait TestBaseScala extends FunSpec with BeforeAndAfterAll {
Logger.getRootLogger().setLevel(Level.WARN)
Logger.getLogger("org.apache").setLevel(Level.WARN)
Logger.getLogger("com").setLevel(Level.WARN)
Logger.getLogger("akka").setLevel(Level.WARN)
Logger.getLogger("org.apache.sedona.core").setLevel(Level.WARN)

val keyParserExtension = "spark.sedona.enableParserExtension"
val warehouseLocation = System.getProperty("user.dir") + "/target/"
val sparkSession = SedonaContext
.builder()
Expand All @@ -38,6 +41,8 @@ trait TestBaseScala extends FunSpec with BeforeAndAfterAll {
.config("spark.sql.warehouse.dir", warehouseLocation)
.config("sedona.join.autoBroadcastJoinThreshold", "-1")
.config("spark.sql.session.timeZone", "UTC")
.config("spark.sql.extensions", "org.apache.sedona.sql.SedonaSqlExtensions")
.config(keyParserExtension, ThreadLocalRandom.current().nextBoolean())
.getOrCreate()

val sparkSessionMinio = SedonaContext
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,8 @@ class SedonaSqlParser(delegate: ParserInterface) extends SparkSqlParser {
override def parsePlan(sqlText: String): LogicalPlan =
try {
parse(sqlText) { parser =>
parserBuilder.visit(parser.singleStatement()) match {
case plan: LogicalPlan => plan
case _ =>
delegate.parsePlan(sqlText)
}
}
parserBuilder.visit(parser.singleStatement())
}.asInstanceOf[LogicalPlan]
} catch {
case _: Exception =>
delegate.parsePlan(sqlText)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,29 @@ class SQLSyntaxTestScala extends TestBaseScala with TableDrivenPropertyChecks {

it(
"should be able to create a regular table with geometry column should work without a workaround") {
sparkSession.sql("CREATE TABLE T_TEST_EXPLICIT_GEOMETRY (GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY") should be(true)
try {
sparkSession.sql("CREATE TABLE T_TEST_EXPLICIT_GEOMETRY (GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY") should be(true)
sparkSession.sparkContext.getConf.get(keyParserExtension) should be("true")
} catch {
case ex: Exception =>
ex.getClass.getName.endsWith("ParseException") should be(true)
sparkSession.sparkContext.getConf.get(keyParserExtension) should be("false")
}
}

it(
"should be able to create a regular table with regular and geometry column should work without a workaround") {
sparkSession.sql("CREATE TABLE T_TEST_EXPLICIT_GEOMETRY_2 (INT_COL INT, GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY_2") should be(true)
try {
sparkSession.sql(
"CREATE TABLE T_TEST_EXPLICIT_GEOMETRY_2 (INT_COL INT, GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY_2") should be(true)
sparkSession.sparkContext.getConf.get(keyParserExtension) should be("true")
} catch {
case ex: Exception =>
ex.getClass.getName.endsWith("ParseException") should be(true)
sparkSession.sparkContext.getConf.get(keyParserExtension) should be("false")
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,16 @@ import org.apache.sedona.spark.SedonaContext
import org.apache.spark.sql.DataFrame
import org.scalatest.{BeforeAndAfterAll, FunSpec}

import java.util.concurrent.ThreadLocalRandom

trait TestBaseScala extends FunSpec with BeforeAndAfterAll {
Logger.getRootLogger().setLevel(Level.WARN)
Logger.getLogger("org.apache").setLevel(Level.WARN)
Logger.getLogger("com").setLevel(Level.WARN)
Logger.getLogger("akka").setLevel(Level.WARN)
Logger.getLogger("org.apache.sedona.core").setLevel(Level.WARN)

val keyParserExtension = "spark.sedona.enableParserExtension"
val warehouseLocation = System.getProperty("user.dir") + "/target/"
val sparkSession = SedonaContext
.builder()
Expand All @@ -38,6 +41,8 @@ trait TestBaseScala extends FunSpec with BeforeAndAfterAll {
.config("spark.sql.warehouse.dir", warehouseLocation)
// We need to be explicit about broadcasting in tests.
.config("sedona.join.autoBroadcastJoinThreshold", "-1")
.config("spark.sql.extensions", "org.apache.sedona.sql.SedonaSqlExtensions")
.config(keyParserExtension, ThreadLocalRandom.current().nextBoolean())
.getOrCreate()

val sparkSessionMinio = SedonaContext
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,8 @@ class SedonaSqlParser(delegate: ParserInterface) extends SparkSqlParser {
override def parsePlan(sqlText: String): LogicalPlan =
try {
parse(sqlText) { parser =>
parserBuilder.visit(parser.singleStatement()) match {
case plan: LogicalPlan => plan
case _ =>
delegate.parsePlan(sqlText)
}
}
parserBuilder.visit(parser.singleStatement())
}.asInstanceOf[LogicalPlan]
} catch {
case _: Exception =>
delegate.parsePlan(sqlText)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,29 @@ class SQLSyntaxTestScala extends TestBaseScala with TableDrivenPropertyChecks {

it(
"should be able to create a regular table with geometry column should work without a workaround") {
sparkSession.sql("CREATE TABLE T_TEST_EXPLICIT_GEOMETRY (GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY") should be(true)
try {
sparkSession.sql("CREATE TABLE T_TEST_EXPLICIT_GEOMETRY (GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY") should be(true)
sparkSession.sparkContext.getConf.get(keyParserExtension) should be("true")
} catch {
case ex: Exception =>
ex.getClass.getName.endsWith("ParseException") should be(true)
sparkSession.sparkContext.getConf.get(keyParserExtension) should be("false")
}
}

it(
"should be able to create a regular table with regular and geometry column should work without a workaround") {
sparkSession.sql("CREATE TABLE T_TEST_EXPLICIT_GEOMETRY_2 (INT_COL INT, GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY_2") should be(true)
try {
sparkSession.sql(
"CREATE TABLE T_TEST_EXPLICIT_GEOMETRY_2 (INT_COL INT, GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY_2") should be(true)
sparkSession.sparkContext.getConf.get(keyParserExtension) should be("true")
} catch {
case ex: Exception =>
ex.getClass.getName.endsWith("ParseException") should be(true)
sparkSession.sparkContext.getConf.get(keyParserExtension) should be("false")
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,16 @@ import org.apache.sedona.spark.SedonaContext
import org.apache.spark.sql.DataFrame
import org.scalatest.{BeforeAndAfterAll, FunSpec}

import java.util.concurrent.ThreadLocalRandom

trait TestBaseScala extends FunSpec with BeforeAndAfterAll {
Logger.getRootLogger().setLevel(Level.WARN)
Logger.getLogger("org.apache").setLevel(Level.WARN)
Logger.getLogger("com").setLevel(Level.WARN)
Logger.getLogger("akka").setLevel(Level.WARN)
Logger.getLogger("org.apache.sedona.core").setLevel(Level.WARN)

val keyParserExtension = "spark.sedona.enableParserExtension"
val warehouseLocation = System.getProperty("user.dir") + "/target/"
val sparkSession = SedonaContext
.builder()
Expand All @@ -38,6 +41,8 @@ trait TestBaseScala extends FunSpec with BeforeAndAfterAll {
.config("spark.sql.warehouse.dir", warehouseLocation)
// We need to be explicit about broadcasting in tests.
.config("sedona.join.autoBroadcastJoinThreshold", "-1")
.config("spark.sql.extensions", "org.apache.sedona.sql.SedonaSqlExtensions")
.config(keyParserExtension, ThreadLocalRandom.current().nextBoolean())
.getOrCreate()

val sparkSessionMinio = SedonaContext
Expand Down

0 comments on commit 12903e1

Please sign in to comment.