Skip to content

Commit

Permalink
[SEDONA-636] Fix bugs that occur when the Sedona parser runs together with other SQL parsers
Browse files Browse the repository at this point in the history
  • Loading branch information
freamdx committed Nov 29, 2024
1 parent 6107145 commit cbb420a
Show file tree
Hide file tree
Showing 12 changed files with 91 additions and 48 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,6 @@ object SedonaContext {
RasterRegistrator.registerAll(sparkSession)
UdtRegistrator.registerAll()
UdfRegistrator.registerAll(sparkSession)
if (sparkSession.conf.get("spark.sedona.enableParserExtension", "true").toBoolean) {
ParserRegistrator.register(sparkSession)
}
sparkSession
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,21 +38,6 @@ object ParserRegistrator {
val field = sparkSession.sessionState.getClass.getDeclaredField("sqlParser")
field.setAccessible(true)
field.set(sparkSession.sessionState, parser)
return // return if the new constructor is available
} catch {
case _: Exception =>
}

// try to register the parser with the legacy constructor for spark 3.0
try {
val parserClassName = "org.apache.sedona.sql.parser.SedonaSqlParser"
val delegate: ParserInterface = sparkSession.sessionState.sqlParser

val parser =
ParserFactory.getParser(parserClassName, sparkSession.sessionState.conf, delegate)
val field = sparkSession.sessionState.getClass.getDeclaredField("sqlParser")
field.setAccessible(true)
field.set(sparkSession.sessionState, parser)
} catch {
case _: Exception =>
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,26 @@
package org.apache.sedona.sql

import org.apache.sedona.spark.SedonaContext
import org.apache.spark.SparkContext
import org.apache.spark.sql.SparkSessionExtensions
import org.apache.spark.sql.parser.ParserFactory

/**
 * Spark SQL extensions entry point for Sedona, intended to be listed under
 * `spark.sql.extensions`. It (1) initializes Sedona for each session and
 * (2) optionally injects the Sedona SQL parser into Spark's parser chain.
 */
class SedonaSqlExtensions extends (SparkSessionExtensions => Unit) {
  def apply(e: SparkSessionExtensions): Unit = {
    // injectCheckRule is used here as a per-session hook: the rule itself is a
    // no-op (`_ => ()`), but building it triggers SedonaContext.create(spark),
    // which registers Sedona's UDTs/UDFs for the session.
    e.injectCheckRule(spark => {
      SedonaContext.create(spark)
      _ => ()
    })

    // Only chain the Sedona parser in front of the delegate parser when the
    // feature flag is enabled; this lets users disable it to avoid clashes
    // with other parser extensions (see commit title, SEDONA-636).
    if (SedonaSqlExtensions.enableParser) {
      e.injectParser { case (_, parser) =>
        // `parser` is the delegate; Sedona falls back to it for statements it
        // does not handle (see SedonaSqlParser.parsePlan elsewhere in this commit).
        ParserFactory.getParser("org.apache.sedona.sql.parser.SedonaSqlParser", parser)
      }
    }
  }
}

object SedonaSqlExtensions {
  // Feature flag read once (lazy) from the SparkConf key
  // "spark.sedona.enableParserExtension"; defaults to enabled.
  // NOTE(review): SparkContext.getOrCreate() can create a new SparkContext if
  // none exists yet at evaluation time — confirm the extension is always
  // applied after the context is up, or read from the session conf instead.
  private lazy val enableParser =
    SparkContext.getOrCreate().getConf.get("spark.sedona.enableParserExtension", "true").toBoolean
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,8 @@ class SedonaSqlParser(delegate: ParserInterface) extends SparkSqlParser {
override def parsePlan(sqlText: String): LogicalPlan =
try {
parse(sqlText) { parser =>
parserBuilder.visit(parser.singleStatement()) match {
case plan: LogicalPlan => plan
case _ =>
delegate.parsePlan(sqlText)
}
}
parserBuilder.visit(parser.singleStatement())
}.asInstanceOf[LogicalPlan]
} catch {
case _: Exception =>
delegate.parsePlan(sqlText)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,29 @@ class SQLSyntaxTestScala extends TestBaseScala with TableDrivenPropertyChecks {

it(
"should be able to create a regular table with geometry column should work without a workaround") {
sparkSession.sql("CREATE TABLE T_TEST_EXPLICIT_GEOMETRY (GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY") should be(true)
try {
sparkSession.sql("CREATE TABLE T_TEST_EXPLICIT_GEOMETRY (GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY") should be(true)
enableParser should be(true)
} catch {
case ex: Exception =>
ex.getClass.getName.endsWith("ParseException") should be(true)
enableParser should be(false)
}
}

it(
"should be able to create a regular table with regular and geometry column should work without a workaround") {
sparkSession.sql("CREATE TABLE T_TEST_EXPLICIT_GEOMETRY_2 (INT_COL INT, GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY_2") should be(true)
try {
sparkSession.sql(
"CREATE TABLE T_TEST_EXPLICIT_GEOMETRY_2 (INT_COL INT, GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY_2") should be(true)
enableParser should be(true)
} catch {
case ex: Exception =>
ex.getClass.getName.endsWith("ParseException") should be(true)
enableParser should be(false)
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,16 @@ import org.apache.sedona.spark.SedonaContext
import org.apache.spark.sql.DataFrame
import org.scalatest.{BeforeAndAfterAll, FunSpec}

import java.util.concurrent.ThreadLocalRandom

trait TestBaseScala extends FunSpec with BeforeAndAfterAll {
Logger.getRootLogger().setLevel(Level.WARN)
Logger.getLogger("org.apache").setLevel(Level.WARN)
Logger.getLogger("com").setLevel(Level.WARN)
Logger.getLogger("akka").setLevel(Level.WARN)
Logger.getLogger("org.apache.sedona.core").setLevel(Level.WARN)

val enableParser = ThreadLocalRandom.current().nextBoolean()
val warehouseLocation = System.getProperty("user.dir") + "/target/"
val sparkSession = SedonaContext
.builder()
Expand All @@ -38,6 +41,8 @@ trait TestBaseScala extends FunSpec with BeforeAndAfterAll {
.config("spark.sql.warehouse.dir", warehouseLocation)
.config("sedona.join.autoBroadcastJoinThreshold", "-1")
.config("spark.sql.session.timeZone", "UTC")
.config("spark.sql.extensions", "org.apache.sedona.sql.SedonaSqlExtensions")
.config("spark.sedona.enableParserExtension", enableParser)
.getOrCreate()

val sparkSessionMinio = SedonaContext
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,8 @@ class SedonaSqlParser(delegate: ParserInterface) extends SparkSqlParser {
override def parsePlan(sqlText: String): LogicalPlan =
try {
parse(sqlText) { parser =>
parserBuilder.visit(parser.singleStatement()) match {
case plan: LogicalPlan => plan
case _ =>
delegate.parsePlan(sqlText)
}
}
parserBuilder.visit(parser.singleStatement())
}.asInstanceOf[LogicalPlan]
} catch {
case _: Exception =>
delegate.parsePlan(sqlText)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,29 @@ class SQLSyntaxTestScala extends TestBaseScala with TableDrivenPropertyChecks {

it(
"should be able to create a regular table with geometry column should work without a workaround") {
sparkSession.sql("CREATE TABLE T_TEST_EXPLICIT_GEOMETRY (GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY") should be(true)
try {
sparkSession.sql("CREATE TABLE T_TEST_EXPLICIT_GEOMETRY (GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY") should be(true)
enableParser should be(true)
} catch {
case ex: Exception =>
ex.getClass.getName.endsWith("ParseException") should be(true)
enableParser should be(false)
}
}

it(
"should be able to create a regular table with regular and geometry column should work without a workaround") {
sparkSession.sql("CREATE TABLE T_TEST_EXPLICIT_GEOMETRY_2 (INT_COL INT, GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY_2") should be(true)
try {
sparkSession.sql(
"CREATE TABLE T_TEST_EXPLICIT_GEOMETRY_2 (INT_COL INT, GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY_2") should be(true)
enableParser should be(true)
} catch {
case ex: Exception =>
ex.getClass.getName.endsWith("ParseException") should be(true)
enableParser should be(false)
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,16 @@ import org.apache.sedona.spark.SedonaContext
import org.apache.spark.sql.DataFrame
import org.scalatest.{BeforeAndAfterAll, FunSpec}

import java.util.concurrent.ThreadLocalRandom

trait TestBaseScala extends FunSpec with BeforeAndAfterAll {
Logger.getRootLogger().setLevel(Level.WARN)
Logger.getLogger("org.apache").setLevel(Level.WARN)
Logger.getLogger("com").setLevel(Level.WARN)
Logger.getLogger("akka").setLevel(Level.WARN)
Logger.getLogger("org.apache.sedona.core").setLevel(Level.WARN)

val enableParser = ThreadLocalRandom.current().nextBoolean()
val warehouseLocation = System.getProperty("user.dir") + "/target/"
val sparkSession = SedonaContext
.builder()
Expand All @@ -38,6 +41,8 @@ trait TestBaseScala extends FunSpec with BeforeAndAfterAll {
.config("spark.sql.warehouse.dir", warehouseLocation)
// We need to be explicit about broadcasting in tests.
.config("sedona.join.autoBroadcastJoinThreshold", "-1")
.config("spark.sql.extensions", "org.apache.sedona.sql.SedonaSqlExtensions")
.config("spark.sedona.enableParserExtension", enableParser)
.getOrCreate()

val sparkSessionMinio = SedonaContext
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,8 @@ class SedonaSqlParser(delegate: ParserInterface) extends SparkSqlParser {
override def parsePlan(sqlText: String): LogicalPlan =
try {
parse(sqlText) { parser =>
parserBuilder.visit(parser.singleStatement()) match {
case plan: LogicalPlan => plan
case _ =>
delegate.parsePlan(sqlText)
}
}
parserBuilder.visit(parser.singleStatement())
}.asInstanceOf[LogicalPlan]
} catch {
case _: Exception =>
delegate.parsePlan(sqlText)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,29 @@ class SQLSyntaxTestScala extends TestBaseScala with TableDrivenPropertyChecks {

it(
"should be able to create a regular table with geometry column should work without a workaround") {
sparkSession.sql("CREATE TABLE T_TEST_EXPLICIT_GEOMETRY (GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY") should be(true)
try {
sparkSession.sql("CREATE TABLE T_TEST_EXPLICIT_GEOMETRY (GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY") should be(true)
enableParser should be(true)
} catch {
case ex: Exception =>
ex.getClass.getName.endsWith("ParseException") should be(true)
enableParser should be(false)
}
}

it(
"should be able to create a regular table with regular and geometry column should work without a workaround") {
sparkSession.sql("CREATE TABLE T_TEST_EXPLICIT_GEOMETRY_2 (INT_COL INT, GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY_2") should be(true)
try {
sparkSession.sql(
"CREATE TABLE T_TEST_EXPLICIT_GEOMETRY_2 (INT_COL INT, GEO_COL GEOMETRY)")
sparkSession.catalog.tableExists("T_TEST_EXPLICIT_GEOMETRY_2") should be(true)
enableParser should be(true)
} catch {
case ex: Exception =>
ex.getClass.getName.endsWith("ParseException") should be(true)
enableParser should be(false)
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,16 @@ import org.apache.sedona.spark.SedonaContext
import org.apache.spark.sql.DataFrame
import org.scalatest.{BeforeAndAfterAll, FunSpec}

import java.util.concurrent.ThreadLocalRandom

trait TestBaseScala extends FunSpec with BeforeAndAfterAll {
Logger.getRootLogger().setLevel(Level.WARN)
Logger.getLogger("org.apache").setLevel(Level.WARN)
Logger.getLogger("com").setLevel(Level.WARN)
Logger.getLogger("akka").setLevel(Level.WARN)
Logger.getLogger("org.apache.sedona.core").setLevel(Level.WARN)

val enableParser = ThreadLocalRandom.current().nextBoolean()
val warehouseLocation = System.getProperty("user.dir") + "/target/"
val sparkSession = SedonaContext
.builder()
Expand All @@ -38,6 +41,8 @@ trait TestBaseScala extends FunSpec with BeforeAndAfterAll {
.config("spark.sql.warehouse.dir", warehouseLocation)
// We need to be explicit about broadcasting in tests.
.config("sedona.join.autoBroadcastJoinThreshold", "-1")
.config("spark.sql.extensions", "org.apache.sedona.sql.SedonaSqlExtensions")
.config("spark.sedona.enableParserExtension", enableParser)
.getOrCreate()

val sparkSessionMinio = SedonaContext
Expand Down

0 comments on commit cbb420a

Please sign in to comment.