Skip to content

Commit

Permalink
[Coral-hive] Enable parsing spark created views without quoted keywor…
Browse files Browse the repository at this point in the history
…ds (#503)
  • Loading branch information
nagarathnam200 authored Apr 23, 2024
1 parent 0d5dd3f commit a83b648
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -658,7 +658,12 @@ import org.slf4j.LoggerFactory;
}
protected boolean useSQL11ReservedKeywordsForIdentifier() {
try {
return !HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_SUPPORT_SQL11_RESERVED_KEYWORDS);
/*
* Read the config string hive.support.sql11.reserved.keywords directly:
* HiveConf.ConfVars.HIVE_SUPPORT_SQL11_RESERVED_KEYWORDS might not be available in the hive-common present on the
* classpath during translation, which would trigger the exception path and default the result to false.
*/
return !hiveConf.get("hive.support.sql11.reserved.keywords").equalsIgnoreCase("true");
} catch (Throwable throwable) {
LOG.warn(throwable.getMessage());
return false;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2017-2023 LinkedIn Corporation. All rights reserved.
* Copyright 2017-2024 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
Expand Down Expand Up @@ -96,7 +96,7 @@ protected SqlToRelConverter getSqlToRelConverter() {
}

@Override
protected SqlNode toSqlNode(String sql, Table hiveView) {
public SqlNode toSqlNode(String sql, Table hiveView) {
final SqlNode sqlNode = parseTreeBuilder.process(trimParenthesis(sql), hiveView);
if (hiveView != null) {
sqlNode.accept(new FuzzyUnionSqlRewriter(hiveView.getTableName(), this));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2017-2023 LinkedIn Corporation. All rights reserved.
* Copyright 2017-2024 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
Expand All @@ -8,6 +8,7 @@
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
Expand Down Expand Up @@ -104,8 +105,20 @@ public SqlNode processSql(String sql) {
return process(sql, null);
}

/**
 * Tells whether the given view was created through Spark SQL. The decision is based solely on
 * whether the view's table parameters contain the {@code spark.sql.create.version} key.
 *
 * @param hiveView view whose table parameters are inspected; must not be null
 * @return true if the parameters map is present and contains {@code spark.sql.create.version}
 */
private static boolean isCreatedUsingSpark(Table hiveView) {
  final Map<String, String> viewProperties = hiveView.getParameters();
  // A view without any parameters map cannot carry the Spark marker property.
  if (viewProperties == null) {
    return false;
  }
  return viewProperties.containsKey("spark.sql.create.version");
}

public SqlNode process(String sql, @Nullable Table hiveView) {
ParseDriver pd = new CoralParseDriver();
ParseDriver pd = new CoralParseDriver(hiveView != null && isCreatedUsingSpark(hiveView));
try {
ASTNode root = pd.parse(sql);
return processAST(root, hiveView);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2021-2022 LinkedIn Corporation. All rights reserved.
* Copyright 2021-2024 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
Expand All @@ -13,13 +13,26 @@
import org.antlr.runtime.TokenRewriteStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.conf.HiveConf;


public class CoralParseDriver extends ParseDriver {

private static final Log LOG =
LogFactory.getLog("com.linkedin.coral.hive.hive2rel.parsetree.parser.CoralParseDriver");

private boolean useSQL11ReservedKeywordsForIdentifier;

/**
 * Creates a parse driver with an explicit choice for reserved-keyword handling.
 *
 * @param useSQL11ReservedKeywordsForIdentifier when true, {@code parse} hands the parser a HiveConf in which
 *        {@code hive.support.sql11.reserved.keywords} is set to {@code "false"}
 */
public CoralParseDriver(boolean useSQL11ReservedKeywordsForIdentifier) {
  // The superclass no-arg constructor is invoked implicitly.
  this.useSQL11ReservedKeywordsForIdentifier = useSQL11ReservedKeywordsForIdentifier;
}

/**
 * Creates a parse driver with the reserved-keyword flag switched off.
 */
public CoralParseDriver() {
  // Same effect as calling super() and assigning false to the flag directly.
  this(false);
}

@Override
public ASTNode parse(String command) throws ParseException {
if (LOG.isDebugEnabled()) {
Expand All @@ -29,6 +42,17 @@ public ASTNode parse(String command) throws ParseException {
HiveLexerCoral lexer = new HiveLexerCoral(new ANTLRNoCaseStringStream(command));
TokenRewriteStream tokens = new TokenRewriteStream(lexer);
HiveParser parser = new HiveParser(tokens);
HiveConf hiveConf = new HiveConf();
/*
* This enables usage of keywords as column names without adding backquotes. This is required for translating views
* created using the Spark engine, because certain Hive keywords, such as timestamp, are not keywords in Spark, so
* Spark creates views without backquoting them. This will be removed when coral-spark becomes
* a supported LHS for translations.
*/
if (useSQL11ReservedKeywordsForIdentifier) {
hiveConf.set("hive.support.sql11.reserved.keywords", "false");
parser.setHiveConf(hiveConf);
}
parser.setTreeAdaptor(adaptor);
HiveParser.statement_return r = null;
try {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2017-2023 LinkedIn Corporation. All rights reserved.
* Copyright 2017-2024 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
Expand Down Expand Up @@ -172,6 +172,10 @@ public static TestHive setupDefaultHive(HiveConf conf) throws IOException {
driver.run("CREATE VIEW IF NOT EXISTS view_schema_evolve_wrapper AS SELECT * from view_schema_evolve");
driver.run("ALTER TABLE schema_evolve CHANGE COLUMN b b array<struct<b1:string, b2:double, b3:int>>");

driver.run("CREATE OR REPLACE VIEW test.spark_created_view AS SELECT 1 AS `timestamp` FROM test.tableOne");
// Simulate the creation of view using spark by setting the corresponding table property of the view.
driver.run("ALTER VIEW test.spark_created_view SET TBLPROPERTIES ('spark.sql.create.version'='3.1.1')");

CommandProcessorResponse response = driver
.run("create function test_tableOneView_LessThanHundred as 'com.linkedin.coral.hive.hive2rel.CoralTestUDF'");
response = driver.run(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2017-2023 LinkedIn Corporation. All rights reserved.
* Copyright 2017-2024 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
Expand All @@ -20,6 +20,7 @@
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.testng.annotations.AfterTest;
import org.testng.annotations.BeforeClass;
Expand Down Expand Up @@ -233,4 +234,18 @@ public void testUnsupportedOuterExplodeWithoutColumns() {
SqlNode sqlNode = convert(input);
assertEquals(sqlNode.toString().toLowerCase().replaceAll("\n", " "), expected.toLowerCase());
}

/**
 * Checks that coral-hive can turn the text of a Spark-created view into a SqlNode even when a reserved keyword
 * used as a column name is not backquoted.
 */
@Test
public void testUnquotedKeywordAsColumnName() {
  final HiveToRelConverter converter = new HiveToRelConverter(msc);
  final Table sparkView = msc.getTable("test", "spark_created_view");
  final String expandedText = sparkView.getViewExpandedText();
  // Strip every backquote so the keyword column name reaches the parser unquoted
  final String unquotedSql = expandedText.replaceAll("`", "");
  final SqlNode parsed = converter.toSqlNode(unquotedSql, sparkView);
  // Rendered on a single line, the parsed node must equal the view's original expanded text
  final String singleLine = parsed.toString().replaceAll("\\r?\\n", " ");
  assertEquals(singleLine, expandedText);
}
}

0 comments on commit a83b648

Please sign in to comment.