From 9a7ffa7ce0dc8382302f773dfecee09b6de38add Mon Sep 17 00:00:00 2001 From: David Sisson Date: Mon, 12 Feb 2024 16:01:46 -0800 Subject: [PATCH] feat: Add support for reduction operation in set comparison queries (#93) This addresses issue #92 . --- .../textplan/converter/PlanPrinterVisitor.cpp | 22 ++- .../textplan/data/set-comparision-any.json | 126 ++++++++++++++++++ .../SubstraitPlanSubqueryRelationVisitor.cpp | 9 ++ .../parser/grammar/SubstraitPlanLexer.g4 | 1 + .../parser/grammar/SubstraitPlanParser.g4 | 3 +- .../parser/tests/TextPlanParserTest.cpp | 2 +- 6 files changed, 159 insertions(+), 4 deletions(-) create mode 100644 src/substrait/textplan/data/set-comparision-any.json diff --git a/src/substrait/textplan/converter/PlanPrinterVisitor.cpp b/src/substrait/textplan/converter/PlanPrinterVisitor.cpp index f6e3fbe7..8187c067 100644 --- a/src/substrait/textplan/converter/PlanPrinterVisitor.cpp +++ b/src/substrait/textplan/converter/PlanPrinterVisitor.cpp @@ -285,7 +285,7 @@ std::any PlanPrinterVisitor::visitSubquerySetComparison( const ::substrait::proto::Expression_Subquery_SetComparison& query) { std::stringstream result; if (query.has_left()) { - result << ANY_CAST(std::string, visitExpression(query.left())); + result << ANY_CAST(std::string, visitExpression(query.left())) << " "; } else { errorListener_->addError( "No expression defined for set comparison operation."); @@ -324,7 +324,25 @@ std::any PlanPrinterVisitor::visitSubquerySetComparison( errorListener_->addError("Did not recognize the subquery comparison."); return std::string("UNSUPPORTED SUBQUERY"); } - result << "ALL SUBQUERY "; + switch (query.reduction_op()) { + case ::substrait::proto:: + Expression_Subquery_SetComparison_ReductionOp_REDUCTION_OP_ANY: + result << "ANY "; + break; + case ::substrait::proto:: + Expression_Subquery_SetComparison_ReductionOp_REDUCTION_OP_ALL: + result << "ALL "; + break; + case ::substrait::proto:: + Expression_Subquery_SetComparison_ReductionOp_REDUCTION_OP_UNSPECIFIED: + case ::substrait::proto:: + Expression_Subquery_SetComparison_ReductionOp_Expression_Subquery_SetComparison_ReductionOp_INT_MIN_SENTINEL_DO_NOT_USE_: + case ::substrait::proto:: + Expression_Subquery_SetComparison_ReductionOp_Expression_Subquery_SetComparison_ReductionOp_INT_MAX_SENTINEL_DO_NOT_USE_: + errorListener_->addError("Did not recognize the subquery reduction op."); + return std::string("UNSUPPORTED SUBQUERY"); + } + result << "SUBQUERY "; if (query.has_right()) { const SymbolInfo* symbol = symbolTable_->lookupSymbolByParentQueryAndType( currentScope_->sourceLocation, diff --git a/src/substrait/textplan/data/set-comparision-any.json b/src/substrait/textplan/data/set-comparision-any.json new file mode 100644 index 00000000..35960148 --- /dev/null +++ b/src/substrait/textplan/data/set-comparision-any.json @@ -0,0 +1,126 @@ +# "CREATE TABLE NATION (N_REGIONKEY BIGINT NOT NULL)" "SELECT * FROM NATION WHERE N_REGIONKEY < ANY (SELECT N_REGIONKEY FROM NATION)" +{ + "extensionUris": [], + "extensions": [], + "relations": [{ + "root": { + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [1] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["N_REGIONKEY"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["NATION"] + } + } + }, + "condition": { + "subquery": { + "setComparison": { + "comparisonOp": 3, + "reductionOp": 1, + "left": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, + "right": { + "project": { + "common": { + "emit": { + "outputMapping": [1] + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["N_REGIONKEY"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["NATION"] + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }] + } + } + } + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }] + } + }, + "names": ["N_REGIONKEY"] + } + }], + "expectedTypeUrls": [] +} diff --git a/src/substrait/textplan/parser/SubstraitPlanSubqueryRelationVisitor.cpp b/src/substrait/textplan/parser/SubstraitPlanSubqueryRelationVisitor.cpp index 3c21777b..29e91976 100644 --- a/src/substrait/textplan/parser/SubstraitPlanSubqueryRelationVisitor.cpp +++ b/src/substrait/textplan/parser/SubstraitPlanSubqueryRelationVisitor.cpp @@ -1362,6 +1362,15 @@ SubstraitPlanSubqueryRelationVisitor::visitExpressionSetComparisonSubquery( ::substrait::proto::Expression, visitExpression(ctx->expression())); expr.mutable_subquery()->mutable_set_comparison()->set_comparison_op( comparisonToProto(ctx->COMPARISON()->getText())); + if (ctx->ANY() != nullptr) { + expr.mutable_subquery()->mutable_set_comparison()->set_reduction_op( + ::substrait::proto:: + Expression_Subquery_SetComparison_ReductionOp_REDUCTION_OP_ANY); + } else { + expr.mutable_subquery()->mutable_set_comparison()->set_reduction_op( + ::substrait::proto:: + Expression_Subquery_SetComparison_ReductionOp_REDUCTION_OP_ALL); + } // Next find the relation created in a previous step. auto symbol = symbolTable_->lookupSymbolByName(ctx->relation_ref()->getText()); diff --git a/src/substrait/textplan/parser/grammar/SubstraitPlanLexer.g4 b/src/substrait/textplan/parser/grammar/SubstraitPlanLexer.g4 index 75c9cb86..04371ab0 100644 --- a/src/substrait/textplan/parser/grammar/SubstraitPlanLexer.g4 +++ b/src/substrait/textplan/parser/grammar/SubstraitPlanLexer.g4 @@ -51,6 +51,7 @@ EXISTS: 'EXISTS'; UNIQUE: 'UNIQUE'; IN: 'IN'; ALL: 'ALL'; +ANY: 'ANY'; COMPARISON: 'EQ'|'NE'|'LT'|'GT'|'LE'|'GE'; VIRTUAL_TABLE: 'VIRTUAL_TABLE'; diff --git a/src/substrait/textplan/parser/grammar/SubstraitPlanParser.g4 b/src/substrait/textplan/parser/grammar/SubstraitPlanParser.g4 index 2a932a7d..791c5d6d 100644 --- a/src/substrait/textplan/parser/grammar/SubstraitPlanParser.g4 +++ b/src/substrait/textplan/parser/grammar/SubstraitPlanParser.g4 @@ -93,7 +93,7 @@ expression | SUBQUERY relation_ref # expressionScalarSubquery | expression_list IN SUBQUERY relation_ref # expressionInPredicateSubquery | (UNIQUE|EXISTS) IN SUBQUERY relation_ref # expressionSetPredicateSubquery - | expression COMPARISON ALL SUBQUERY relation_ref # expressionSetComparisonSubquery + | expression COMPARISON (ALL|ANY) SUBQUERY relation_ref # expressionSetComparisonSubquery ; expression_list @@ -235,5 +235,6 @@ simple_id | EMIT | NAMED | ALL + | ANY | COMPARISON ; diff --git a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp index f9a0dca3..9884f2ea 100644 --- a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp +++ b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp @@ -1061,7 +1061,7 @@ std::vector getTestCases() { "1:0 → extraneous input 'relation' expecting {, " "'EXTENSION_SPACE', 'NAMED', 'SCHEMA', 'PIPELINES', 'FILTER', " "'GROUPING', 'MEASURE', 'SORT', 'COUNT', 'TYPE', 'EMIT', " - "'ALL', COMPARISON, 'SOURCE', 'ROOT', 'NULL', IDENTIFIER}", + "'ALL', 'ANY', COMPARISON, 'SOURCE', 'ROOT', 'NULL', IDENTIFIER}", "1:24 → mismatched input '{' expecting 'RELATION'", "1:9 → Unrecognized relation type: notyperelation", "1:9 → Internal error: Previously encountered symbol "