From 6439189094640ad48d8f12a9f73749640911805c Mon Sep 17 00:00:00 2001 From: David Sisson Date: Wed, 31 May 2023 12:57:23 -0700 Subject: [PATCH 01/33] feat: fields, schemas, and sources binary output for the textplan parser * Fields are now emitted in expressions completing expression handling. * Schemas and sources are also now implemented. --- .../textplan/parser/tests/TextPlanParserTest.cpp | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp index 901753c4..a501cf4b 100644 --- a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp +++ b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp @@ -748,16 +748,11 @@ std::vector getTestCases() { expression 123_i8 AS i32; expression 123_i8 AS i32 AS i64; })", - AllOf( - HasErrors({}), - AsBinaryPlan(EqualsProto<::substrait::proto::Plan>( - R"(relations { root { input { project { - expressions { cast { type { i32 {} } - input { literal { i8: 123 } } } } - expressions { cast { type { i64 {} } - input { cast { type { i32 {} } - input { literal { i8: 123 } } } } } } - } } } })"))), + AsBinaryPlan(Partially(EqualsProto<::substrait::proto::Plan>( + R"(relations { root { input { project { + expressions { cast { type { i32 {} } input { literal { i8: 123 } } } } + expressions { cast { type { i64 {} } input { cast { type { i32 {} } input { literal { i8: 123 } } } } } } + } } } })"))), }, { "test13-functions", From 628cec524ae86f51f1ce8f3a2dfb993710251110 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Thu, 1 Jun 2023 20:50:03 -0700 Subject: [PATCH 02/33] Pulled over from parser-part6. 
--- src/substrait/textplan/SymbolTablePrinter.cpp | 164 +- .../textplan/converter/CMakeLists.txt | 3 +- .../textplan/converter/PlanPrinterVisitor.cpp | 145 +- .../textplan/converter/PlanPrinterVisitor.h | 1 + .../tests/BinaryToTextPlanConversionTest.cpp | 44 +- src/substrait/textplan/data/tpch-plan01.json | 823 +++++++ src/substrait/textplan/data/tpch-plan03.json | 830 +++++++ src/substrait/textplan/data/tpch-plan05.json | 1066 ++++++++ src/substrait/textplan/data/tpch-plan06.json | 594 +++++ src/substrait/textplan/data/tpch-plan07.json | 1281 ++++++++++ src/substrait/textplan/data/tpch-plan09.json | 1092 +++++++++ src/substrait/textplan/data/tpch-plan10.json | 1045 ++++++++ src/substrait/textplan/data/tpch-plan13.json | 522 ++++ src/substrait/textplan/data/tpch-plan14.json | 812 +++++++ src/substrait/textplan/data/tpch-plan19.json | 2151 +++++++++++++++++ src/substrait/textplan/parser/CMakeLists.txt | 2 + src/substrait/textplan/parser/ParseText.cpp | 31 +- .../parser/SubstraitPlanRelationVisitor.cpp | 533 ++-- .../parser/SubstraitPlanRelationVisitor.h | 53 +- .../parser/SubstraitPlanTypeVisitor.cpp | 212 ++ .../parser/SubstraitPlanTypeVisitor.h | 48 + .../textplan/parser/SubstraitPlanVisitor.cpp | 93 +- .../textplan/parser/SubstraitPlanVisitor.h | 39 +- .../parser/grammar/SubstraitPlanLexer.g4 | 17 +- .../parser/grammar/SubstraitPlanParser.g4 | 61 +- .../parser/tests/TextPlanParserTest.cpp | 44 +- src/substrait/textplan/tests/CMakeLists.txt | 37 + .../textplan/tests/RoundtripTest.cpp | 119 + src/substrait/type/tests/TypeTest.cpp | 9 + 29 files changed, 11410 insertions(+), 461 deletions(-) create mode 100644 src/substrait/textplan/data/tpch-plan01.json create mode 100644 src/substrait/textplan/data/tpch-plan03.json create mode 100644 src/substrait/textplan/data/tpch-plan05.json create mode 100644 src/substrait/textplan/data/tpch-plan06.json create mode 100644 src/substrait/textplan/data/tpch-plan07.json create mode 100644 
src/substrait/textplan/data/tpch-plan09.json create mode 100644 src/substrait/textplan/data/tpch-plan10.json create mode 100644 src/substrait/textplan/data/tpch-plan13.json create mode 100644 src/substrait/textplan/data/tpch-plan14.json create mode 100644 src/substrait/textplan/data/tpch-plan19.json create mode 100644 src/substrait/textplan/parser/SubstraitPlanTypeVisitor.cpp create mode 100644 src/substrait/textplan/parser/SubstraitPlanTypeVisitor.h create mode 100644 src/substrait/textplan/tests/RoundtripTest.cpp diff --git a/src/substrait/textplan/SymbolTablePrinter.cpp b/src/substrait/textplan/SymbolTablePrinter.cpp index c0217f84..9e87ad9c 100644 --- a/src/substrait/textplan/SymbolTablePrinter.cpp +++ b/src/substrait/textplan/SymbolTablePrinter.cpp @@ -74,83 +74,9 @@ void localFileToText( } std::string typeToText(const ::substrait::proto::Type& type) { - switch (type.kind_case()) { - case ::substrait::proto::Type::kBool: - if (type.bool_().nullability() == - ::substrait::proto::Type::NULLABILITY_NULLABLE) { - return "bool?"; - } - return "bool"; - case ::substrait::proto::Type::kI8: - if (type.i8().nullability() == - ::substrait::proto::Type::NULLABILITY_NULLABLE) { - return "i8?"; - } - return "i8"; - case ::substrait::proto::Type::kI16: - if (type.i16().nullability() == - ::substrait::proto::Type::NULLABILITY_NULLABLE) { - return "i16?"; - } - return "i16"; - case ::substrait::proto::Type::kI32: - if (type.i32().nullability() == - ::substrait::proto::Type::NULLABILITY_NULLABLE) { - return "i32?"; - } - return "i32"; - case ::substrait::proto::Type::kI64: - if (type.i64().nullability() == - ::substrait::proto::Type::NULLABILITY_NULLABLE) { - return "i64?"; - } - return "i64"; - case ::substrait::proto::Type::kFp32: - if (type.fp32().nullability() == - ::substrait::proto::Type::NULLABILITY_NULLABLE) { - return "fp32?"; - } - return "fp32"; - case ::substrait::proto::Type::kFp64: - if (type.fp64().nullability() == - 
::substrait::proto::Type::NULLABILITY_NULLABLE) { - return "fp64?"; - } - return "fp64"; - case ::substrait::proto::Type::kString: - if (type.string().nullability() == - ::substrait::proto::Type::NULLABILITY_NULLABLE) { - return "string?"; - } - return "string"; - case ::substrait::proto::Type::kDecimal: - if (type.string().nullability() == - ::substrait::proto::Type::NULLABILITY_NULLABLE) { - return "decimal?"; - } - return "decimal"; - case ::substrait::proto::Type::kVarchar: - if (type.varchar().nullability() == - ::substrait::proto::Type::NULLABILITY_NULLABLE) { - return "varchar?"; - } - return "varchar"; - case ::substrait::proto::Type::kFixedChar: - if (type.fixed_char().nullability() == - ::substrait::proto::Type::NULLABILITY_NULLABLE) { - return "fixedchar?"; - } - return "fixedchar"; - case ::substrait::proto::Type::kDate: - if (type.date().nullability() == - ::substrait::proto::Type::NULLABILITY_NULLABLE) { - return "date?"; - } - return "date"; - case ::substrait::proto::Type::KIND_NOT_SET: - default: - return "UNSUPPORTED_TYPE"; - } + SymbolTable symbolTable; + PlanPrinterVisitor visitor(symbolTable); + return visitor.typeToText(type); }; std::string relationToText( @@ -386,6 +312,88 @@ std::string outputFunctionsSection(const SymbolTable& symbolTable) { return text.str(); } +void outputExtensionSpacesToBinaryPlan( + const SymbolTable& symbolTable, + ::substrait::proto::Plan* plan) { + for (const SymbolInfo& info : symbolTable) { + if (info.type != SymbolType::kExtensionSpace) { + continue; + } + + auto extensionData = + ANY_CAST(std::shared_ptr, info.blob); + auto uri = plan->add_extension_uris(); + uri->set_uri(info.name); + uri->set_extension_uri_anchor(extensionData->anchorReference); + } +} + +void outputFunctionsToBinaryPlan( + const SymbolTable& symbolTable, + ::substrait::proto::Plan* plan) { + std::map spaceNames; + std::set usedSpaces; + + // Look at the existing spaces. 
+ for (const SymbolInfo& info : symbolTable) { + if (info.type != SymbolType::kExtensionSpace) { + continue; + } + + auto extensionData = + ANY_CAST(std::shared_ptr, info.blob); + spaceNames.insert( + std::make_pair(extensionData->anchorReference, info.name)); + } + + // Find any spaces that are used but undefined. + for (const SymbolInfo& info : symbolTable) { + if (info.type != SymbolType::kFunction) { + continue; + } + + auto extension = ANY_CAST(std::shared_ptr, info.blob); + if (extension->extensionUriReference.has_value()) { + usedSpaces.insert(extension->extensionUriReference.value()); + } + } + + // Output the extensions by space in the order they were encountered. + for (const uint32_t space : usedSpaces) { + for (const SymbolInfo& info : symbolTable) { + if (info.type != SymbolType::kFunction) { + continue; + } + + auto functionData = ANY_CAST(std::shared_ptr, info.blob); + if (functionData->extensionUriReference != space) { + continue; + } + + auto func = plan->add_extensions()->mutable_extension_function(); + func->set_function_anchor(functionData->anchor); + func->set_name(functionData->name); + + if (spaceNames.find(space) != spaceNames.end()) { + func->set_extension_uri_reference(space); + } + } + } + + for (const SymbolInfo& info : symbolTable) { + if (info.type != SymbolType::kFunction) { + continue; + } + + auto functionData = ANY_CAST(std::shared_ptr, info.blob); + if (!functionData->extensionUriReference.has_value()) { + auto func = plan->add_extensions()->mutable_extension_function(); + func->set_function_anchor(functionData->anchor); + func->set_name(functionData->name); + } + } +} + } // namespace std::string SymbolTablePrinter::outputToText(const SymbolTable& symbolTable) { @@ -631,6 +639,8 @@ void SymbolTablePrinter::addInputsToRelation( ::substrait::proto::Plan SymbolTablePrinter::outputToBinaryPlan( const SymbolTable& symbolTable) { ::substrait::proto::Plan plan; + outputExtensionSpacesToBinaryPlan(symbolTable, &plan); + 
outputFunctionsToBinaryPlan(symbolTable, &plan); for (const SymbolInfo& info : symbolTable) { if (info.type != SymbolType::kRelation) { continue; diff --git a/src/substrait/textplan/converter/CMakeLists.txt b/src/substrait/textplan/converter/CMakeLists.txt index 182225fa..9ac41c27 100644 --- a/src/substrait/textplan/converter/CMakeLists.txt +++ b/src/substrait/textplan/converter/CMakeLists.txt @@ -18,7 +18,8 @@ add_library(substrait_textplan_converter ${TEXTPLAN_SRCS}) target_link_libraries( substrait_textplan_converter substrait_common substrait_expression - substrait_proto symbol_table error_listener) + substrait_proto symbol_table error_listener + date::date) if(${SUBSTRAIT_CPP_BUILD_TESTING}) add_subdirectory(tests) diff --git a/src/substrait/textplan/converter/PlanPrinterVisitor.cpp b/src/substrait/textplan/converter/PlanPrinterVisitor.cpp index 6e791255..15ddcee5 100644 --- a/src/substrait/textplan/converter/PlanPrinterVisitor.cpp +++ b/src/substrait/textplan/converter/PlanPrinterVisitor.cpp @@ -2,10 +2,13 @@ #include "substrait/textplan/converter/PlanPrinterVisitor.h" +#include #include #include #include +#include "date/date.h" +#include "fmt/format.h" #include "substrait/expression/DecimalLiteral.h" #include "substrait/proto/ProtoUtils.h" #include "substrait/proto/algebra.pb.h" @@ -38,6 +41,19 @@ std::string stringEscape(std::string_view str) { return result.str(); } +std::string invocationToString( + ::substrait::proto::AggregateFunction_AggregationInvocation invocation) { + switch (invocation) { + case ::substrait::proto::AggregateFunction_AggregationInvocation_AGGREGATION_INVOCATION_ALL: + return "all"; + case ::substrait::proto::AggregateFunction_AggregationInvocation_AGGREGATION_INVOCATION_DISTINCT: + return "distinct"; + case ::substrait::proto::AggregateFunction_AggregationInvocation_AGGREGATION_INVOCATION_UNSPECIFIED: + default: + return "unspecified"; + } +} + } // namespace std::string PlanPrinterVisitor::printRelation(const SymbolInfo& symbol) { 
@@ -68,6 +84,11 @@ std::string PlanPrinterVisitor::printRelation(const SymbolInfo& symbol) { return text.str(); } +std::string PlanPrinterVisitor::typeToText( + const ::substrait::proto::Type& type) { + return ANY_CAST(std::string, visitType(type)); +} + std::string PlanPrinterVisitor::lookupFieldReference(uint32_t field_reference) { if (*currentScope_ != SymbolInfo::kUnknown) { auto relationData = @@ -110,62 +131,85 @@ std::any PlanPrinterVisitor::visitType(const ::substrait::proto::Type& type) { case ::substrait::proto::Type::kBool: if (type.bool_().nullability() == ::substrait::proto::Type::NULLABILITY_NULLABLE) { - return std::string("opt_bool"); + return std::string("bool?"); } return std::string("bool"); case ::substrait::proto::Type::kI8: if (type.i8().nullability() == ::substrait::proto::Type::NULLABILITY_NULLABLE) { - return std::string("opt_i8"); + return std::string("i8?"); } return std::string("i8"); case ::substrait::proto::Type::kI16: if (type.i16().nullability() == ::substrait::proto::Type::NULLABILITY_NULLABLE) { - return std::string("opt_i16"); + return std::string("i16?"); } return std::string("i16"); case ::substrait::proto::Type::kI32: if (type.i32().nullability() == ::substrait::proto::Type::NULLABILITY_NULLABLE) { - return std::string("opt_i32"); + return std::string("i32?"); } return std::string("i32"); case ::substrait::proto::Type::kI64: if (type.i64().nullability() == ::substrait::proto::Type::NULLABILITY_NULLABLE) { - return std::string("opt_i64"); + return std::string("i64?"); } return std::string("i64"); case ::substrait::proto::Type::kFp32: if (type.fp32().nullability() == ::substrait::proto::Type::NULLABILITY_NULLABLE) { - return std::string("opt_fp32"); + return std::string("fp32?"); } return std::string("fp32"); case ::substrait::proto::Type::kFp64: if (type.fp64().nullability() == ::substrait::proto::Type::NULLABILITY_NULLABLE) { - return std::string("opt_fp64"); + return std::string("fp64?"); } return std::string("fp64"); case 
::substrait::proto::Type::kString: if (type.string().nullability() == ::substrait::proto::Type::NULLABILITY_NULLABLE) { - return std::string("opt_string"); + return std::string("string?"); } return std::string("string"); - case ::substrait::proto::Type::kDecimal: - if (type.string().nullability() == + case ::substrait::proto::Type::kDecimal: { + std::stringstream result; + result << "decimal"; + if (type.decimal().nullability() == ::substrait::proto::Type::NULLABILITY_NULLABLE) { - return std::string("opt_decimal"); + result << '?'; } - return std::string("decimal"); - case ::substrait::proto::Type::kVarchar: - return std::string("varchar"); - case ::substrait::proto::Type::kFixedChar: - return std::string("fixedchar"); + result << "<" << type.decimal().precision() << ","; + result << type.decimal().scale() << ">"; + return result.str(); + } + case ::substrait::proto::Type::kVarchar: { + std::stringstream result; + result << "varchar"; + if (type.varchar().nullability()) { + result << "?"; + } + result << "<" << type.varchar().length() << ">"; + return result.str(); + } + case ::substrait::proto::Type::kFixedChar: { + std::stringstream result; + result << "fixedchar"; + if (type.fixed_char().nullability()) { + result << "?"; + } + result << "<" << type.fixed_char().length() << ">"; + return result.str(); + } case ::substrait::proto::Type::kDate: + if (type.date().nullability() == + ::substrait::proto::Type::NULLABILITY_NULLABLE) { + return std::string("date?"); + } return std::string("date"); case ::substrait::proto::Type::KIND_NOT_SET: errorListener_->addError( @@ -226,12 +270,9 @@ std::any PlanPrinterVisitor::visitLiteral( text << literal.fp64() << "_fp64"; break; case ::substrait::proto::Expression::Literal ::kDate: { - // TODO -- Format this as a date instead of a delta since an epoch. 
- if (literal.date() >= 0) { - text << "\"epoch+" << literal.date() << " days\"_date"; - } else { - text << "\"epoch" << literal.date() << " days\"_date"; - } + auto refDate = date::sys_days{}; + date::sys_days newDate = refDate + date::days{literal.date()}; + text << '"' << date::year_month_day{newDate} << "\"_date"; break; } case ::substrait::proto::Expression::Literal::kString: @@ -245,43 +286,17 @@ std::any PlanPrinterVisitor::visitLiteral( literal.ShortDebugString()); return std::string("UNSUPPORTED_LITERAL_TYPE"); case ::substrait::proto::Expression_Literal::kIntervalYearToMonth: { - text << "{"; - bool hasPreviousText = false; - if (literal.interval_year_to_month().years() != 0) { - text << literal.interval_year_to_month().years() << "years"; - hasPreviousText = true; - } - if (literal.interval_year_to_month().months() != 0) { - if (hasPreviousText) { - text << ", "; - } - text << literal.interval_year_to_month().months() << "months"; - } - text << "}_interval_year"; // TODO - Change spec to better name. + text << "{" << literal.interval_year_to_month().years() << "_years" + << ", " << literal.interval_year_to_month().months() << "_months" + << "}_interval_year"; // TODO - Change spec to better name. break; } case ::substrait::proto::Expression_Literal::kIntervalDayToSecond: { - text << "{"; - bool hasPreviousText = false; - if (literal.interval_day_to_second().days() != 0) { - text << literal.interval_day_to_second().days() << "days"; - hasPreviousText = true; - } - if (literal.interval_day_to_second().seconds() != 0) { - if (hasPreviousText) { - text << ", "; - } - text << literal.interval_day_to_second().seconds() << "seconds"; - hasPreviousText = true; - } - if (literal.interval_day_to_second().microseconds() != 0) { - if (hasPreviousText) { - text << ", "; - } - text << literal.interval_day_to_second().microseconds() - << "microseconds"; - } - text << "}_interval_day"; // TODO - Change spec to better name. 
+ text << "{" << literal.interval_day_to_second().days() << "_days" + << ", " << literal.interval_day_to_second().seconds() << "_seconds" + << ", " << literal.interval_day_to_second().microseconds() + << "_microseconds" + << "}_interval_day"; // TODO - Change spec to better name. break; } case ::substrait::proto::Expression_Literal::kFixedChar: @@ -592,11 +607,6 @@ std::any PlanPrinterVisitor::visitReferenceSegment( std::any PlanPrinterVisitor::visitExpression( const ::substrait::proto::Expression& expression) { - if (expression.rex_type_case() == - ::substrait::proto::Expression::RexTypeCase::REX_TYPE_NOT_SET) { - // TODO -- Remove this check after expressions are finished. - return std::string("EXPR-NOT-YET-IMPLEMENTED"); - } return BasePlanProtoVisitor::visitExpression(expression); } @@ -681,7 +691,7 @@ std::any PlanPrinterVisitor::visitFilterRelation( const ::substrait::proto::FilterRel& relation) { std::stringstream text; if (relation.has_condition()) { - text << " condition " + text << " filter " << ANY_CAST(std::string, visitExpression(relation.condition())) << ";\n"; } @@ -716,9 +726,14 @@ std::any PlanPrinterVisitor::visitAggregateRelation( << ANY_CAST(std::string, visitAggregateFunction(measure.measure())) << ";\n"; if (measure.has_filter()) { - text << " filter " + - ANY_CAST(std::string, visitExpression(measure.filter())) - << ";\n"; + text << " filter " + << ANY_CAST(std::string, visitExpression(measure.filter())) << ";\n"; + } + if (measure.measure().invocation() != + ::substrait::proto:: + AggregateFunction_AggregationInvocation_AGGREGATION_INVOCATION_UNSPECIFIED) { + text << " invocation " + << invocationToString(measure.measure().invocation()) << ";\n"; } text << " }\n"; } diff --git a/src/substrait/textplan/converter/PlanPrinterVisitor.h b/src/substrait/textplan/converter/PlanPrinterVisitor.h index 825b3b4b..2be16b09 100644 --- a/src/substrait/textplan/converter/PlanPrinterVisitor.h +++ b/src/substrait/textplan/converter/PlanPrinterVisitor.h @@ 
-31,6 +31,7 @@ class PlanPrinterVisitor : public BasePlanProtoVisitor { }; std::string printRelation(const SymbolInfo& symbol); + std::string typeToText(const ::substrait::proto::Type& type); private: std::string lookupFieldReference(uint32_t field_reference); diff --git a/src/substrait/textplan/converter/tests/BinaryToTextPlanConversionTest.cpp b/src/substrait/textplan/converter/tests/BinaryToTextPlanConversionTest.cpp index e97b2d81..9bc1f689 100644 --- a/src/substrait/textplan/converter/tests/BinaryToTextPlanConversionTest.cpp +++ b/src/substrait/textplan/converter/tests/BinaryToTextPlanConversionTest.cpp @@ -408,7 +408,7 @@ std::vector getTestCases() { } filter relation filter { - condition functionref#4(field#2, 0.07_fp64); + filter functionref#4(field#2, 0.07_fp64); })"))), }, { @@ -461,7 +461,47 @@ std::vector getTestCases() { } filter relation filter { - condition functionref#4(field#2, 0.07_fp64); + filter functionref#4(field#2, 0.07_fp64); + })"))), + }, + { + "cast expression", + R"(relations: { + root: { + input: { + filter: { + condition: { + cast: { + type: { + fixed_char: { + length: 10, + type_variation_reference: 0, + nullability: NULLABILITY_REQUIRED + } + }, + input: { + literal: { + fixed_char: "HOUSEHOLD", + nullable: false, + type_variation_reference: 0 + } + }, + failure_behavior: FAILURE_BEHAVIOR_UNSPECIFIED + } + } + } + } + } + })", + AllOf( + HasSymbols({"filter", "root"}), + WhenSerialized(EqSquashingWhitespace( + R"(pipelines { + filter -> root; + } + + filter relation filter { + filter "HOUSEHOLD"_fixedchar<9> AS fixedchar<10>; })"))), }, { diff --git a/src/substrait/textplan/data/tpch-plan01.json b/src/substrait/textplan/data/tpch-plan01.json new file mode 100644 index 00000000..bb975373 --- /dev/null +++ b/src/substrait/textplan/data/tpch-plan01.json @@ -0,0 +1,823 @@ +# select l_returnflag, l_linestatus, sum(l_quantity) as sum_qty, sum(l_extendedprice) as sum_base_price, sum(l_extendedprice smoke.sh tpch_smoke.sh (1 - l_discount)) 
as sum_disc_price, sum(l_extendedprice smoke.sh tpch_smoke.sh (1 - l_discount) smoke.sh tpch_smoke.sh (1 + l_tax)) as sum_charge, avg(l_quantity) as avg_qty, avg(l_extendedprice) as avg_price, avg(l_discount) as avg_disc, count(*) as count_order from lineitem where l_shipdate <= date '1998-12-01' - interval '120' day (3) group by l_returnflag, l_linestatus order by l_returnflag, l_linestatus +{ + "extensionUris": [{ + "extensionUriAnchor": 3, + "uri": "/functions_aggregate_generic.yaml" + }, { + "extensionUriAnchor": 2, + "uri": "/functions_arithmetic_decimal.yaml" + }, { + "extensionUriAnchor": 1, + "uri": "/functions_datetime.yaml" + }], + "extensions": [{ + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "lte:date_date" + } + }, { + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 1, + "name": "subtract:date_day" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 2, + "name": "multiply:dec_dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 3, + "name": "subtract:dec_dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 4, + "name": "add:dec_dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 5, + "name": "sum:dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 6, + "name": "avg:dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 7, + "name": "count:" + } + }], + "relations": [{ + "root": { + "input": { + "sort": { + "common": { + "direct": { + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [16, 17, 18, 19, 20, 21, 22] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": 
["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" 
+ } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["LINEITEM"] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "literal": { + "date": 10561, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "literal": { + "intervalDayToSecond": { + "days": 120, + "seconds": 0, + "microseconds": 0 + }, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 8 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 9 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + }, { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "decimal": { + 
"scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }], + "options": [] + } + }, { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }], + "options": [] 
+ } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 7 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }], + "options": [] + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": { + } + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }] + }], + "measures": [{ + "measure": { + "functionReference": 5, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }, { + "measure": { + "functionReference": 5, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": 
"AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }, { + "measure": { + "functionReference": 5, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }, { + "measure": { + "functionReference": 5, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }, { + "measure": { + "functionReference": 6, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }, { + "measure": { + "functionReference": 6, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + 
"arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }, { + "measure": { + "functionReference": 6, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }, { + "measure": { + "functionReference": 7, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [], + "options": [] + } + }] + } + }, + "sorts": [{ + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + }, { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + }] + } + }, + "names": ["L_RETURNFLAG", "L_LINESTATUS", "SUM_QTY", "SUM_BASE_PRICE", "SUM_DISC_PRICE", "SUM_CHARGE", "AVG_QTY", "AVG_PRICE", "AVG_DISC", "COUNT_ORDER"] + } + }], + "expectedTypeUrls": [] +} diff --git a/src/substrait/textplan/data/tpch-plan03.json b/src/substrait/textplan/data/tpch-plan03.json new file mode 100644 index 00000000..ca718207 --- /dev/null +++ b/src/substrait/textplan/data/tpch-plan03.json @@ -0,0 +1,830 @@ +# select l.l_orderkey, sum(l.l_extendedprice * (1 - l.l_discount)) as revenue, o.o_orderdate, o.o_shippriority from "customer" c, "orders" o, "lineitem" l where 
c.c_mktsegment = 'HOUSEHOLD' and c.c_custkey = o.o_custkey and l.l_orderkey = o.o_orderkey and o.o_orderdate < date '1995-03-25' and l.l_shipdate > date '1995-03-25' group by l.l_orderkey, o.o_orderdate, o.o_shippriority order by revenue desc, o.o_orderdate limit 10 +{ + "extensionUris": [{ + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + }, { + "extensionUriAnchor": 4, + "uri": "/functions_arithmetic_decimal.yaml" + }, { + "extensionUriAnchor": 3, + "uri": "/functions_datetime.yaml" + }, { + "extensionUriAnchor": 2, + "uri": "/functions_comparison.yaml" + }], + "extensions": [{ + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "and:bool" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 1, + "name": "equal:any_any" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 2, + "name": "lt:date_date" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 3, + "name": "gt:date_date" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 4, + "name": "multiply:dec_dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 5, + "name": "subtract:dec_dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 6, + "name": "sum:dec" + } + }], + "relations": [{ + "root": { + "input": { + "fetch": { + "common": { + "direct": { + } + }, + "input": { + "sort": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [4, 5, 6, 7] + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [33, 34, 35, 36] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "cross": { + "common": { + "direct": { + } + 
}, + "left": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["C_CUSTKEY", "C_NAME", "C_ADDRESS", "C_NATIONKEY", "C_PHONE", "C_ACCTBAL", "C_MKTSEGMENT", "C_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "varchar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 40, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 117, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["CUSTOMER"] + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["O_ORDERKEY", "O_CUSTKEY", "O_ORDERSTATUS", "O_TOTALPRICE", "O_ORDERDATE", "O_ORDERPRIORITY", "O_CLERK", "O_SHIPPRIORITY", "O_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": 
"NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 79, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["ORDERS"] + } + } + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + 
"length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["LINEITEM"] + } + } + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "HOUSEHOLD", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": 
{ + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 9 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 17 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 8 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 12 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "date": 9214, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 27 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "date": 9214, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 17 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 12 + } + }, + "rootReference": 
{ + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 15 + } + }, + "rootReference": { + } + } + }, { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 5, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 23 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }], + "options": [] + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }] + }], + "measures": [{ + "measure": { + "functionReference": 6, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + 
"arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }] + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }] + } + }, + "sorts": [{ + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_DESC_NULLS_FIRST" + }, { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + }] + } + }, + "offset": "0", + "count": "10" + } + }, + "names": ["L_ORDERKEY", "REVENUE", "O_ORDERDATE", "O_SHIPPRIORITY"] + } + }], + "expectedTypeUrls": [] +} diff --git a/src/substrait/textplan/data/tpch-plan05.json b/src/substrait/textplan/data/tpch-plan05.json new file mode 100644 index 00000000..67c02049 --- /dev/null +++ b/src/substrait/textplan/data/tpch-plan05.json @@ -0,0 +1,1066 @@ +# select n.n_name, sum(l.l_extendedprice * (1 - l.l_discount)) as revenue from "customer" c, "orders" o, "lineitem" l, "supplier" s, "nation" n, "region" r where c.c_custkey = o.o_custkey and l.l_orderkey = o.o_orderkey and l.l_suppkey = s.s_suppkey and c.c_nationkey = s.s_nationkey and s.s_nationkey = n.n_nationkey and n.n_regionkey = r.r_regionkey and r.r_name = 'EUROPE' and o.o_orderdate >= date '1997-01-01' and o.o_orderdate < date '1997-01-01' + interval '1' year group by n.n_name order by revenue desc +{ + 
"extensionUris": [{ + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + }, { + "extensionUriAnchor": 4, + "uri": "/functions_arithmetic_decimal.yaml" + }, { + "extensionUriAnchor": 3, + "uri": "/functions_datetime.yaml" + }, { + "extensionUriAnchor": 2, + "uri": "/functions_comparison.yaml" + }], + "extensions": [{ + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "and:bool" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 1, + "name": "equal:any_any" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 2, + "name": "gte:date_date" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 3, + "name": "lt:date_date" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 4, + "name": "add:date_year" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 5, + "name": "multiply:dec_dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 6, + "name": "subtract:dec_dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 7, + "name": "sum:dec" + } + }], + "relations": [{ + "root": { + "input": { + "sort": { + "common": { + "direct": { + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [47, 48] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["C_CUSTKEY", "C_NAME", "C_ADDRESS", 
"C_NATIONKEY", "C_PHONE", "C_ACCTBAL", "C_MKTSEGMENT", "C_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "varchar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 40, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 117, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["CUSTOMER"] + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["O_ORDERKEY", "O_CUSTKEY", "O_ORDERSTATUS", "O_TOTALPRICE", "O_ORDERDATE", "O_ORDERPRIORITY", "O_CLERK", "O_SHIPPRIORITY", "O_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": 
"NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 79, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["ORDERS"] + } + } + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + 
"typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["LINEITEM"] + } + } + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["S_SUPPKEY", "S_NAME", "S_ADDRESS", "S_NATIONKEY", "S_PHONE", "S_ACCTBAL", "S_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 40, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 101, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["SUPPLIER"] + } + } + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["N_NATIONKEY", "N_NAME", 
"N_REGIONKEY", "N_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "varchar": { + "length": 152, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["NATION"] + } + } + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["R_REGIONKEY", "R_NAME", "R_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 152, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["REGION"] + } + } + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 9 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 
1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 17 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 8 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 19 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 33 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 36 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 36 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 40 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + 
"functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 42 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 44 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 45 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "EUROPE", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 12 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "date": 9862, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 12 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + 
"scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "literal": { + "date": 9862, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "literal": { + "intervalYearToMonth": { + "years": 1, + "months": 0 + }, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 41 + } + }, + "rootReference": { + } + } + }, { + "scalarFunction": { + "functionReference": 5, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 6, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 23 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }], + "options": [] + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }] + }], + "measures": [{ + 
"measure": { + "functionReference": 7, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }] + } + }, + "sorts": [{ + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_DESC_NULLS_FIRST" + }] + } + }, + "names": ["N_NAME", "REVENUE"] + } + }], + "expectedTypeUrls": [] +} diff --git a/src/substrait/textplan/data/tpch-plan06.json b/src/substrait/textplan/data/tpch-plan06.json new file mode 100644 index 00000000..9d094213 --- /dev/null +++ b/src/substrait/textplan/data/tpch-plan06.json @@ -0,0 +1,594 @@ +# select sum(l_extendedprice * l_discount) as revenue from "lineitem" where l_shipdate >= date '1997-01-01' and l_shipdate < date '1997-01-01' + interval '1' year and l_discount between 0.03 - 0.01 and 0.03 + 0.01 and l_quantity < 24 +{ + "extensionUris": [{ + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + }, { + "extensionUriAnchor": 4, + "uri": "/functions_arithmetic_decimal.yaml" + }, { + "extensionUriAnchor": 2, + "uri": "/functions_datetime.yaml" + }, { + "extensionUriAnchor": 3, + "uri": "/functions_comparison.yaml" + }], + "extensions": [{ + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "and:bool" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 1, + "name": "gte:date_date" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 2, + "name": "lt:date_date" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + 
"functionAnchor": 3, + "name": "add:date_year" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 4, + "name": "gte:any_any" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 5, + "name": "subtract:dec_dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 6, + "name": "lte:any_any" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 7, + "name": "add:dec_dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 8, + "name": "lt:any_any" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 9, + "name": "multiply:dec_dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 10, + "name": "sum:dec" + } + }], + "relations": [{ + "root": { + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [16] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + 
"precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["LINEITEM"] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "date": 9862, + "nullable": false, + "typeVariationReference": 0 + } + } + }], 
+ "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "literal": { + "date": 9862, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "literal": { + "intervalYearToMonth": { + "years": 1, + "months": 0 + }, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 5, + "args": [], + "outputType": { + "decimal": { + "scale": 2, + "precision": 4, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "literal": { + "decimal": { + "value": "AwAAAAAAAAAAAAAAAAAAAA==", + "precision": 3, + "scale": 2 + }, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "literal": { + "decimal": { + "value": "AQAAAAAAAAAAAAAAAAAAAA==", + "precision": 3, + "scale": 2 + }, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 6, + "args": [], + "outputType": { + 
"bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 7, + "args": [], + "outputType": { + "decimal": { + "scale": 2, + "precision": 4, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "literal": { + "decimal": { + "value": "AwAAAAAAAAAAAAAAAAAAAA==", + "precision": 3, + "scale": 2 + }, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "literal": { + "decimal": { + "value": "AQAAAAAAAAAAAAAAAAAAAA==", + "precision": 3, + "scale": 2 + }, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 8, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 24, + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + } + }, + "expressions": [{ + "scalarFunction": { + "functionReference": 9, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": 
{ + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [] + }], + "measures": [{ + "measure": { + "functionReference": 10, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }] + } + }, + "names": ["REVENUE"] + } + }], + "expectedTypeUrls": [] +} diff --git a/src/substrait/textplan/data/tpch-plan07.json b/src/substrait/textplan/data/tpch-plan07.json new file mode 100644 index 00000000..62d2739a --- /dev/null +++ b/src/substrait/textplan/data/tpch-plan07.json @@ -0,0 +1,1281 @@ +# select supp_nation, cust_nation, l_year, sum(volume) as revenue from ( select n1.n_name as supp_nation, n2.n_name as cust_nation, extract(year from l.l_shipdate) as l_year, l.l_extendedprice * (1 - l.l_discount) as volume from "supplier" s, "lineitem" l, "orders" o, "customer" c, "nation" n1, "nation" n2 where s.s_suppkey = l.l_suppkey and o.o_orderkey = l.l_orderkey and c.c_custkey = o.o_custkey and s.s_nationkey = n1.n_nationkey and c.c_nationkey = n2.n_nationkey and ( (n1.n_name = 'EGYPT' and n2.n_name = 'UNITED STATES') or (n1.n_name = 'UNITED STATES' and n2.n_name = 'EGYPT') ) and l.l_shipdate between date '1995-01-01' and date '1996-12-31' ) as shipping group by supp_nation, cust_nation, l_year order by supp_nation, cust_nation, l_year +{ + "extensionUris": [{ + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + }, { + "extensionUriAnchor": 4, + "uri": "/functions_arithmetic_decimal.yaml" + },
{ + "extensionUriAnchor": 3, + "uri": "/functions_datetime.yaml" + }, { + "extensionUriAnchor": 2, + "uri": "/functions_comparison.yaml" + }], + "extensions": [{ + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "and:bool" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 1, + "name": "equal:any_any" + } + }, { + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 2, + "name": "or:bool" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 3, + "name": "gte:date_date" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 4, + "name": "lte:date_date" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 5, + "name": "extract:req_date" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 6, + "name": "multiply:dec_dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 7, + "name": "subtract:dec_dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 8, + "name": "sum:dec" + } + }], + "relations": [{ + "root": { + "input": { + "sort": { + "common": { + "direct": { + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [48, 49, 50, 51] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["S_SUPPKEY", "S_NAME", "S_ADDRESS", "S_NATIONKEY", "S_PHONE", "S_ACCTBAL", 
"S_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 40, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 101, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["SUPPLIER"] + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + 
"typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["LINEITEM"] + } + } + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["O_ORDERKEY", "O_CUSTKEY", "O_ORDERSTATUS", "O_TOTALPRICE", "O_ORDERDATE", "O_ORDERPRIORITY", "O_CLERK", "O_SHIPPRIORITY", "O_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + 
"fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 79, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["ORDERS"] + } + } + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["C_CUSTKEY", "C_NAME", "C_ADDRESS", "C_NATIONKEY", "C_PHONE", "C_ACCTBAL", "C_MKTSEGMENT", "C_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "varchar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 40, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 117, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["CUSTOMER"] + } + } + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["N_NATIONKEY", "N_NAME", "N_REGIONKEY", "N_COMMENT"], + "struct": { + "types": 
[{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "varchar": { + "length": 152, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["NATION"] + } + } + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["N_NATIONKEY", "N_NAME", "N_REGIONKEY", "N_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "varchar": { + "length": 152, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["NATION"] + } + } + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 9 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + 
"value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 23 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 7 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 32 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 24 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 40 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 35 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 44 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + 
} + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 41 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "EGYPT", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 45 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "UNITED STATES", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": 
"NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 41 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "UNITED STATES", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 45 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "EGYPT", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 17 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "date": 9131, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + 
"value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 17 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "date": 9861, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 41 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 45 + } + }, + "rootReference": { + } + } + }, { + "scalarFunction": { + "functionReference": 5, + "args": [], + "outputType": { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "enum": "YEAR" + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 17 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }, { + "scalarFunction": { + "functionReference": 6, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 12 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 7, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + 
}, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 13 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }], + "options": [] + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }] + }], + "measures": [{ + "measure": { + "functionReference": 8, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }] + } + }, + "sorts": [{ + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + }, { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + }, { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + }] + } + }, + "names": ["SUPP_NATION", "CUST_NATION", "L_YEAR", "REVENUE"] + } + }], + "expectedTypeUrls": [] +} diff --git a/src/substrait/textplan/data/tpch-plan09.json b/src/substrait/textplan/data/tpch-plan09.json new file mode 100644 index 00000000..c376b73d 
--- /dev/null +++ b/src/substrait/textplan/data/tpch-plan09.json @@ -0,0 +1,1092 @@ +# select nation, o_year, sum(amount) as sum_profit from ( select n.n_name as nation, extract(year from o.o_orderdate) as o_year, l.l_extendedprice * (1 - l.l_discount) - ps.ps_supplycost * l.l_quantity as amount from "part" p, "supplier" s, "lineitem" l, "partsupp" ps, "orders" o, "nation" n where s.s_suppkey = l.l_suppkey and ps.ps_suppkey = l.l_suppkey and ps.ps_partkey = l.l_partkey and p.p_partkey = l.l_partkey and o.o_orderkey = l.l_orderkey and s.s_nationkey = n.n_nationkey and p.p_name like '%yellow%' ) as profit group by nation, o_year order by nation, o_year desc +{ + "extensionUris": [{ + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + }, { + "extensionUriAnchor": 3, + "uri": "/functions_string.yaml" + }, { + "extensionUriAnchor": 5, + "uri": "/functions_arithmetic_decimal.yaml" + }, { + "extensionUriAnchor": 4, + "uri": "/functions_datetime.yaml" + }, { + "extensionUriAnchor": 2, + "uri": "/functions_comparison.yaml" + }], + "extensions": [{ + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "and:bool" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 1, + "name": "equal:any_any" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 2, + "name": "like:vchar_vchar" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 3, + "name": "extract:req_date" + } + }, { + "extensionFunction": { + "extensionUriReference": 5, + "functionAnchor": 4, + "name": "subtract:dec_dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 5, + "functionAnchor": 5, + "name": "multiply:dec_dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 5, + "functionAnchor": 6, + "name": "sum:dec" + } + }], + "relations": [{ + "root": { + "input": { + "sort": { + "common": { + "direct": { + }
+ }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [50, 51, 52] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["P_PARTKEY", "P_NAME", "P_MFGR", "P_BRAND", "P_TYPE", "P_SIZE", "P_CONTAINER", "P_RETAILPRICE", "P_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "varchar": { + "length": 55, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 23, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["PART"] + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": 
["S_SUPPKEY", "S_NAME", "S_ADDRESS", "S_NATIONKEY", "S_PHONE", "S_ACCTBAL", "S_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 40, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 101, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["SUPPLIER"] + } + } + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": 
"NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["LINEITEM"] + } + } + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["PS_PARTKEY", "PS_SUPPKEY", "PS_AVAILQTY", "PS_SUPPLYCOST", "PS_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 199, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } 
+ }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["PARTSUPP"] + } + } + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["O_ORDERKEY", "O_CUSTKEY", "O_ORDERSTATUS", "O_TOTALPRICE", "O_ORDERDATE", "O_ORDERPRIORITY", "O_CLERK", "O_SHIPPRIORITY", "O_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 79, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["ORDERS"] + } + } + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["N_NATIONKEY", "N_NAME", "N_REGIONKEY", "N_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "varchar": { + 
"length": 152, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["NATION"] + } + } + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 9 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 18 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 33 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 18 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 32 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 17 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + 
"functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 17 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 37 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 16 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 12 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 46 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "varchar": { + "length": 55, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": 
"%yellow%", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 47 + } + }, + "rootReference": { + } + } + }, { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "enum": "YEAR" + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 41 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }, { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 5, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 21 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { 
+ } + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 5, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 35 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 20 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }], + "options": [] + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }] + }], + "measures": [{ + "measure": { + "functionReference": 6, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }] + } + }, + "sorts": [{ + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + }, { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_DESC_NULLS_FIRST" + }] + } + }, + "names": ["NATION", "O_YEAR", "SUM_PROFIT"] + } + }], + "expectedTypeUrls": [] +} diff --git a/src/substrait/textplan/data/tpch-plan10.json 
b/src/substrait/textplan/data/tpch-plan10.json new file mode 100644 index 00000000..c2e716be --- /dev/null +++ b/src/substrait/textplan/data/tpch-plan10.json @@ -0,0 +1,1045 @@ +# select c.c_custkey, c.c_name, sum(l.l_extendedprice * (1 - l.l_discount)) as revenue, c.c_acctbal, n.n_name, c.c_address, c.c_phone, c.c_comment from "customer" c, "orders" o, "lineitem" l, "nation" n where c.c_custkey = o.o_custkey and l.l_orderkey = o.o_orderkey and o.o_orderdate >= date '1994-03-01' and o.o_orderdate < date '1994-03-01' + interval '3' month and l.l_returnflag = 'R' and c.c_nationkey = n.n_nationkey group by c.c_custkey, c.c_name, c.c_acctbal, c.c_phone, n.n_name, c.c_address, c.c_comment order by revenue desc limit 20 +{ + "extensionUris": [{ + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + }, { + "extensionUriAnchor": 4, + "uri": "/functions_arithmetic_decimal.yaml" + }, { + "extensionUriAnchor": 3, + "uri": "/functions_datetime.yaml" + }, { + "extensionUriAnchor": 2, + "uri": "/functions_comparison.yaml" + }], + "extensions": [{ + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "and:bool" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 1, + "name": "equal:any_any" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 2, + "name": "gte:date_date" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 3, + "name": "lt:date_date" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 4, + "name": "add:date_year" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 5, + "name": "multiply:dec_dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 6, + "name": "subtract:dec_dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 7, + "name": "sum:dec" + } + }], +
"relations": [{ + "root": { + "input": { + "fetch": { + "common": { + "direct": { + } + }, + "input": { + "sort": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [8, 9, 10, 11, 12, 13, 14, 15] + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [37, 38, 39, 40, 41, 42, 43, 44] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["C_CUSTKEY", "C_NAME", "C_ADDRESS", "C_NATIONKEY", "C_PHONE", "C_ACCTBAL", "C_MKTSEGMENT", "C_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "varchar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 40, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 117, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["CUSTOMER"] + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } 
+ }, + "baseSchema": { + "names": ["O_ORDERKEY", "O_CUSTKEY", "O_ORDERSTATUS", "O_TOTALPRICE", "O_ORDERDATE", "O_ORDERPRIORITY", "O_CLERK", "O_SHIPPRIORITY", "O_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 79, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["ORDERS"] + } + } + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": 
"NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["LINEITEM"] + } + } + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["N_NATIONKEY", "N_NAME", "N_REGIONKEY", "N_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": 
"NULLABILITY_REQUIRED" + } + }, { + "varchar": { + "length": 152, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["NATION"] + } + } + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 9 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 17 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 8 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 12 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "date": 8825, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + 
"scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 12 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "literal": { + "date": 8825, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "literal": { + "intervalYearToMonth": { + "years": 0, + "months": 3 + }, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 25 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "fixedChar": "R", + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 33 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + 
}, { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 34 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 7 + } + }, + "rootReference": { + } + } + }, { + "scalarFunction": { + "functionReference": 5, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 6, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 23 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }], + "options": [] + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, 
{ + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": { + } + } + }] + }], + "measures": [{ + "measure": { + "functionReference": 7, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 7 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }] + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 7 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + 
"structField": { + "field": 3 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": { + } + } + }] + } + }, + "sorts": [{ + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_DESC_NULLS_FIRST" + }] + } + }, + "offset": "0", + "count": "20" + } + }, + "names": ["C_CUSTKEY", "C_NAME", "REVENUE", "C_ACCTBAL", "N_NAME", "C_ADDRESS", "C_PHONE", "C_COMMENT"] + } + }], + "expectedTypeUrls": [] +} diff --git a/src/substrait/textplan/data/tpch-plan13.json b/src/substrait/textplan/data/tpch-plan13.json new file mode 100644 index 00000000..5c1d750a --- /dev/null +++ b/src/substrait/textplan/data/tpch-plan13.json @@ -0,0 +1,522 @@ +# select c_count, count(*) as custdist from ( select c.c_custkey, count(o.o_orderkey) from "customer" c left outer join "orders" o on c.c_custkey = o.o_custkey and o.o_comment not like '%special%requests%' group by c.c_custkey ) as orders (c_custkey, c_count) group by c_count order by custdist desc, c_count desc +{ + "extensionUris": [{ + "extensionUriAnchor": 4, + "uri": "/functions_aggregate_generic.yaml" + }, { + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + }, { + "extensionUriAnchor": 3, + "uri": "/functions_string.yaml" + }, { + "extensionUriAnchor": 2, + "uri": "/functions_comparison.yaml" + }], + "extensions": [{ + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "and:bool" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 1, + "name": "equal:any_any" + } + }, { + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 2, + "name": "not:bool" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 3, + "name": "like:vchar_vchar" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 4, 
+ "name": "count:any" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 5, + "name": "count:" + } + }], + "relations": [{ + "root": { + "input": { + "sort": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [2, 3] + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [2] + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [17, 18] + } + }, + "input": { + "join": { + "common": { + "direct": { + } + }, + "left": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["C_CUSTKEY", "C_NAME", "C_ADDRESS", "C_NATIONKEY", "C_PHONE", "C_ACCTBAL", "C_MKTSEGMENT", "C_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "varchar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 40, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 117, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["CUSTOMER"] + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, 
+ "baseSchema": { + "names": ["O_ORDERKEY", "O_CUSTKEY", "O_ORDERSTATUS", "O_TOTALPRICE", "O_ORDERDATE", "O_ORDERPRIORITY", "O_CLERK", "O_SHIPPRIORITY", "O_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 79, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["ORDERS"] + } + } + }, + "expression": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 9 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + 
"value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 16 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "varchar": { + "length": 79, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "%special%requests%", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + }], + "options": [] + } + }, + "type": "JOIN_TYPE_LEFT" + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 8 + } + }, + "rootReference": { + } + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }] + }], + "measures": [{ + "measure": { + "functionReference": 4, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }] + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 1 + } 
+ }, + "rootReference": { + } + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }] + }], + "measures": [{ + "measure": { + "functionReference": 5, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [], + "options": [] + } + }] + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }] + } + }, + "sorts": [{ + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_DESC_NULLS_FIRST" + }, { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_DESC_NULLS_FIRST" + }] + } + }, + "names": ["C_COUNT", "CUSTDIST"] + } + }], + "expectedTypeUrls": [] +} diff --git a/src/substrait/textplan/data/tpch-plan14.json b/src/substrait/textplan/data/tpch-plan14.json new file mode 100644 index 00000000..eb81caec --- /dev/null +++ b/src/substrait/textplan/data/tpch-plan14.json @@ -0,0 +1,812 @@ +# select 100.00 * sum(case when p.p_type like 'PROMO%' then l.l_extendedprice * (1 - l.l_discount) else 0 end) / sum(l.l_extendedprice * (1 - l.l_discount)) as promo_revenue from "lineitem" l, "part" p where l.l_partkey = p.p_partkey and l.l_shipdate >= date '1994-08-01' and l.l_shipdate < date '1994-08-01' + interval '1' month +{ + "extensionUris": [{ + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + 
}, { + "extensionUriAnchor": 4, + "uri": "/functions_string.yaml" + }, { + "extensionUriAnchor": 5, + "uri": "/functions_arithmetic_decimal.yaml" + }, { + "extensionUriAnchor": 3, + "uri": "/functions_datetime.yaml" + }, { + "extensionUriAnchor": 2, + "uri": "/functions_comparison.yaml" + }], + "extensions": [{ + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "and:bool" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 1, + "name": "equal:any_any" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 2, + "name": "gte:date_date" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 3, + "name": "lt:date_date" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 4, + "name": "add:date_year" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 5, + "name": "like:vchar_vchar" + } + }, { + "extensionFunction": { + "extensionUriReference": 5, + "functionAnchor": 6, + "name": "multiply:dec_dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 5, + "functionAnchor": 7, + "name": "subtract:dec_dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 5, + "functionAnchor": 8, + "name": "sum:dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 5, + "functionAnchor": 9, + "name": "divide:dec_dec" + } + }], + "relations": [{ + "root": { + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [2] + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [25, 26] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", 
"L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, 
+ "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["LINEITEM"] + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["P_PARTKEY", "P_NAME", "P_MFGR", "P_BRAND", "P_TYPE", "P_SIZE", "P_CONTAINER", "P_RETAILPRICE", "P_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "varchar": { + "length": 55, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 23, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["PART"] + } + } + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + 
} + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 16 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "date": 8978, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "literal": { + "date": 8978, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "literal": { + "intervalYearToMonth": { + "years": 0, + "months": 1 + }, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + } + }, + "expressions": [{ + "ifThen": { + "ifs": [{ + "if": { + "scalarFunction": { + "functionReference": 5, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 20 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "varchar": { + 
"length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "PROMO%", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + }, + "then": { + "scalarFunction": { + "functionReference": 6, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 7, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + }], + "else": { + "literal": { + "decimal": { + "value": "AAAAAAAAAAAAAAAAAAAAAA==", + "precision": 19, + "scale": 0 + }, + "nullable": false, + "typeVariationReference": 0 + } + } + } + }, { + "scalarFunction": { + "functionReference": 6, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + 
"scalarFunction": { + "functionReference": 7, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }], + "options": [] + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [] + }], + "measures": [{ + "measure": { + "functionReference": 8, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }, { + "measure": { + "functionReference": 8, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }] + } + }, + "expressions": [{ + "scalarFunction": { + "functionReference": 9, + "args": [], + "outputType": { + "decimal": { + "scale": 2, + "precision": 19, + "typeVariationReference": 0, + 
"nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 6, + "args": [], + "outputType": { + "decimal": { + "scale": 2, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "literal": { + "decimal": { + "value": "ECcAAAAAAAAAAAAAAAAAAA==", + "precision": 5, + "scale": 2 + }, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }] + } + }, + "names": ["PROMO_REVENUE"] + } + }], + "expectedTypeUrls": [] +} diff --git a/src/substrait/textplan/data/tpch-plan19.json b/src/substrait/textplan/data/tpch-plan19.json new file mode 100644 index 00000000..f9ef7e41 --- /dev/null +++ b/src/substrait/textplan/data/tpch-plan19.json @@ -0,0 +1,2151 @@ +# select sum(l.l_extendedprice* (1 - l.l_discount)) as revenue from "lineitem" l, "part" p where ( p.p_partkey = l.l_partkey and p.p_brand = 'Brand#41' and p.p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') and l.l_quantity >= 2 and l.l_quantity <= 2 + 10 and p.p_size between 1 and 5 and l.l_shipmode in ('AIR', 'AIR REG') and l.l_shipinstruct = 'DELIVER IN PERSON' ) or ( p.p_partkey = l.l_partkey and p.p_brand = 'Brand#13' and p.p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') and l.l_quantity >= 14 and l.l_quantity <= 14 + 10 and p.p_size between 1 and 10 and l.l_shipmode in ('AIR', 'AIR REG') and l.l_shipinstruct = 'DELIVER IN PERSON' ) or ( p.p_partkey = l.l_partkey and p.p_brand = 'Brand#55' and p.p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') and l.l_quantity >= 23 and l.l_quantity <= 23 + 10 and p.p_size between 1 and 15 and 
l.l_shipmode in ('AIR', 'AIR REG') and l.l_shipinstruct = 'DELIVER IN PERSON' ) +{ + "extensionUris": [{ + "extensionUriAnchor": 3, + "uri": "/functions_arithmetic.yaml" + }, { + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + }, { + "extensionUriAnchor": 4, + "uri": "/functions_arithmetic_decimal.yaml" + }, { + "extensionUriAnchor": 2, + "uri": "/functions_comparison.yaml" + }], + "extensions": [{ + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "or:bool" + } + }, { + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 1, + "name": "and:bool" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 2, + "name": "equal:any_any" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 3, + "name": "gte:any_any" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 4, + "name": "lte:any_any" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 5, + "name": "add:i32_i32" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 6, + "name": "multiply:dec_dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 7, + "name": "subtract:dec_dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 8, + "name": "sum:dec" + } + }], + "relations": [{ + "root": { + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [25] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", 
"L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["LINEITEM"] + } + } + }, + "right": { 
+ "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["P_PARTKEY", "P_NAME", "P_MFGR", "P_BRAND", "P_TYPE", "P_SIZE", "P_CONTAINER", "P_RETAILPRICE", "P_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "varchar": { + "length": 55, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 23, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["PART"] + } + } + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + 
"directReference": { + "structField": { + "field": 16 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 19 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "Brand#41", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "fixedChar": "SM CASE", + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { + } + } + } + }, { + 
"value": { + "literal": { + "fixedChar": "SM BOX", + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "fixedChar": "SM PACK", + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "fixedChar": "SM PKG", + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + 
"nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "scalarFunction": { + "functionReference": 5, + "args": [], + "outputType": { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "literal": { + "i32": 10, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 21 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 21 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i32": 5, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + 
"nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 14 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "fixedChar": "AIR", + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 14 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "fixedChar": "AIR REG", + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 13 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "DELIVER IN PERSON", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + 
"arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 16 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 19 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "Brand#13", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "fixedChar": "MED BAG", + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": 
{ + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "fixedChar": "MED BOX", + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "fixedChar": "MED PKG", + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "fixedChar": "MED PACK", + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 14, + "nullable": false, + "typeVariationReference": 0 + 
} + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "scalarFunction": { + "functionReference": 5, + "args": [], + "outputType": { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "literal": { + "i32": 14, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "literal": { + "i32": 10, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 21 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 21 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i32": 10, + 
"nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 14 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "fixedChar": "AIR", + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 14 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "fixedChar": "AIR REG", + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 13 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "DELIVER IN PERSON", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + 
} + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 16 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 19 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "Brand#55", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + 
"literal": { + "fixedChar": "LG CASE", + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "fixedChar": "LG BOX", + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "fixedChar": "LG PACK", + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "fixedChar": "LG PKG", + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": 
{ + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 23, + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "scalarFunction": { + "functionReference": 5, + "args": [], + "outputType": { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "literal": { + "i32": 23, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "literal": { + "i32": 10, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 21 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": 
"NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 21 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i32": 15, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 14 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "fixedChar": "AIR", + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 14 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "fixedChar": "AIR REG", + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 13 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": 
"NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "DELIVER IN PERSON", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + } + }, + "expressions": [{ + "scalarFunction": { + "functionReference": 6, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 7, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }], + "options": [] + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [] + }], + "measures": [{ + "measure": { + "functionReference": 8, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }], + 
"options": [] + } + }] + } + }, + "names": ["REVENUE"] + } + }], + "expectedTypeUrls": [] +} diff --git a/src/substrait/textplan/parser/CMakeLists.txt b/src/substrait/textplan/parser/CMakeLists.txt index dffd4bad..498d572c 100644 --- a/src/substrait/textplan/parser/CMakeLists.txt +++ b/src/substrait/textplan/parser/CMakeLists.txt @@ -10,6 +10,8 @@ add_library( SubstraitPlanPipelineVisitor.h SubstraitPlanRelationVisitor.cpp SubstraitPlanRelationVisitor.h + SubstraitPlanTypeVisitor.cpp + SubstraitPlanTypeVisitor.h ParseText.cpp ParseText.h SubstraitParserErrorListener.cpp) diff --git a/src/substrait/textplan/parser/ParseText.cpp b/src/substrait/textplan/parser/ParseText.cpp index 285c10de..bfea7682 100644 --- a/src/substrait/textplan/parser/ParseText.cpp +++ b/src/substrait/textplan/parser/ParseText.cpp @@ -2,10 +2,11 @@ #include "ParseText.h" +#include #include #include -#include #include +#include #include "SubstraitPlanLexer/SubstraitPlanLexer.h" #include "SubstraitPlanParser/SubstraitPlanParser.h" @@ -37,22 +38,28 @@ antlr4::ANTLRInputStream loadTextString(std::string_view text) { } ParseResult parseStream(antlr4::ANTLRInputStream stream) { + io::substrait::textplan::SubstraitParserErrorListener errorListener; + SubstraitPlanLexer lexer(&stream); + lexer.removeErrorListeners(); + lexer.addErrorListener(&errorListener); antlr4::CommonTokenStream tokens(&lexer); tokens.fill(); SubstraitPlanParser parser(&tokens); parser.removeErrorListeners(); - io::substrait::textplan::SubstraitParserErrorListener parserErrorListener; - parser.addErrorListener(&parserErrorListener); + parser.addErrorListener(&errorListener); auto* tree = parser.plan(); - auto visitor = std::make_shared(); + SymbolTable visitorSymbolTable; + auto visitorErrorListener = std::make_shared(); + auto visitor = std::make_shared( + visitorSymbolTable, visitorErrorListener); try { visitor->visitPlan(tree); } catch (...) 
{ - parserErrorListener.syntaxError( + errorListener.syntaxError( &parser, nullptr, /*line=*/1, @@ -66,7 +73,7 @@ ParseResult parseStream(antlr4::ANTLRInputStream stream) { try { pipelineVisitor->visitPlan(tree); } catch (...) { - parserErrorListener.syntaxError( + errorListener.syntaxError( &parser, nullptr, /*line=*/1, @@ -79,8 +86,16 @@ ParseResult parseStream(antlr4::ANTLRInputStream stream) { *pipelineVisitor->getSymbolTable(), pipelineVisitor->getErrorListener()); try { relationVisitor->visitPlan(tree); + } catch (std::invalid_argument ex) { + errorListener.syntaxError( + &parser, + nullptr, + /*line=*/1, + /*charPositionInLine=*/1, + ex.what(), + std::current_exception()); } catch (...) { - parserErrorListener.syntaxError( + errorListener.syntaxError( &parser, nullptr, /*line=*/1, @@ -92,7 +107,7 @@ ParseResult parseStream(antlr4::ANTLRInputStream stream) { auto finalSymbolTable = relationVisitor->getSymbolTable(); return { *finalSymbolTable, - parserErrorListener.getErrorMessages(), + errorListener.getErrorMessages(), relationVisitor->getErrorListener()->getErrorMessages()}; } diff --git a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp index 794ba660..9537fc36 100644 --- a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp +++ b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp @@ -9,6 +9,7 @@ #include #include "SubstraitPlanParser/SubstraitPlanParser.h" +#include "SubstraitPlanTypeVisitor.h" #include "absl/strings/numbers.h" #include "date/tz.h" #include "substrait/expression/DecimalLiteral.h" @@ -25,6 +26,10 @@ namespace io::substrait::textplan { namespace { +std::string kAggregationPhasePrefix = "aggregationphase"; +std::string kAggregationInvocationPrefix = "aggregationinvocation"; +std::string kSortDirectionPrefix = "sortdirection"; + enum RelationFilterBehavior { kDefault = 0, kBestEffort = 1, @@ -346,6 +351,10 @@ std::any 
SubstraitPlanRelationVisitor::visitRelationFilter( "specified."); break; } + if (result.type() != typeid(::substrait::proto::Expression)) { + // MEGAHACK - expression not of the right type needs to be returned + return defaultResult(); + } *parentRelationData->relation.mutable_filter()->mutable_condition() = ANY_CAST(::substrait::proto::Expression, result); } else { @@ -387,7 +396,7 @@ std::any SubstraitPlanRelationVisitor::visitRelationUsesSchema( schema->add_names(sym.name); auto typeText = ANY_CAST(std::string, sym.blob); // TODO -- Use the location of the schema item for errors. - auto typeProto = textToTypeProto(ctx->getStart(), typeText); + auto typeProto = textToTypeProto(ctx, typeText); if (typeProto.kind_case() != ::substrait::proto::Type::KIND_NOT_SET) { *schema->mutable_struct_()->add_types() = typeProto; } @@ -434,56 +443,279 @@ std::any SubstraitPlanRelationVisitor::visitRelationExpression( return defaultResult(); } -std::any SubstraitPlanRelationVisitor::visitExpression( - SubstraitPlanParser::ExpressionContext* ctx) { - if (auto* funcUseCtx = - dynamic_cast( - ctx)) { - return visitExpressionFunctionUse(funcUseCtx); - } else if ( - auto* constantCtx = - dynamic_cast(ctx)) { - return visitExpressionConstant(constantCtx); - } else if ( - auto* columnCtx = - dynamic_cast(ctx)) { - return visitExpressionColumn(columnCtx); - } else if ( - auto* castCtx = - dynamic_cast(ctx)) { - return visitExpressionCast(castCtx); +std::any SubstraitPlanRelationVisitor::visitRelationGrouping( + SubstraitPlanParser::RelationGroupingContext* ctx) { + auto parentSymbol = symbolTable_->lookupSymbolByLocation( + Location(dynamic_cast(ctx->parent))); + auto parentRelationData = + ANY_CAST(std::shared_ptr, parentSymbol.blob); + auto result = SubstraitPlanRelationVisitor::visitChildren(ctx); + auto parentRelationType = ANY_CAST(RelationType, parentSymbol.subtype); + switch (parentRelationType) { + case RelationType::kAggregate: { + if 
(parentRelationData->relation.aggregate().groupings_size() == 0) { + parentRelationData->relation.mutable_aggregate()->add_groupings(); + } + // Always add new expressions to the first groupings group. + auto newExpr = parentRelationData->relation.mutable_aggregate() + ->mutable_groupings(0) + ->add_grouping_expressions(); + *newExpr = ANY_CAST(::substrait::proto::Expression, result); + if (newExpr->has_selection()) { + newExpr->mutable_selection()->mutable_root_reference(); + } + break; + } + default: + errorListener_->addError( + ctx->getStart(), + "Groupings are not permitted for this kind of relation."); + break; } return defaultResult(); } -std::any SubstraitPlanRelationVisitor::visitRelationSourceReference( - SubstraitPlanParser::RelationSourceReferenceContext* ctx) { +std::any SubstraitPlanRelationVisitor::visitRelationMeasure( + SubstraitPlanParser::RelationMeasureContext* ctx) { + // Construct the measure. + ::substrait::proto::AggregateRel_Measure measure; + auto invocation = ::substrait::proto:: + AggregateFunction_AggregationInvocation_AGGREGATION_INVOCATION_UNSPECIFIED; + std::vector<::substrait::proto::SortField> sorts; + for (auto detail : ctx->measure_detail()) { + auto detailItem = ANY_CAST( + ::substrait::proto::AggregateRel_Measure, visitMeasure_detail(detail)); + if (detail->getStart()->getType() == SubstraitPlanParser::MEASURE) { + if (measure.has_measure()) { + errorListener_->addError( + detail->getStart(), + "A measure expression has already been provided for this measure."); + break; + } + *measure.mutable_measure() = detailItem.measure(); + } else if (detail->getStart()->getType() == SubstraitPlanParser::FILTER) { + if (measure.has_filter()) { + errorListener_->addError( + detail->getStart(), + "A filter has already been provided for this measure."); + break; + } + *measure.mutable_filter() = detailItem.filter(); + } else if ( + detail->getStart()->getType() == SubstraitPlanParser::INVOCATION) { + invocation = 
detailItem.measure().invocation(); + } else if (detail->getStart()->getType() == SubstraitPlanParser::SORT) { + auto newSorts = detailItem.measure().sorts(); + sorts.insert(sorts.end(), newSorts.begin(), newSorts.end()); + } + } + if (invocation != + ::substrait::proto:: + AggregateFunction_AggregationInvocation_AGGREGATION_INVOCATION_UNSPECIFIED) { + measure.mutable_measure()->set_invocation(invocation); + } + for (const auto& sort : sorts) { + *measure.mutable_measure()->add_sorts() = sort; + } + + // Add it to our relation. auto parentSymbol = symbolTable_->lookupSymbolByLocation( Location(dynamic_cast(ctx->parent))); auto parentRelationData = ANY_CAST(std::shared_ptr, parentSymbol.blob); auto parentRelationType = ANY_CAST(RelationType, parentSymbol.subtype); + switch (parentRelationType) { + case RelationType::kAggregate: + *parentRelationData->relation.mutable_aggregate()->add_measures() = + measure; + break; + default: + errorListener_->addError( + ctx->getStart(), + "Measures are not permitted for this kind of relation."); + break; + } + return defaultResult(); +} - if (parentRelationType == RelationType::kRead) { - auto sourceName = ctx->source_reference()->id()->getText(); - auto* symbol = symbolTable_->lookupSymbolByName(sourceName); - if (symbol != nullptr) { - auto* source = - parentRelationData->relation.mutable_read()->mutable_named_table(); - for (const auto& sym : *symbolTable_) { - if (sym.type != SymbolType::kSourceDetail) { - continue; +int32_t SubstraitPlanRelationVisitor::visitAggregationInvocation( + SubstraitPlanParser::IdContext* ctx) { + std::string id = ctx->getText(); + id.erase( + std::remove_if( + id.begin(), + id.end(), + [](auto const& c) -> bool { return !std::isalpha(c); }), + id.end()); + std::transform(id.begin(), id.end(), id.begin(), [](unsigned char c) { + return std::tolower(c); + }); + if (startsWith(id, kAggregationInvocationPrefix)) { + id = id.substr(kAggregationInvocationPrefix.length()); + } + // TODO -- Replace this 
with a handcrafted function or a trie. + if (id == "unspecified") { + return ::substrait::proto::AggregateFunction:: + AGGREGATION_INVOCATION_UNSPECIFIED; + } else if (id == "all") { + return ::substrait::proto::AggregateFunction::AGGREGATION_INVOCATION_ALL; + } else if (id == "distinct") { + return ::substrait::proto::AggregateFunction:: + AGGREGATION_INVOCATION_DISTINCT; + } + this->errorListener_->addError( + ctx->getStart(), + "Unrecognized aggregation invocation: " + ctx->getText()); + return ::substrait::proto::AggregateFunction:: + AGGREGATION_INVOCATION_UNSPECIFIED; +} + +int32_t SubstraitPlanRelationVisitor::visitAggregationPhase( + SubstraitPlanParser::IdContext* ctx) { + std::string id = ctx->getText(); + id.erase( + std::remove_if( + id.begin(), + id.end(), + [](auto const& c) -> bool { return !std::isalpha(c); }), + id.end()); + std::transform(id.begin(), id.end(), id.begin(), [](unsigned char c) { + return std::tolower(c); + }); + if (startsWith(id, kAggregationPhasePrefix)) { + id = id.substr(kAggregationPhasePrefix.length()); + } + // TODO -- Replace this with a handcrafted function or a trie. 
+ if (id == "unspecified") { + return ::substrait::proto::AGGREGATION_PHASE_UNSPECIFIED; + } else if (id == "initialtointermediate") { + return ::substrait::proto::AGGREGATION_PHASE_INITIAL_TO_INTERMEDIATE; + } else if (id == "intermediatetointermediate") { + return ::substrait::proto::AGGREGATION_PHASE_INTERMEDIATE_TO_INTERMEDIATE; + } else if (id == "initialtoresult") { + return ::substrait::proto::AGGREGATION_PHASE_INITIAL_TO_RESULT; + } else if (id == "intermediatetoresult") { + return ::substrait::proto::AGGREGATION_PHASE_INTERMEDIATE_TO_RESULT; + } + this->errorListener_->addError( + ctx->getStart(), "Unrecognized aggregation phase: " + ctx->getText()); + return ::substrait::proto::AGGREGATION_PHASE_UNSPECIFIED; +} + +std::any SubstraitPlanRelationVisitor::visitMeasure_detail( + SubstraitPlanParser::Measure_detailContext* ctx) { + ::substrait::proto::AggregateRel_Measure measure; + switch (ctx->getStart()->getType()) { + case SubstraitPlanParser::MEASURE: { + auto function = measure.mutable_measure(); + auto result = visitExpression(ctx->expression()); + auto expr = ANY_CAST(::substrait::proto::Expression, result); + if (expr.has_scalar_function()) { + const auto& scalarFunc = expr.scalar_function(); + function->set_function_reference(scalarFunc.function_reference()); + for (const auto& arg : scalarFunc.arguments()) { + *function->add_arguments() = arg; } - if (sym.location != symbol->location) { - continue; + for (const auto& option : scalarFunc.options()) { + *function->add_options() = option; + } + if (scalarFunc.has_output_type()) { + *function->mutable_output_type() = scalarFunc.output_type(); + } + if (ctx->literal_complex_type() != nullptr) { + // The version here overrides any that might be in the function. 
+ *function->mutable_output_type() = ANY_CAST( + ::substrait::proto::Type, + visitLiteral_complex_type(ctx->literal_complex_type())); } - source->add_names(sym.name); + if (ctx->id() != nullptr) { + measure.mutable_measure()->set_phase( + static_cast<::substrait::proto::AggregationPhase>( + visitAggregationPhase(ctx->id()))); + } + } else { + // MEGAHACK -- Raise an error as this is not a function use. } + + return measure; } - } else { - errorListener_->addError( - ctx->getStart(), - "Source references are not defined for this kind of relation."); + case SubstraitPlanParser::FILTER: + *measure.mutable_filter() = ANY_CAST( + ::substrait::proto::Expression, visitExpression(ctx->expression())); + return measure; + case SubstraitPlanParser::INVOCATION: + measure.mutable_measure()->set_invocation( + static_cast< + ::substrait::proto::AggregateFunction_AggregationInvocation>( + visitAggregationInvocation(ctx->id()))); + return measure; + case SubstraitPlanParser::SORT: + *measure.mutable_measure()->add_sorts() = ANY_CAST( + ::substrait::proto::SortField, visitSort_field(ctx->sort_field())); + return measure; + default: + // Alert that this kind of measure detail is not in the grammar. 
+ return measure; + } +} + +std::any SubstraitPlanRelationVisitor::visitRelationSourceReference( + SubstraitPlanParser::RelationSourceReferenceContext* ctx) { + auto parentSymbol = symbolTable_->lookupSymbolByLocation( + Location(dynamic_cast(ctx->parent))); + auto parentRelationData = + ANY_CAST(std::shared_ptr, parentSymbol.blob); + auto parentRelationType = ANY_CAST(RelationType, parentSymbol.subtype); + switch (parentRelationType) { + case RelationType::kRead: + *parentRelationData->relation.mutable_read()->mutable_base_schema() = + constructSchema(parentSymbol); + break; + default: + errorListener_->addError( + ctx->getStart(), + "Only read relations support base_schema properties."); + break; + } + return defaultResult(); +} + +std::any SubstraitPlanRelationVisitor::visitRelationSort( + SubstraitPlanParser::RelationSortContext* ctx) { + auto parentSymbol = symbolTable_->lookupSymbolByLocation( + Location(dynamic_cast(ctx->parent))); + auto parentRelationData = + ANY_CAST(std::shared_ptr, parentSymbol.blob); + auto parentRelationType = ANY_CAST(RelationType, parentSymbol.subtype); + switch (parentRelationType) { + case RelationType::kSort: + *parentRelationData->relation.mutable_sort()->add_sorts() = ANY_CAST( + ::substrait::proto::SortField, visitSort_field(ctx->sort_field())); + break; + default: + errorListener_->addError( + ctx->getStart(), + "Sorts are not permitted for this kind of relation."); + break; + } + return defaultResult(); +} + +std::any SubstraitPlanRelationVisitor::visitExpression( + SubstraitPlanParser::ExpressionContext* ctx) { + if (dynamic_cast(ctx)) { + return visitExpressionFunctionUse( + dynamic_cast(ctx)); + } else if (dynamic_cast( + ctx)) { + return visitExpressionConstant( + dynamic_cast(ctx)); + } else if (dynamic_cast(ctx)) { + return visitExpressionColumn( + dynamic_cast(ctx)); + } else if (dynamic_cast(ctx)) { + return visitExpressionCast( + dynamic_cast(ctx)); } return defaultResult(); } @@ -494,7 +726,7 @@ std::any 
SubstraitPlanRelationVisitor::visitExpressionFunctionUse( std::string funcName = ctx->id()->getText(); uint32_t funcReference = 0; auto symbol = symbolTable_->lookupSymbolByName(funcName); - if (symbol->type != SymbolType::kFunction) { + if (symbol == nullptr || symbol->type != SymbolType::kFunction) { errorListener_->addError( ctx->id()->getStart(), ctx->id()->getText() + " is not a function reference."); @@ -505,8 +737,12 @@ std::any SubstraitPlanRelationVisitor::visitExpressionFunctionUse( expr.mutable_scalar_function()->set_function_reference(funcReference); for (const auto& exp : ctx->expression()) { - auto newExpr = - ANY_CAST(::substrait::proto::Expression, visitExpression(exp)); + auto result = visitExpression(exp); + if (result.type() != typeid(::substrait::proto::Expression)) { + // MEGAHACK -- Add an error for a bad type. + return expr; + } + auto newExpr = ANY_CAST(::substrait::proto::Expression, result); *expr.mutable_scalar_function()->add_arguments()->mutable_value() = newExpr; } return expr; @@ -593,22 +829,6 @@ std::any SubstraitPlanRelationVisitor::visitConstant( return literal; } -std::any SubstraitPlanRelationVisitor::visitLiteral_specifier( - SubstraitPlanParser::Literal_specifierContext* ctx) { - // Provides detail for the width of the type. 
- return visitChildren(ctx); -} - -std::any SubstraitPlanRelationVisitor::visitLiteral_basic_type( - SubstraitPlanParser::Literal_basic_typeContext* ctx) { - return textToTypeProto(ctx->getStart(), ctx->getText()); -} - -std::any SubstraitPlanRelationVisitor::visitLiteral_complex_type( - SubstraitPlanParser::Literal_complex_typeContext* ctx) { - return textToTypeProto(ctx->getStart(), ctx->getText()); -} - std::any SubstraitPlanRelationVisitor::visitMap_literal( SubstraitPlanParser::Map_literalContext* ctx) { ::substrait::proto::Expression_Literal literal; @@ -1243,160 +1463,65 @@ ::substrait::proto::Expression_Literal SubstraitPlanRelationVisitor::visitTime( return literal; } -::substrait::proto::Type SubstraitPlanRelationVisitor::textToTypeProto( - const antlr4::Token* token, - const std::string& typeText) { - std::shared_ptr decodedType; - try { - decodedType = Type::decode(typeText); - } catch (...) { - errorListener_->addError(token, "Failed to decode type."); - return ::substrait::proto::Type{}; +std::any SubstraitPlanRelationVisitor::visitSort_field( + SubstraitPlanParser::Sort_fieldContext* ctx) { + ::substrait::proto::SortField sort; + *sort.mutable_expr() = ANY_CAST( + ::substrait::proto::Expression, visitExpression(ctx->expression())); + if (ctx->id() != nullptr) { + sort.set_direction(static_cast<::substrait::proto::SortField_SortDirection>( + visitSortDirection(ctx->id()))); } - return typeToProto(token, *decodedType); + return sort; } -::substrait::proto::Type SubstraitPlanRelationVisitor::typeToProto( - const antlr4::Token* token, - const ParameterizedType& decodedType) { - ::substrait::proto::Type type; - auto nullValue = ::substrait::proto::Type_Nullability_NULLABILITY_UNSPECIFIED; - if (decodedType.nullable()) { - nullValue = ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE; +int32_t SubstraitPlanRelationVisitor::visitSortDirection( + SubstraitPlanParser::IdContext* ctx) { + std::string id = ctx->getText(); +#if 1 + // MEGAHACK -- Turn 
this common code into a subfunction. + id.erase( + std::remove_if( + id.begin(), + id.end(), + [](auto const& c) -> bool { return !std::isalpha(c); }), + id.end()); + std::transform(id.begin(), id.end(), id.begin(), [](unsigned char c) { + return std::tolower(c); + }); + if (startsWith(id, kSortDirectionPrefix)) { + id = id.substr(kSortDirectionPrefix.length()); } - switch (decodedType.kind()) { - case TypeKind::kBool: - type.mutable_bool_()->set_nullability(nullValue); - break; - case TypeKind::kI8: - type.mutable_i8()->set_nullability(nullValue); - break; - case TypeKind::kI16: - type.mutable_i16()->set_nullability(nullValue); - break; - case TypeKind::kI32: - type.mutable_i32()->set_nullability(nullValue); - break; - case TypeKind::kI64: - type.mutable_i64()->set_nullability(nullValue); - break; - case TypeKind::kFp32: - type.mutable_fp32()->set_nullability(nullValue); - break; - case TypeKind::kFp64: - type.mutable_fp64()->set_nullability(nullValue); - break; - case TypeKind::kString: - type.mutable_string()->set_nullability(nullValue); - break; - case TypeKind::kBinary: - type.mutable_binary()->set_nullability(nullValue); - break; - case TypeKind::kTimestamp: - type.mutable_timestamp()->set_nullability(nullValue); - break; - case TypeKind::kDate: - type.mutable_date()->set_nullability(nullValue); - break; - case TypeKind::kTime: - type.mutable_time()->set_nullability(nullValue); - break; - case TypeKind::kIntervalYear: - type.mutable_interval_year()->set_nullability(nullValue); - break; - case TypeKind::kIntervalDay: - type.mutable_interval_day()->set_nullability(nullValue); - break; - case TypeKind::kTimestampTz: - type.mutable_timestamp_tz()->set_nullability(nullValue); - break; - case TypeKind::kUuid: - type.mutable_uuid()->set_nullability(nullValue); - break; - case TypeKind::kFixedChar: { - auto fixedChar = - reinterpret_cast(&decodedType); - if (fixedChar == nullptr) { - break; - } - try { - int32_t length = std::stoi(fixedChar->length()->value()); - 
type.mutable_fixed_char()->set_length(length); - } catch (...) { - errorListener_->addError(token, "Could not parse fixedchar length."); - } - type.mutable_fixed_char()->set_nullability(nullValue); - break; - } - case TypeKind::kVarchar: { - auto varChar = - reinterpret_cast(&decodedType); - if (varChar == nullptr) { - break; - } - try { - int32_t length = std::stoi(varChar->length()->value()); - type.mutable_varchar()->set_length(length); - } catch (...) { - errorListener_->addError(token, "Could not parse varchar length."); - } - type.mutable_varchar()->set_nullability(nullValue); - break; - } - case TypeKind::kFixedBinary: - type.mutable_fixed_binary()->set_nullability(nullValue); - break; - case TypeKind::kDecimal: { - auto dec = reinterpret_cast(&decodedType); - if (dec == nullptr) { - break; - } - try { - int32_t precision = std::stoi(dec->precision()->value()); - int32_t scale = std::stoi(dec->scale()->value()); - type.mutable_decimal()->set_precision(precision); - type.mutable_decimal()->set_scale(scale); - } catch (...) 
{ - errorListener_->addError( - token, "Could not parse decimal precision and scale."); - } - type.mutable_decimal()->set_nullability(nullValue); - break; - } - case TypeKind::kStruct: { - auto structure = - reinterpret_cast(&decodedType); - for (const auto& t : structure->children()) { - *type.mutable_struct_()->add_types() = typeToProto(token, *t); - } - type.mutable_struct_()->set_nullability(nullValue); - break; - } - case TypeKind::kList: { - auto list = reinterpret_cast(&decodedType); - *type.mutable_list()->mutable_type() = - typeToProto(token, *list->elementType()); - type.mutable_list()->set_nullability(nullValue); - break; - } - case TypeKind::kMap: { - auto map = reinterpret_cast(&decodedType); - if (map->keyType() == nullptr || map->valueType() == nullptr) { - errorListener_->addError( - token, "Maps require both a key and a value type."); - break; - } - *type.mutable_map()->mutable_key() = typeToProto(token, *map->keyType()); - *type.mutable_map()->mutable_value() = - typeToProto(token, *map->valueType()); - type.mutable_map()->set_nullability(nullValue); - break; +#endif + // TODO -- Replace this with a handcrafted function or a trie. 
+ if (id == "unspecified") { + return ::substrait::proto::SortField::SORT_DIRECTION_UNSPECIFIED; + } else if (id == "ascnullsfirst") { + return ::substrait::proto::SortField::SORT_DIRECTION_ASC_NULLS_FIRST; + } else if (id == "ascnullslast") { + return ::substrait::proto::SortField::SORT_DIRECTION_ASC_NULLS_LAST; + } else if (id == "descnullsfirst") { + return ::substrait::proto::SortField::SORT_DIRECTION_DESC_NULLS_FIRST; + } else if (id == "descnullslast") { + return ::substrait::proto::SortField::SORT_DIRECTION_DESC_NULLS_LAST; + } else if (id == "clustered") { + return ::substrait::proto::SortField::SORT_DIRECTION_CLUSTERED; + } + this->errorListener_->addError( + ctx->getStart(), "Unrecognized sort direction: " + ctx->getText()); + return ::substrait::proto::SortField::SORT_DIRECTION_UNSPECIFIED; +} + +::substrait::proto::NamedStruct SubstraitPlanRelationVisitor::constructSchema( + SymbolInfo info) { + ::substrait::proto::NamedStruct schema; + for (const auto& symbol : *symbolTable_) { + if (symbol.type != SymbolType::kSchemaColumn) { + continue; } - case TypeKind::kKindNotSet: - errorListener_->addError(token, "Unable to recognize requested type."); - break; + *schema.add_names() = symbol.name; } - return type; + return schema; } } // namespace io::substrait::textplan diff --git a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.h b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.h index 17c78d66..92b21294 100644 --- a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.h +++ b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.h @@ -3,28 +3,26 @@ #pragma once #include "SubstraitPlanParser/SubstraitPlanParser.h" -#include "SubstraitPlanParser/SubstraitPlanParserBaseVisitor.h" #include "substrait/textplan/SymbolTable.h" #include "substrait/textplan/parser/SubstraitParserErrorListener.h" -#include "substrait/type/Type.h" +#include "substrait/textplan/parser/SubstraitPlanTypeVisitor.h" namespace substrait::proto { class
Expression_Literal; class Expression_Literal_Map_KeyValue; +class NamedStruct; class Type; class Type_Struct; } // namespace substrait::proto namespace io::substrait::textplan { -class SubstraitPlanRelationVisitor : public SubstraitPlanParserBaseVisitor { +class SubstraitPlanRelationVisitor : public SubstraitPlanTypeVisitor { public: SubstraitPlanRelationVisitor( const SymbolTable& symbolTable, - std::shared_ptr errorListener) { - symbolTable_ = std::make_shared(symbolTable); - errorListener_ = std::move(errorListener); - } + std::shared_ptr errorListener) + : SubstraitPlanTypeVisitor(symbolTable, std::move(errorListener)) {} [[nodiscard]] std::shared_ptr getSymbolTable() const { return symbolTable_; @@ -51,9 +49,25 @@ class SubstraitPlanRelationVisitor : public SubstraitPlanParserBaseVisitor { std::any visitRelationExpression( SubstraitPlanParser::RelationExpressionContext* ctx) override; + std::any visitRelationGrouping( + SubstraitPlanParser::RelationGroupingContext* ctx) override; + + std::any visitRelationMeasure( + SubstraitPlanParser::RelationMeasureContext* ctx) override; + + int32_t visitAggregationInvocation(SubstraitPlanParser::IdContext* ctx); + + int32_t visitAggregationPhase(SubstraitPlanParser::IdContext* ctx); + + std::any visitMeasure_detail( + SubstraitPlanParser::Measure_detailContext* ctx) override; + std::any visitRelationSourceReference( SubstraitPlanParser::RelationSourceReferenceContext* ctx) override; + std::any visitRelationSort( + SubstraitPlanParser::RelationSortContext* ctx) override; + // visitExpression is a new method delegating to the methods below. 
std::any visitExpression(SubstraitPlanParser::ExpressionContext* ctx); @@ -71,15 +85,6 @@ class SubstraitPlanRelationVisitor : public SubstraitPlanParserBaseVisitor { std::any visitConstant(SubstraitPlanParser::ConstantContext* ctx) override; - std::any visitLiteral_basic_type( - SubstraitPlanParser::Literal_basic_typeContext* ctx) override; - - std::any visitLiteral_complex_type( - SubstraitPlanParser::Literal_complex_typeContext* ctx) override; - - std::any visitLiteral_specifier( - SubstraitPlanParser::Literal_specifierContext* ctx) override; - std::any visitMap_literal( SubstraitPlanParser::Map_literalContext* ctx) override; @@ -92,6 +97,9 @@ class SubstraitPlanRelationVisitor : public SubstraitPlanParserBaseVisitor { std::any visitColumn_name( SubstraitPlanParser::Column_nameContext* ctx) override; + std::any visitSort_field( + SubstraitPlanParser::Sort_fieldContext* ctx) override; + ::substrait::proto::Expression_Literal visitConstantWithType( SubstraitPlanParser::ConstantContext* ctx, const ::substrait::proto::Type& literalType); @@ -147,21 +155,14 @@ class SubstraitPlanRelationVisitor : public SubstraitPlanParserBaseVisitor { ::substrait::proto::Expression_Literal visitTime( SubstraitPlanParser::ConstantContext* ctx); + int32_t visitSortDirection(SubstraitPlanParser::IdContext* ctx); + private: std::string escapeText( const antlr4::tree::TerminalNode* node, const std::string& str); - ::substrait::proto::Type textToTypeProto( - const antlr4::Token* token, - const std::string& typeText); - - ::substrait::proto::Type typeToProto( - const antlr4::Token* token, - const ParameterizedType& decodedType); - - std::shared_ptr symbolTable_; - std::shared_ptr errorListener_; + ::substrait::proto::NamedStruct constructSchema(SymbolInfo info); const SymbolInfo* currentRelationScope_{nullptr}; // Not owned. 
}; diff --git a/src/substrait/textplan/parser/SubstraitPlanTypeVisitor.cpp b/src/substrait/textplan/parser/SubstraitPlanTypeVisitor.cpp new file mode 100644 index 00000000..8e050004 --- /dev/null +++ b/src/substrait/textplan/parser/SubstraitPlanTypeVisitor.cpp @@ -0,0 +1,212 @@ +/* SPDX-License-Identifier: Apache-2.0 */ + +#include "SubstraitPlanTypeVisitor.h" + +#include +#include +#include "SubstraitPlanParser/SubstraitPlanParser.h" +#include "substrait/proto/type.pb.h" +#include "substrait/textplan/SymbolTable.h" +#include "substrait/type/Type.h" + +namespace io::substrait::textplan { + +std::any SubstraitPlanTypeVisitor::visitLiteral_specifier( + SubstraitPlanParser::Literal_specifierContext* ctx) { + // Provides detail for the width of the type. + return visitChildren(ctx); +} + +std::any SubstraitPlanTypeVisitor::visitLiteral_basic_type( + SubstraitPlanParser::Literal_basic_typeContext* ctx) { + std::shared_ptr decodedType; + try { + decodedType = Type::decode(ctx->getText()); + } catch (...) { + errorListener_->addError(ctx->getStart(), "Failed to decode type."); + return ::substrait::proto::Type{}; + } + return typeToProto(ctx, *decodedType); +} + +std::any SubstraitPlanTypeVisitor::visitLiteral_complex_type( + SubstraitPlanParser::Literal_complex_typeContext* ctx) { + std::shared_ptr decodedType; + try { + decodedType = Type::decode(ctx->getText()); + } catch (...) { + errorListener_->addError(ctx->getStart(), "Failed to decode type."); + return ::substrait::proto::Type{}; + } + return typeToProto(ctx, *decodedType); +} + +::substrait::proto::Type SubstraitPlanTypeVisitor::textToTypeProto( + const antlr4::ParserRuleContext* ctx, + const std::string& typeText) { + std::shared_ptr decodedType; + try { + decodedType = Type::decode(typeText); + } catch (...)
{ + errorListener_->addError(ctx->getStart(), "Failed to decode type."); + return ::substrait::proto::Type{}; + } + return typeToProto(ctx, *decodedType); +} + +::substrait::proto::Type SubstraitPlanTypeVisitor::typeToProto( + const antlr4::ParserRuleContext* ctx, + const ParameterizedType& decodedType) { + ::substrait::proto::Type type; + auto nullValue = ::substrait::proto::Type_Nullability_NULLABILITY_REQUIRED; + if (decodedType.nullable()) { + nullValue = ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE; + } + switch (decodedType.kind()) { + case TypeKind::kBool: + type.mutable_bool_()->set_nullability(nullValue); + break; + case TypeKind::kI8: + type.mutable_i8()->set_nullability(nullValue); + break; + case TypeKind::kI16: + type.mutable_i16()->set_nullability(nullValue); + break; + case TypeKind::kI32: + type.mutable_i32()->set_nullability(nullValue); + break; + case TypeKind::kI64: + type.mutable_i64()->set_nullability(nullValue); + break; + case TypeKind::kFp32: + type.mutable_fp32()->set_nullability(nullValue); + break; + case TypeKind::kFp64: + type.mutable_fp64()->set_nullability(nullValue); + break; + case TypeKind::kString: + type.mutable_string()->set_nullability(nullValue); + break; + case TypeKind::kBinary: + type.mutable_binary()->set_nullability(nullValue); + break; + case TypeKind::kTimestamp: + type.mutable_timestamp()->set_nullability(nullValue); + break; + case TypeKind::kDate: + type.mutable_date()->set_nullability(nullValue); + break; + case TypeKind::kTime: + type.mutable_time()->set_nullability(nullValue); + break; + case TypeKind::kIntervalYear: + type.mutable_interval_year()->set_nullability(nullValue); + break; + case TypeKind::kIntervalDay: + type.mutable_interval_day()->set_nullability(nullValue); + break; + case TypeKind::kTimestampTz: + type.mutable_timestamp_tz()->set_nullability(nullValue); + break; + case TypeKind::kUuid: + type.mutable_uuid()->set_nullability(nullValue); + break; + case TypeKind::kFixedChar: { + auto 
fixedChar = + reinterpret_cast(&decodedType); + if (fixedChar == nullptr) { + break; + } + try { + int32_t length = std::stoi(fixedChar->length()->value()); + type.mutable_fixed_char()->set_length(length); + } catch (...) { + errorListener_->addError( + ctx->getStart(), "Could not parse fixedchar length."); + } + type.mutable_fixed_char()->set_nullability(nullValue); + break; + } + case TypeKind::kVarchar: { + auto varChar = + reinterpret_cast(&decodedType); + if (varChar == nullptr) { + break; + } + try { + int32_t length = std::stoi(varChar->length()->value()); + type.mutable_varchar()->set_length(length); + } catch (...) { + errorListener_->addError( + ctx->getStart(), "Could not parse varchar length."); + } + type.mutable_varchar()->set_nullability(nullValue); + break; + } + case TypeKind::kFixedBinary: + type.mutable_fixed_binary()->set_nullability(nullValue); + break; + case TypeKind::kDecimal: { + auto dec = reinterpret_cast(&decodedType); + if (dec == nullptr) { + break; + } + try { + int32_t precision = std::stoi(dec->precision()->value()); + int32_t scale = std::stoi(dec->scale()->value()); + type.mutable_decimal()->set_precision(precision); + type.mutable_decimal()->set_scale(scale); + } catch (...) 
{ + errorListener_->addError( + ctx->getStart(), "Could not parse decimal precision and scale."); + } + type.mutable_decimal()->set_nullability(nullValue); + break; + } + case TypeKind::kStruct: { + auto structure = + reinterpret_cast(&decodedType); + for (const auto& t : structure->children()) { + *type.mutable_struct_()->add_types() = typeToProto(ctx, *t); + } + type.mutable_struct_()->set_nullability(nullValue); + break; + } + case TypeKind::kList: { + auto list = reinterpret_cast(&decodedType); + *type.mutable_list()->mutable_type() = + typeToProto(ctx, *list->elementType()); + type.mutable_list()->set_nullability(nullValue); + break; + } + case TypeKind::kMap: { + auto map = reinterpret_cast(&decodedType); + *type.mutable_map()->mutable_key() = typeToProto(ctx, *map->keyType()); + *type.mutable_map()->mutable_value() = + typeToProto(ctx, *map->valueType()); + type.mutable_map()->set_nullability(nullValue); + break; + } + case TypeKind::kKindNotSet: + if (!insideStructLiteralWithExternalType(ctx)) { + errorListener_->addError( + ctx->getStart(), "Unable to recognize requested type."); + } + break; + } + return type; +} + +bool SubstraitPlanTypeVisitor::insideStructLiteralWithExternalType( + const antlr4::RuleContext* ctx) { + if (ctx == nullptr) { + return false; + } + if (ctx->getRuleIndex() == SubstraitPlanParser::RuleConstant) { + return true; + } + return insideStructLiteralWithExternalType( + dynamic_cast(ctx->parent)); +} + +} // namespace io::substrait::textplan diff --git a/src/substrait/textplan/parser/SubstraitPlanTypeVisitor.h b/src/substrait/textplan/parser/SubstraitPlanTypeVisitor.h new file mode 100644 index 00000000..aa61ea91 --- /dev/null +++ b/src/substrait/textplan/parser/SubstraitPlanTypeVisitor.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: Apache-2.0 */ + +#pragma once + +#include "SubstraitPlanParser/SubstraitPlanParser.h" +#include "SubstraitPlanParser/SubstraitPlanParserBaseVisitor.h" +#include "substrait/textplan/SymbolTable.h" 
+#include "substrait/textplan/parser/SubstraitParserErrorListener.h" +#include "substrait/type/Type.h" + +namespace substrait::proto { +class Type; +} + +namespace io::substrait::textplan { + +class SubstraitPlanTypeVisitor : public SubstraitPlanParserBaseVisitor { + public: + SubstraitPlanTypeVisitor( + const SymbolTable& symbolTable, + std::shared_ptr errorListener) { + symbolTable_ = std::make_shared(symbolTable); + errorListener_ = std::move(errorListener); + } + + std::any visitLiteral_specifier( + SubstraitPlanParser::Literal_specifierContext* ctx) override; + std::any visitLiteral_basic_type( + SubstraitPlanParser::Literal_basic_typeContext* ctx) override; + std::any visitLiteral_complex_type( + SubstraitPlanParser::Literal_complex_typeContext* ctx) override; + + protected: + ::substrait::proto::Type textToTypeProto( + const antlr4::ParserRuleContext* ctx, + const std::string& typeText); + + ::substrait::proto::Type typeToProto( + const antlr4::ParserRuleContext* ctx, + const ParameterizedType& decodedType); + + bool insideStructLiteralWithExternalType(const antlr4::RuleContext* ctx); + + std::shared_ptr symbolTable_; + std::shared_ptr errorListener_; +}; + +} // namespace io::substrait::textplan diff --git a/src/substrait/textplan/parser/SubstraitPlanVisitor.cpp b/src/substrait/textplan/parser/SubstraitPlanVisitor.cpp index 30dfb3a6..d52ed537 100644 --- a/src/substrait/textplan/parser/SubstraitPlanVisitor.cpp +++ b/src/substrait/textplan/parser/SubstraitPlanVisitor.cpp @@ -9,6 +9,7 @@ #include "substrait/textplan/Finally.h" #include "substrait/textplan/Location.h" #include "substrait/textplan/StructuredSymbolData.h" +#include "substrait/type/Type.h" namespace io::substrait::textplan { @@ -51,10 +52,14 @@ std::any SubstraitPlanVisitor::visitPipeline( std::any SubstraitPlanVisitor::visitExtensionspace( SubstraitPlanParser::ExtensionspaceContext* ctx) { + if (ctx->URI() == nullptr) { + // Nothing to keep track of at this level. 
+ return visitChildren(ctx); + } + const std::string& uri = ctx->URI()->getText(); // TODO -- Transition to querying the symbol table for the space number. #42 - static uint32_t numSpacesSeen = 0; - uint32_t thisSpace = numSpacesSeen++; + uint32_t thisSpace = ++numSpacesSeen_; symbolTable_->defineSymbol( uri, Location(ctx), @@ -78,13 +83,26 @@ std::any SubstraitPlanVisitor::visitFunction( SubstraitPlanParser::FunctionContext* ctx) { // TODO -- Transition to using the symbol table for the function number. #42 // Let our enclosing extension space provide us with the detail. + std::string referenceName; + if (ctx->id() != nullptr) { + referenceName = ctx->id()->getText(); + } else if (ctx->name() != nullptr) { + referenceName = ctx->name()->getText(); + auto colonPos = referenceName.find_first_of(':'); + if (colonPos != std::string::npos) { + referenceName = referenceName.substr(0, colonPos); + } + } else { + referenceName = ""; + } + symbolTable_->defineSymbol( - ctx->id()->getText(), + referenceName, Location(ctx), SymbolType::kFunction, defaultResult(), std::make_shared( - ctx->name()->getText(), std::nullopt, ++numFunctionsSeen_)); + ctx->name() != nullptr ? ctx->name()->getText() : std::string(), std::nullopt, numFunctionsSeen_++)); return visitChildren(ctx); } @@ -107,9 +125,9 @@ std::any SubstraitPlanVisitor::visitSchema_definition( defaultResult(), defaultResult()); + // Mark all of the schema items so we can find the ones related to this schema. for (const auto& item : ctx->schema_item()) { auto symbol = ANY_CAST(SymbolInfo*, visitSchema_item(item)); - // TODO -- Implement schemas instead of skipping them.
if (symbol == nullptr) { continue; } @@ -119,16 +137,6 @@ std::any SubstraitPlanVisitor::visitSchema_definition( return nullptr; } -std::any SubstraitPlanVisitor::visitColumn_attribute( - SubstraitPlanParser::Column_attributeContext* ctx) { - return visitChildren(ctx); -} - -std::any SubstraitPlanVisitor::visitColumn_type( - SubstraitPlanParser::Column_typeContext* ctx) { - return visitChildren(ctx); -} - std::any SubstraitPlanVisitor::visitSchema_item( SubstraitPlanParser::Schema_itemContext* ctx) { return symbolTable_->defineSymbol( @@ -136,7 +144,7 @@ std::any SubstraitPlanVisitor::visitSchema_item( Location(ctx), SymbolType::kSchemaColumn, defaultResult(), - ctx->column_type()->getText()); + visitLiteral_complex_type(ctx->literal_complex_type())); } std::any SubstraitPlanVisitor::visitRelation( @@ -223,16 +231,6 @@ std::any SubstraitPlanVisitor::visitLiteral_specifier( return visitChildren(ctx); } -std::any SubstraitPlanVisitor::visitLiteral_basic_type( - SubstraitPlanParser::Literal_basic_typeContext* ctx) { - return visitChildren(ctx); -} - -std::any SubstraitPlanVisitor::visitLiteral_complex_type( - SubstraitPlanParser::Literal_complex_typeContext* ctx) { - return visitChildren(ctx); -} - std::any SubstraitPlanVisitor::visitMap_literal_value( SubstraitPlanParser::Map_literal_valueContext* ctx) { return visitChildren(ctx); @@ -311,6 +309,11 @@ std::any SubstraitPlanVisitor::visitRelation_filter_behavior( return visitChildren(ctx); } +std::any SubstraitPlanVisitor::visitMeasure_detail( + SubstraitPlanParser::Measure_detailContext* ctx) { + return visitChildren(ctx); +} + std::any SubstraitPlanVisitor::visitRelationFilter( SubstraitPlanParser::RelationFilterContext* ctx) { return visitChildren(ctx); @@ -318,7 +321,8 @@ std::any SubstraitPlanVisitor::visitRelationFilter( std::any SubstraitPlanVisitor::visitRelationExpression( SubstraitPlanParser::RelationExpressionContext* ctx) { - return visitChildren(ctx); + visitChildren(ctx); + return nullptr; } std::any 
SubstraitPlanVisitor::visitRelationAdvancedExtension( @@ -331,6 +335,31 @@ std::any SubstraitPlanVisitor::visitRelationSourceReference( return visitChildren(ctx); } +std::any SubstraitPlanVisitor::visitRelationGrouping( + SubstraitPlanParser::RelationGroupingContext* ctx) { + return visitChildren(ctx); +} + +std::any SubstraitPlanVisitor::visitRelationMeasure( + SubstraitPlanParser::RelationMeasureContext* ctx) { + return visitChildren(ctx); +} + +std::any SubstraitPlanVisitor::visitRelationSort( + SubstraitPlanParser::RelationSortContext* ctx) { + return visitChildren(ctx); +} + +std::any SubstraitPlanVisitor::visitRelationCount( + SubstraitPlanParser::RelationCountContext* ctx) { + return visitChildren(ctx); +} + +std::any SubstraitPlanVisitor::visitRelationJoinType( + SubstraitPlanParser::RelationJoinTypeContext* ctx) { + return visitChildren(ctx); +} + std::any SubstraitPlanVisitor::visitFile_location( SubstraitPlanParser::File_locationContext* ctx) { return visitChildren(ctx); @@ -406,10 +435,20 @@ std::any SubstraitPlanVisitor::visitRelation_ref( return rel; } +std::any SubstraitPlanVisitor::visitSort_field( + SubstraitPlanParser::Sort_fieldContext* ctx) { + return defaultResult(); +} + std::any SubstraitPlanVisitor::visitId(SubstraitPlanParser::IdContext* ctx) { return ctx->getText(); } +std::any SubstraitPlanVisitor::visitSimple_id( + SubstraitPlanParser::Simple_idContext* ctx) { + return defaultResult(); +} + // NOLINTEND(readability-convert-member-functions-to-static) // NOLINTEND(readability-identifier-naming) diff --git a/src/substrait/textplan/parser/SubstraitPlanVisitor.h b/src/substrait/textplan/parser/SubstraitPlanVisitor.h index c973024d..2bfb0fc7 100644 --- a/src/substrait/textplan/parser/SubstraitPlanVisitor.h +++ b/src/substrait/textplan/parser/SubstraitPlanVisitor.h @@ -3,18 +3,18 @@ #pragma once #include "SubstraitPlanParser/SubstraitPlanParser.h" -#include "SubstraitPlanParser/SubstraitPlanParserVisitor.h" #include 
"substrait/textplan/SymbolTable.h" #include "substrait/textplan/parser/SubstraitParserErrorListener.h" +#include "substrait/textplan/parser/SubstraitPlanTypeVisitor.h" namespace io::substrait::textplan { -class SubstraitPlanVisitor : public SubstraitPlanParserVisitor { +class SubstraitPlanVisitor : public SubstraitPlanTypeVisitor { public: - SubstraitPlanVisitor() { - symbolTable_ = std::make_shared(); - errorListener_ = std::make_shared(); - } + SubstraitPlanVisitor( + const SymbolTable& symbolTable, + std::shared_ptr errorListener) + : SubstraitPlanTypeVisitor(symbolTable, errorListener) {} [[nodiscard]] std::shared_ptr getSymbolTable() const { return symbolTable_; @@ -37,10 +37,6 @@ class SubstraitPlanVisitor : public SubstraitPlanParserVisitor { std::any visitSignature(SubstraitPlanParser::SignatureContext* ctx) override; std::any visitSchema_definition( SubstraitPlanParser::Schema_definitionContext* ctx) override; - std::any visitColumn_attribute( - SubstraitPlanParser::Column_attributeContext* ctx) override; - std::any visitColumn_type( - SubstraitPlanParser::Column_typeContext* ctx) override; std::any visitSchema_item( SubstraitPlanParser::Schema_itemContext* ctx) override; std::any visitRelation(SubstraitPlanParser::RelationContext* ctx) override; @@ -50,10 +46,6 @@ class SubstraitPlanVisitor : public SubstraitPlanParserVisitor { SubstraitPlanParser::Source_definitionContext* ctx) override; std::any visitLiteral_specifier( SubstraitPlanParser::Literal_specifierContext* ctx) override; - std::any visitLiteral_basic_type( - SubstraitPlanParser::Literal_basic_typeContext* ctx) override; - std::any visitLiteral_complex_type( - SubstraitPlanParser::Literal_complex_typeContext* ctx) override; std::any visitMap_literal_value( SubstraitPlanParser::Map_literal_valueContext* ctx) override; std::any visitMap_literal( @@ -79,6 +71,8 @@ class SubstraitPlanVisitor : public SubstraitPlanParserVisitor { SubstraitPlanParser::RelationUsesSchemaContext* ctx) override; std::any 
visitRelation_filter_behavior( SubstraitPlanParser::Relation_filter_behaviorContext* ctx) override; + std::any visitMeasure_detail( + SubstraitPlanParser::Measure_detailContext* ctx) override; std::any visitRelationFilter( SubstraitPlanParser::RelationFilterContext* ctx) override; std::any visitRelationExpression( @@ -87,6 +81,16 @@ class SubstraitPlanVisitor : public SubstraitPlanParserVisitor { SubstraitPlanParser::RelationAdvancedExtensionContext* ctx) override; std::any visitRelationSourceReference( SubstraitPlanParser::RelationSourceReferenceContext* ctx) override; + std::any visitRelationGrouping( + SubstraitPlanParser::RelationGroupingContext* ctx) override; + std::any visitRelationMeasure( + SubstraitPlanParser::RelationMeasureContext* ctx) override; + std::any visitRelationSort( + SubstraitPlanParser::RelationSortContext* ctx) override; + std::any visitRelationCount( + SubstraitPlanParser::RelationCountContext* ctx) override; + std::any visitRelationJoinType( + SubstraitPlanParser::RelationJoinTypeContext* ctx) override; std::any visitFile_location( SubstraitPlanParser::File_locationContext* ctx) override; std::any visitFile_detail( @@ -106,14 +110,15 @@ class SubstraitPlanVisitor : public SubstraitPlanParserVisitor { SubstraitPlanParser::Named_table_detailContext* ctx) override; std::any visitRelation_ref( SubstraitPlanParser::Relation_refContext* ctx) override; + std::any visitSort_field( + SubstraitPlanParser::Sort_fieldContext* ctx) override; std::any visitId(SubstraitPlanParser::IdContext* ctx) override; + std::any visitSimple_id(SubstraitPlanParser::Simple_idContext* ctx) override; private: - std::shared_ptr symbolTable_; - std::shared_ptr errorListener_; - const SymbolInfo* currentRelationScope_{nullptr}; // Not owned. 
+ int numSpacesSeen_{0}; int numFunctionsSeen_{0}; }; diff --git a/src/substrait/textplan/parser/grammar/SubstraitPlanLexer.g4 b/src/substrait/textplan/parser/grammar/SubstraitPlanLexer.g4 index c8e748c2..e96edf5f 100644 --- a/src/substrait/textplan/parser/grammar/SubstraitPlanLexer.g4 +++ b/src/substrait/textplan/parser/grammar/SubstraitPlanLexer.g4 @@ -28,7 +28,6 @@ FUNCTION: 'FUNCTION'; AS: 'AS'; SCHEMA: 'SCHEMA'; RELATION: 'RELATION'; -NULLABLE: 'NULLABLE'; PIPELINES: 'PIPELINES'; COMMON: 'COMMON'; @@ -37,6 +36,13 @@ FILTER: 'FILTER'; PROJECTION: 'PROJECTION'; EXPRESSION: 'EXPRESSION'; ADVANCED_EXTENSION: 'ADVANCED_EXTENSION'; +GROUPING: 'GROUPING'; +MEASURE: 'MEASURE'; +INVOCATION: 'INVOCATION'; +SORT: 'SORT'; +BY: 'BY'; +COUNT: 'COUNT'; +TYPE: 'TYPE'; VIRTUAL_TABLE: 'VIRTUAL_TABLE'; LOCAL_FILES: 'LOCAL_FILES'; @@ -53,6 +59,7 @@ URI_FOLDER: 'URI_FOLDER'; PARTITION_INDEX: 'PARTITION_INDEX'; START: 'START'; LENGTH: 'LENGTH'; +ORC: 'ORC'; NULLVAL: 'NULL'; TRUEVAL: 'TRUE'; FALSEVAL: 'FALSE'; @@ -79,6 +86,7 @@ MINUS: '-'; LEFTANGLEBRACKET: '<'; RIGHTANGLEBRACKET: '>'; QUESTIONMARK: '?'; +ATSIGN: '@'; IDENTIFIER : [A-Z][A-Z0-9]* @@ -105,11 +113,12 @@ SPACES: [ \u000B\t\r\n] -> channel(HIDDEN); mode EXTENSIONS; fragment SCHEME: [A-Z]+ ; fragment HOSTNAME: [A-Z0-9-.]+ ; -fragment FILENAME: [A-Z0-9-.]+; +fragment FILENAME: [A-Z0-9-._]+; +fragment PATH: FILENAME ( '/' FILENAME )*; URI - : SCHEME ':' ( '//' HOSTNAME '/')? FILENAME - | FILENAME + : SCHEME ':' ( '//' HOSTNAME '/' )? PATH + | '/'? 
PATH ; EXTENSIONS_LEFTBRACE: '{' -> mode(DEFAULT_MODE), type(LEFTBRACE); diff --git a/src/substrait/textplan/parser/grammar/SubstraitPlanParser.g4 b/src/substrait/textplan/parser/grammar/SubstraitPlanParser.g4 index 851029ee..8c72a665 100644 --- a/src/substrait/textplan/parser/grammar/SubstraitPlanParser.g4 +++ b/src/substrait/textplan/parser/grammar/SubstraitPlanParser.g4 @@ -56,20 +56,34 @@ relation_filter_behavior | id id ; +// TODO -- Can the type be determined automatically from the function definition? +// TODO -- Consider moving the run phase to an optional third detail line. +measure_detail + : MEASURE expression (ARROW literal_complex_type)? (ATSIGN id)? SEMICOLON + | FILTER expression SEMICOLON + | INVOCATION id SEMICOLON + | sort_field + ; + relation_detail : COMMON SEMICOLON # relationCommon | BASE_SCHEMA id SEMICOLON # relationUsesSchema | relation_filter_behavior? FILTER expression SEMICOLON # relationFilter - | EXPRESSION expression SEMICOLON (AS id)? # relationExpression - | ADVANCED_EXTENSION SEMICOLON # relationAdvancedExtension - | source_reference SEMICOLON # relationSourceReference + | EXPRESSION expression SEMICOLON (AS id)? # relationExpression + | ADVANCED_EXTENSION SEMICOLON # relationAdvancedExtension + | source_reference SEMICOLON # relationSourceReference + | GROUPING expression SEMICOLON # relationGrouping + | MEASURE LEFTBRACE measure_detail* RIGHTBRACE # relationMeasure + | sort_field # relationSort + | COUNT NUMBER SEMICOLON # relationCount + | TYPE id SEMICOLON # relationJoinType ; expression - : id LEFTPAREN expression (COMMA expression)? COMMA? 
RIGHTPAREN # expressionFunctionUse - | constant # expressionConstant - | column_name # expressionColumn - | expression AS literal_complex_type # expressionCast + : id LEFTPAREN (expression COMMA?)* RIGHTPAREN # expressionFunctionUse + | constant # expressionConstant + | column_name # expressionColumn + | expression AS literal_complex_type # expressionCast ; constant @@ -83,13 +97,13 @@ constant ; literal_basic_type - : id literal_specifier? QUESTIONMARK? + : id QUESTIONMARK? literal_specifier? ; literal_complex_type : literal_basic_type | LIST QUESTIONMARK? LEFTANGLEBRACKET literal_complex_type? RIGHTANGLEBRACKET - | MAP QUESTIONMARK? LEFTANGLEBRACKET literal_basic_type? COMMA? literal_complex_type? RIGHTANGLEBRACKET + | MAP QUESTIONMARK? LEFTANGLEBRACKET (literal_basic_type COMMA literal_complex_type)? RIGHTANGLEBRACKET | STRUCT QUESTIONMARK? LEFTANGLEBRACKET literal_complex_type? (COMMA literal_complex_type)* RIGHTANGLEBRACKET ; @@ -130,6 +144,7 @@ file_detail : PARTITION_INDEX COLON NUMBER | START COLON NUMBER | LENGTH COLON NUMBER + | ORC COLON LEFTBRACE RIGHTBRACE | file_location ; @@ -152,15 +167,7 @@ schema_definition ; schema_item - : id column_type SEMICOLON - ; - -column_type - : column_attribute* id - ; - -column_attribute - : NULLABLE + : id literal_complex_type SEMICOLON ; source_definition @@ -182,8 +189,12 @@ function : FUNCTION name (AS id)? SEMICOLON ; +sort_field + : SORT expression (BY id)? SEMICOLON + ; + name - : id COLON signature + : id COLON signature? ; signature @@ -192,7 +203,17 @@ signature // List keywords here to make them not reserved. 
id - : IDENTIFIER (UNDERSCORE+ IDENTIFIER)* + : simple_id (UNDERSCORE+ simple_id)* + ; + +simple_id + : IDENTIFIER | FILTER | SCHEMA + | NULLVAL + | SORT + | MEASURE + | GROUPING + | COUNT + | TYPE ; diff --git a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp index a501cf4b..166f7b97 100644 --- a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp +++ b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp @@ -161,9 +161,9 @@ std::vector getTestCases() { { "test3-schema", R"(schema schema { - r_regionkey UNKNOWN; - r_name nullable UNKNOWN; - r_comment UNKNOWN; + r_regionkey i32; + r_name string?; + r_comment string; })", AllOf( HasSymbols({"schema", "r_regionkey", "r_name", "r_comment"}), @@ -291,6 +291,16 @@ std::vector getTestCases() { })", AllOf(HasSymbols({"myread"}), ParsesOk()), }, + { + "test7-bad-filter-relation", + R"(filter relation filter { + condition true_bool; + })", + HasErrors( + {"2:22 → missing 'FILTER' at 'true'", + "2:12 → Best effort and post join are the only two legal filter behavior choices. You may also not provide one which will result to the default filter behavior.", + "2:12 → Best effort and post join are the only two legal filter behavior choices. 
You may also not provide one which will result to the default filter behavior."}), + }, { "test10-literals-boolean", R"(project relation literalexamples { @@ -414,8 +424,10 @@ std::vector getTestCases() { expression "two\nlines with \"escapes\""_varchar<80>; expression "abcde"_fixedchar<5>; })", - AsBinaryPlan(Partially(EqualsProto<::substrait::proto::Plan>( - R"(relations { root { input { project { + AllOf( + HasErrors({}), + AsBinaryPlan(Partially(EqualsProto<::substrait::proto::Plan>( + R"(relations { root { input { project { expressions { literal { string: "simple text" } } expressions { literal { string: "123" } } expressions { literal { @@ -434,7 +446,7 @@ std::vector getTestCases() { var_char: { value: "two\nlines with \"escapes\"" length: 80 } } } expressions { literal { fixed_char: "abcde" } } - } } } })"))), + } } } })")))), }, { "test10-literals-strings-nulls", @@ -554,7 +566,7 @@ std::vector getTestCases() { R"(project relation literalexamples { expression {"a", "b", "c"}_list; expression {null, "a", "b"}_list; - expression {{"a", "b"}, {"1", "2"}}_list>?; + expression {{"a", "b"}, {"1", "2"}}_list?>; expression {}_list; expression {}_list; expression {}_list?; @@ -578,10 +590,14 @@ std::vector getTestCases() { values { string: "1" } values { string: "2" } } } } } } - expressions { literal { empty_list { type { string { } } } } } expressions { literal { empty_list { type { string { - nullability: NULLABILITY_NULLABLE } } } } } - expressions { literal { empty_list { type { string { } } + nullability: NULLABILITY_REQUIRED } } + nullability: NULLABILITY_REQUIRED } } } + expressions { literal { empty_list { type { string { + nullability: NULLABILITY_NULLABLE } } + nullability: NULLABILITY_REQUIRED } } } + expressions { literal { empty_list { type { string { + nullability: NULLABILITY_REQUIRED } } nullability: NULLABILITY_NULLABLE } } } } } } })")), }, @@ -704,6 +720,7 @@ std::vector getTestCases() { expression "unknown\escape"_string; expression 
{123_i8}_map; expression {123}_map; + expression "abcde"_fixedchar; })", HasErrors({ "9:34 → mismatched input 'r5' expecting NUMBER", @@ -775,6 +792,13 @@ std::vector getTestCases() { } } } } } })"))), }, + { + "test13-bad-functions", + R"(extension_space blah.yaml { + function sum: as sum; + })", + HasErrors({"Functions should have an associated type."}), + }, { "test14-three-node-pipeline-with-fields", R"(pipelines { diff --git a/src/substrait/textplan/tests/CMakeLists.txt b/src/substrait/textplan/tests/CMakeLists.txt index 49c4a2df..9d0ee047 100644 --- a/src/substrait/textplan/tests/CMakeLists.txt +++ b/src/substrait/textplan/tests/CMakeLists.txt @@ -18,3 +18,40 @@ add_test_case( gmock gtest gtest_main) + +add_test_case( + round_trip_test + SOURCES + RoundtripTest.cpp + EXTRA_LINK_LIBS + substrait_textplan_converter + substrait_textplan_loader + substrait_common + substrait_proto + parse_result_matchers + protobuf-matchers + fmt::fmt-header-only + absl::strings + gmock + gtest + gtest_main) + +cmake_path(GET CMAKE_CURRENT_SOURCE_DIR PARENT_PATH TEXTPLAN_SOURCE_DIR) + +add_custom_command( + TARGET round_trip_test + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E echo "Copying unit test data.." 
+ COMMAND ${CMAKE_COMMAND} -E make_directory + "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data" + COMMAND + ${CMAKE_COMMAND} -E copy + "${TEXTPLAN_SOURCE_DIR}/converter/data/q6_first_stage.json" + "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data/q6_first_stage.json" + COMMAND ${CMAKE_COMMAND} -E copy "${TEXTPLAN_SOURCE_DIR}/data/*.json" + "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data/") + +message( + STATUS + "test data will be here: ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data" +) diff --git a/src/substrait/textplan/tests/RoundtripTest.cpp b/src/substrait/textplan/tests/RoundtripTest.cpp new file mode 100644 index 00000000..da70c355 --- /dev/null +++ b/src/substrait/textplan/tests/RoundtripTest.cpp @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: Apache-2.0 */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "absl/strings/str_split.h" +#include "gmock/gmock.h" +#include "substrait/textplan/SymbolTablePrinter.h" +#include "substrait/textplan/converter/LoadBinary.h" +#include "substrait/textplan/converter/ParseBinary.h" +#include "substrait/textplan/parser/ParseText.h" +#include "substrait/textplan/tests/ParseResultMatchers.h" + +using ::protobuf_matchers::EqualsProto; +using ::protobuf_matchers::IgnoringFieldPaths; +using ::protobuf_matchers::Partially; +using ::testing::AllOf; + +namespace io::substrait::textplan { +namespace { + +std::string addLineNumbers(const std::string& text) { + std::stringstream result; + int lineNum = 0; + for (absl::string_view sp : absl::StrSplit(text, '\n')) { + result << std::setw(4) << ++lineNum << " " << sp << std::endl; + } + return result.str(); +} + +class RoundTripBinaryToTextFixture + : public ::testing::TestWithParam {}; + +std::vector getTestCases() { + const std::filesystem::path currPath = std::filesystem::current_path(); + std::vector filenames{}; + std::filesystem::path testDataPath = currPath; + testDataPath.append("data"); + for (auto const& dirEntry : + 
std::filesystem::recursive_directory_iterator{testDataPath}) { + std::string pathName = dirEntry.path(); + if (pathName.substr(pathName.length() - 5) == ".json") { + filenames.push_back(pathName); + } + } + std::sort(filenames.begin(), filenames.end()); + return filenames; +} + +TEST_P(RoundTripBinaryToTextFixture, RoundTrip) { + auto filename = GetParam(); + std::string json = readFromFile(filename); + auto planOrErrors = loadFromJson(json); + std::vector errors = planOrErrors.errors(); + ASSERT_THAT(errors, ::testing::ElementsAre()); + + auto plan = *planOrErrors; + + auto textResult = parseBinaryPlan(plan); + auto symbols = textResult.getSymbolTable().getSymbols(); + + std::string outputText = + SymbolTablePrinter::outputToText(textResult.getSymbolTable()); + + auto stream = loadTextString(outputText); + try { + auto result = parseStream(stream); + auto outputBinary = + SymbolTablePrinter::outputToBinaryPlan(result.getSymbolTable()); + + ASSERT_THAT( + result, + ::testing::AllOf( + ParsesOk(), + HasErrors({}), + AsBinaryPlan(IgnoringFieldPaths( + {"extension_uris", "extensions"}, EqualsProto(plan))))) + << std::endl + << "Intermediate result:" << std::endl + << addLineNumbers(outputText); + } catch (std::invalid_argument ex) { + FAIL() << ex.what(); + } +} + +INSTANTIATE_TEST_SUITE_P( + RoundTripBinaryToTextTests, + RoundTripBinaryToTextFixture, + ::testing::ValuesIn(getTestCases()), + [](const testing::TestParamInfo& info) { + std::string identifier = info.param; + auto lastSlash = identifier.find_last_of('/'); + if (lastSlash != std::string::npos) { + identifier = identifier.substr(lastSlash); + } + if (identifier.length() > 5 && + identifier.substr(identifier.length() - 5) == ".json") { + identifier = identifier.substr(0, identifier.length() - 5); + } + + // Remove non-alphanumeric characters to make the test framework happy. 
+ identifier.erase( + std::remove_if( + identifier.begin(), + identifier.end(), + [](auto const& c) -> bool { return !std::isalnum(c); }), + identifier.end()); + return identifier; + }); + +} // namespace +} // namespace io::substrait::textplan diff --git a/src/substrait/type/tests/TypeTest.cpp b/src/substrait/type/tests/TypeTest.cpp index 0f1c2c2d..f7de5859 100644 --- a/src/substrait/type/tests/TypeTest.cpp +++ b/src/substrait/type/tests/TypeTest.cpp @@ -2,6 +2,7 @@ #include +#include #include "substrait/type/Type.h" using namespace io::substrait; @@ -147,6 +148,14 @@ TEST_F(TypeTest, decodeTest) { ASSERT_EQ(typePtr->scale(), 2); }); + testDecode( + "decimal?<18,2>", [](const std::shared_ptr& typePtr) { + ASSERT_EQ(typePtr->signature(), "dec<18,2>"); + ASSERT_EQ(typePtr->precision(), 18); + ASSERT_EQ(typePtr->scale(), 2); + ASSERT_TRUE(typePtr->nullable()); + }); + testDecode( "struct", [](const std::shared_ptr& typePtr) { From 32380f280561f0c49e77e832028ef87d4441593a Mon Sep 17 00:00:00 2001 From: David Sisson Date: Thu, 1 Jun 2023 23:29:23 -0700 Subject: [PATCH 03/33] Convert to using type over string. --- .../textplan/parser/SubstraitPlanRelationVisitor.cpp | 4 +--- src/substrait/textplan/parser/SubstraitPlanTypeVisitor.cpp | 5 +++++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp index 9537fc36..a3dde516 100644 --- a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp +++ b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp @@ -394,9 +394,7 @@ std::any SubstraitPlanRelationVisitor::visitRelationUsesSchema( continue; } schema->add_names(sym.name); - auto typeText = ANY_CAST(std::string, sym.blob); - // TODO -- Use the location of the schema item for errors. 
- auto typeProto = textToTypeProto(ctx, typeText); + auto typeProto = ANY_CAST(::substrait::proto::Type, sym.blob); if (typeProto.kind_case() != ::substrait::proto::Type::KIND_NOT_SET) { *schema->mutable_struct_()->add_types() = typeProto; } diff --git a/src/substrait/textplan/parser/SubstraitPlanTypeVisitor.cpp b/src/substrait/textplan/parser/SubstraitPlanTypeVisitor.cpp index 8e050004..478b5af0 100644 --- a/src/substrait/textplan/parser/SubstraitPlanTypeVisitor.cpp +++ b/src/substrait/textplan/parser/SubstraitPlanTypeVisitor.cpp @@ -181,6 +181,11 @@ ::substrait::proto::Type SubstraitPlanTypeVisitor::typeToProto( } case TypeKind::kMap: { auto map = reinterpret_cast(&decodedType); + if (map->keyType() == nullptr || map->valueType() == nullptr) { + errorListener_->addError( + ctx->getStart(), "Maps require both a key and a value type."); + break; + } *type.mutable_map()->mutable_key() = typeToProto(ctx, *map->keyType()); *type.mutable_map()->mutable_value() = typeToProto(ctx, *map->valueType()); From 252b61e6b79ab9b70269fb3f99faa0ad9aeb6286 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Fri, 2 Jun 2023 11:41:00 -0700 Subject: [PATCH 04/33] Fix for binary->text nullability for fixedchar and varchar. 
--- .../textplan/converter/PlanPrinterVisitor.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/substrait/textplan/converter/PlanPrinterVisitor.cpp b/src/substrait/textplan/converter/PlanPrinterVisitor.cpp index 15ddcee5..ad1d04c6 100644 --- a/src/substrait/textplan/converter/PlanPrinterVisitor.cpp +++ b/src/substrait/textplan/converter/PlanPrinterVisitor.cpp @@ -44,11 +44,14 @@ std::string stringEscape(std::string_view str) { std::string invocationToString( ::substrait::proto::AggregateFunction_AggregationInvocation invocation) { switch (invocation) { - case ::substrait::proto::AggregateFunction_AggregationInvocation_AGGREGATION_INVOCATION_ALL: + case ::substrait::proto:: + AggregateFunction_AggregationInvocation_AGGREGATION_INVOCATION_ALL: return "all"; - case ::substrait::proto::AggregateFunction_AggregationInvocation_AGGREGATION_INVOCATION_DISTINCT: + case ::substrait::proto:: + AggregateFunction_AggregationInvocation_AGGREGATION_INVOCATION_DISTINCT: return "distinct"; - case ::substrait::proto::AggregateFunction_AggregationInvocation_AGGREGATION_INVOCATION_UNSPECIFIED: + case ::substrait::proto:: + AggregateFunction_AggregationInvocation_AGGREGATION_INVOCATION_UNSPECIFIED: default: return "unspecified"; } @@ -190,7 +193,8 @@ std::any PlanPrinterVisitor::visitType(const ::substrait::proto::Type& type) { case ::substrait::proto::Type::kVarchar: { std::stringstream result; result << "varchar"; - if (type.varchar().nullability()) { + if (type.varchar().nullability() == + ::substrait::proto::Type::NULLABILITY_NULLABLE) { result << "?"; } result << "<" << type.varchar().length() << ">"; @@ -199,7 +203,8 @@ std::any PlanPrinterVisitor::visitType(const ::substrait::proto::Type& type) { case ::substrait::proto::Type::kFixedChar: { std::stringstream result; result << "fixedchar"; - if (type.fixed_char().nullability()) { + if (type.fixed_char().nullability() == + ::substrait::proto::Type::NULLABILITY_NULLABLE) { result << "?"; } 
result << "<" << type.fixed_char().length() << ">"; From 8d7d21fd947654f0464938fd6ba95871cc2340e9 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Fri, 2 Jun 2023 14:15:45 -0700 Subject: [PATCH 05/33] Various fixes --- .../textplan/parser/grammar/SubstraitPlanParser.g4 | 2 +- .../textplan/parser/tests/TextPlanParserTest.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/substrait/textplan/parser/grammar/SubstraitPlanParser.g4 b/src/substrait/textplan/parser/grammar/SubstraitPlanParser.g4 index 8c72a665..f5520241 100644 --- a/src/substrait/textplan/parser/grammar/SubstraitPlanParser.g4 +++ b/src/substrait/textplan/parser/grammar/SubstraitPlanParser.g4 @@ -103,7 +103,7 @@ literal_basic_type literal_complex_type : literal_basic_type | LIST QUESTIONMARK? LEFTANGLEBRACKET literal_complex_type? RIGHTANGLEBRACKET - | MAP QUESTIONMARK? LEFTANGLEBRACKET (literal_basic_type COMMA literal_complex_type)? RIGHTANGLEBRACKET + | MAP QUESTIONMARK? LEFTANGLEBRACKET literal_basic_type? COMMA? literal_complex_type? RIGHTANGLEBRACKET | STRUCT QUESTIONMARK? LEFTANGLEBRACKET literal_complex_type? 
(COMMA literal_complex_type)* RIGHTANGLEBRACKET ; diff --git a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp index 166f7b97..fd15c853 100644 --- a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp +++ b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp @@ -611,7 +611,7 @@ std::vector getTestCases() { })", AsBinaryPlan((EqualsProto<::substrait::proto::Plan>( R"(relations { root { input { project { - expressions { literal { null { list { type { string { } } + expressions { literal { null { list { type { string { nullability: NULLABILITY_REQUIRED } } nullability: NULLABILITY_NULLABLE } } } } expressions { literal { null { list { type { list { type { string { @@ -634,11 +634,11 @@ std::vector getTestCases() { AsBinaryPlan(EqualsProto<::substrait::proto::Plan>( R"(relations { root { input { project { expressions { literal { map { - key_values { key { i16: 42 } value { string: "life" } } - key_values { key { i16: 32 } value { string: "everything" } } + key_values { key { i16: 42 nullable: true } value { string: "life" } } + key_values { key { i16: 32 nullable: true } value { string: "everything" } } } } } expressions { literal { - empty_map { key { fp32 {} } value { string { } } } } } + empty_map { key { fp32 { nullability: NULLABILITY_REQUIRED } } value { string {nullability: NULLABILITY_REQUIRED } } nullability: NULLABILITY_REQUIRED} } } } } } })")), }, { From 5bb355ecff8445504ac08a55c76af8274afd5a9f Mon Sep 17 00:00:00 2001 From: David Sisson Date: Fri, 2 Jun 2023 14:17:22 -0700 Subject: [PATCH 06/33] Minor cleanup. 
--- .../parser/SubstraitPlanTypeVisitor.cpp | 24 ++----------------- .../parser/SubstraitPlanTypeVisitor.h | 2 -- 2 files changed, 2 insertions(+), 24 deletions(-) diff --git a/src/substrait/textplan/parser/SubstraitPlanTypeVisitor.cpp b/src/substrait/textplan/parser/SubstraitPlanTypeVisitor.cpp index 478b5af0..c015f394 100644 --- a/src/substrait/textplan/parser/SubstraitPlanTypeVisitor.cpp +++ b/src/substrait/textplan/parser/SubstraitPlanTypeVisitor.cpp @@ -11,34 +11,14 @@ namespace io::substrait::textplan { -std::any SubstraitPlanTypeVisitor::visitLiteral_specifier( - SubstraitPlanParser::Literal_specifierContext* ctx) { - // Provides detail for the width of the type. - return visitChildren(ctx); -} - std::any SubstraitPlanTypeVisitor::visitLiteral_basic_type( SubstraitPlanParser::Literal_basic_typeContext* ctx) { - std::__1::shared_ptr decodedType; - try { - decodedType = Type::decode(ctx->getText()); - } catch (...) { - errorListener_->addError(ctx->getStart(), "Failed to decode type."); - return ::substrait::proto::Type{}; - } - return typeToProto(ctx, *decodedType); + return textToTypeProto(ctx, ctx->getText()); } std::any SubstraitPlanTypeVisitor::visitLiteral_complex_type( SubstraitPlanParser::Literal_complex_typeContext* ctx) { - std::shared_ptr decodedType; - try { - decodedType = Type::decode(ctx->getText()); - } catch (...) 
{ - errorListener_->addError(ctx->getStart(), "Failed to decode type."); - return ::substrait::proto::Type{}; - } - return typeToProto(ctx, *decodedType); + return textToTypeProto(ctx, ctx->getText()); } ::substrait::proto::Type SubstraitPlanTypeVisitor::textToTypeProto( diff --git a/src/substrait/textplan/parser/SubstraitPlanTypeVisitor.h b/src/substrait/textplan/parser/SubstraitPlanTypeVisitor.h index aa61ea91..51d0efd8 100644 --- a/src/substrait/textplan/parser/SubstraitPlanTypeVisitor.h +++ b/src/substrait/textplan/parser/SubstraitPlanTypeVisitor.h @@ -23,8 +23,6 @@ class SubstraitPlanTypeVisitor : public SubstraitPlanParserBaseVisitor { errorListener_ = std::move(errorListener); } - std::any visitLiteral_specifier( - SubstraitPlanParser::Literal_specifierContext* ctx) override; std::any visitLiteral_basic_type( SubstraitPlanParser::Literal_basic_typeContext* ctx) override; std::any visitLiteral_complex_type( From 4fbcf67893be168c6aa5ff1b3223a08f811c0b2c Mon Sep 17 00:00:00 2001 From: David Sisson Date: Fri, 2 Jun 2023 14:38:37 -0700 Subject: [PATCH 07/33] Fixed more occurrences of nullability not being handled correctly. 
--- .../parser/SubstraitPlanRelationVisitor.cpp | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp index a3dde516..3c22b163 100644 --- a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp +++ b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp @@ -1189,7 +1189,8 @@ SubstraitPlanRelationVisitor::visitNumber( case ::substrait::proto::Type::kI8: { int32_t val = std::stoi(node->getText()); literal.set_i8(val); - if (literalType.i8().nullability()) { + if (literalType.i8().nullability() == + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE) { literal.set_nullable(true); } break; @@ -1197,7 +1198,8 @@ SubstraitPlanRelationVisitor::visitNumber( case ::substrait::proto::Type::kI16: { int32_t val = std::stoi(node->getText()); literal.set_i16(val); - if (literalType.i16().nullability()) { + if (literalType.i16().nullability() == + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE) { literal.set_nullable(true); } break; @@ -1205,7 +1207,8 @@ SubstraitPlanRelationVisitor::visitNumber( case ::substrait::proto::Type::kI32: { int32_t val = std::stoi(node->getText()); literal.set_i32(val); - if (literalType.i32().nullability()) { + if (literalType.i32().nullability() == + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE) { literal.set_nullable(true); } break; @@ -1213,7 +1216,8 @@ SubstraitPlanRelationVisitor::visitNumber( case ::substrait::proto::Type::kI64: { int64_t val = std::stol(node->getText()); literal.set_i64(val); - if (literalType.i64().nullability()) { + if (literalType.i64().nullability() == + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE) { literal.set_nullable(true); } break; @@ -1221,7 +1225,8 @@ SubstraitPlanRelationVisitor::visitNumber( case ::substrait::proto::Type::kFp32: { float val = std::stof(node->getText()); literal.set_fp32(val); - 
if (literalType.fp32().nullability()) { + if (literalType.fp32().nullability() == + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE) { literal.set_nullable(true); } break; @@ -1229,7 +1234,8 @@ SubstraitPlanRelationVisitor::visitNumber( case ::substrait::proto::Type::kFp64: { double val = std::stod(node->getText()); literal.set_fp64(val); - if (literalType.fp64().nullability()) { + if (literalType.fp64().nullability() == + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE) { literal.set_nullable(true); } break; @@ -1245,7 +1251,8 @@ SubstraitPlanRelationVisitor::visitNumber( break; } *literal.mutable_decimal() = decimal.toProto(); - if (literalType.decimal().nullability()) { + if (literalType.decimal().nullability() == + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE) { literal.set_nullable(true); } break; From 26c701fefc2a73ccac3b314f1334760514fe1bb3 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Fri, 2 Jun 2023 14:41:19 -0700 Subject: [PATCH 08/33] Updated based on grammar changes. 
--- src/substrait/textplan/parser/tests/TextPlanParserTest.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp index fd15c853..e4dccc8e 100644 --- a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp +++ b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp @@ -960,7 +960,8 @@ std::vector getTestCases() { HasErrors({ "1:0 → extraneous input 'relation' expecting {, " "'EXTENSION_SPACE', 'SCHEMA', 'PIPELINES', 'FILTER', " - "'SOURCE', IDENTIFIER}", + "'GROUPING', 'MEASURE', 'SORT', 'COUNT', 'TYPE', 'SOURCE', " + "'NULL', IDENTIFIER}", "1:24 → mismatched input '{' expecting 'RELATION'", "1:9 → Unrecognized relation type: notyperelation", }), From 6a7ee481723f1e72fc2711ec109add60c258909d Mon Sep 17 00:00:00 2001 From: David Sisson Date: Fri, 2 Jun 2023 14:43:01 -0700 Subject: [PATCH 09/33] Fixed test to match recent change to numbering strategy. 
--- src/substrait/textplan/parser/tests/TextPlanParserTest.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp index e4dccc8e..8d1dff1d 100644 --- a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp +++ b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp @@ -783,8 +783,8 @@ std::vector getTestCases() { })", AsBinaryPlan(Partially(EqualsProto<::substrait::proto::Plan>( R"(relations { root { input { project { - expressions { scalar_function { function_reference: 1 - arguments { value { scalar_function { function_reference: 2 + expressions { scalar_function { function_reference: 0 + arguments { value { scalar_function { function_reference: 1 arguments { value { literal { i32: 1 } } } arguments { value { literal { i32: -2 } } } } } } From 248ef102c83a72839de03bcd515c8f84c7e2196c Mon Sep 17 00:00:00 2001 From: David Sisson Date: Fri, 2 Jun 2023 14:51:41 -0700 Subject: [PATCH 10/33] Added newly emitted fields to an earlier test. 
--- .../parser/tests/TextPlanParserTest.cpp | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp index 8d1dff1d..a7d3fc27 100644 --- a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp +++ b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp @@ -232,7 +232,24 @@ std::vector getTestCases() { function concat:str as concat; })")), AsBinaryPlan(EqualsProto<::substrait::proto::Plan>( - R"(relations { root { input { project { + R"(extension_uris { + extension_uri_anchor: 1 uri: "blah.yaml" + } + extensions { + extension_function { + extension_uri_reference: 1 name: "add:i8" } + } + extensions { + extension_function { + extension_uri_reference: 1 function_anchor: 1 + name: "subtract:i8" } + } + extensions { + extension_function { + extension_uri_reference: 1 function_anchor: 2 + name: "concat:str" } + } + relations { root { input { project { expressions { selection { direct_reference { @@ -261,15 +278,15 @@ std::vector getTestCases() { } } expressions { scalar_function { - function_reference: 1 arguments { value { selection { + function_reference: 0 arguments { value { selection { direct_reference { struct_field { } } } } } arguments { value { literal { i8: 1 } } } } } expressions { scalar_function { - function_reference: 2 arguments { value { selection { + function_reference: 1 arguments { value { selection { direct_reference { struct_field { } } } } } arguments { value { literal { i8: 1 } } } } } expressions { scalar_function { - function_reference: 3 arguments { value { selection { + function_reference: 2 arguments { value { selection { direct_reference { struct_field { field: 1 } } } } } arguments { value { selection { direct_reference { struct_field { field: 1 } } } } } } } From 72b6dbe51a70fa22e80143d4686a1599af97ca96 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Fri, 2 Jun 2023 15:00:24 -0700 
Subject: [PATCH 11/33] Added an option to allow the roundtrip tests to be skipped (useful until all of the features have caught up). --- src/substrait/textplan/tests/CMakeLists.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/substrait/textplan/tests/CMakeLists.txt b/src/substrait/textplan/tests/CMakeLists.txt index 9d0ee047..0e7ba329 100644 --- a/src/substrait/textplan/tests/CMakeLists.txt +++ b/src/substrait/textplan/tests/CMakeLists.txt @@ -19,6 +19,12 @@ add_test_case( gtest gtest_main) +option( + SUBSTRAIT_CPP_ROUNDTRIP_TESTING + "Enable substrait-cpp textplan roundtrip tests." + OFF) + +if(${SUBSTRAIT_CPP_ROUNDTRIP_TESTING}) add_test_case( round_trip_test SOURCES @@ -55,3 +61,4 @@ message( STATUS "test data will be here: ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data" ) +endif() From 1ba347c7a0be7ac65712c8ad2db55d5da6280876 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Fri, 2 Jun 2023 16:43:33 -0700 Subject: [PATCH 12/33] More nullability fixes. --- .../textplan/parser/tests/TextPlanParserTest.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp index a7d3fc27..c18b259f 100644 --- a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp +++ b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp @@ -632,13 +632,13 @@ std::vector getTestCases() { nullability: NULLABILITY_NULLABLE } } } } expressions { literal { null { list { type { list { type { string { - nullability: NULLABILITY_NULLABLE } } } } + nullability: NULLABILITY_NULLABLE } } nullability: NULLABILITY_REQUIRED } } nullability: NULLABILITY_NULLABLE } } } } expressions { literal { null { list { type { - list { type { string { } } } } + list { type { string { nullability: NULLABILITY_REQUIRED } } nullability: NULLABILITY_REQUIRED } } nullability: NULLABILITY_NULLABLE } } } } expressions { literal { null { list { type { - list { type 
{ string { } } } } + list { type { string { nullability: NULLABILITY_REQUIRED } } nullability: NULLABILITY_REQUIRED } } nullability: NULLABILITY_NULLABLE } } } } } } } })"))), }, @@ -651,8 +651,8 @@ std::vector getTestCases() { AsBinaryPlan(EqualsProto<::substrait::proto::Plan>( R"(relations { root { input { project { expressions { literal { map { - key_values { key { i16: 42 nullable: true } value { string: "life" } } - key_values { key { i16: 32 nullable: true } value { string: "everything" } } + key_values { key { i16: 42 } value { string: "life" } } + key_values { key { i16: 32 } value { string: "everything" } } } } } expressions { literal { empty_map { key { fp32 { nullability: NULLABILITY_REQUIRED } } value { string {nullability: NULLABILITY_REQUIRED } } nullability: NULLABILITY_REQUIRED} } } From 6ca2b4e0bc664f800c4c7f799076d689c889d202 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Fri, 2 Jun 2023 18:23:08 -0700 Subject: [PATCH 13/33] Update type to match original json plan. --- src/substrait/textplan/parser/data/provided_sample1.splan | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/substrait/textplan/parser/data/provided_sample1.splan b/src/substrait/textplan/parser/data/provided_sample1.splan index 80b78216..ed3a0dd3 100644 --- a/src/substrait/textplan/parser/data/provided_sample1.splan +++ b/src/substrait/textplan/parser/data/provided_sample1.splan @@ -19,7 +19,7 @@ read relation read { schema schema { r_regionkey i32; r_name string; - r_comment string; + r_comment string?; } source named_table named { From bf29afab7ad7019f436dc1adb070f391215917bc Mon Sep 17 00:00:00 2001 From: David Sisson Date: Sat, 3 Jun 2023 01:31:19 -0700 Subject: [PATCH 14/33] Broke the bad literals test into multiple to make it easier to debug. 
--- .../parser/SubstraitPlanTypeVisitor.cpp | 5 +- .../parser/tests/TextPlanParserTest.cpp | 108 +++++++++++------- 2 files changed, 72 insertions(+), 41 deletions(-) diff --git a/src/substrait/textplan/parser/SubstraitPlanTypeVisitor.cpp b/src/substrait/textplan/parser/SubstraitPlanTypeVisitor.cpp index c015f394..403caa22 100644 --- a/src/substrait/textplan/parser/SubstraitPlanTypeVisitor.cpp +++ b/src/substrait/textplan/parser/SubstraitPlanTypeVisitor.cpp @@ -187,7 +187,10 @@ bool SubstraitPlanTypeVisitor::insideStructLiteralWithExternalType( if (ctx == nullptr) { return false; } - if (ctx->getRuleIndex() == SubstraitPlanParser::RuleConstant) { + if (ctx->getRuleIndex() == SubstraitPlanParser::RuleConstant && + const_cast( + dynamic_cast(ctx)) + ->struct_literal() != nullptr) { return true; } return insideStructLiteralWithExternalType( diff --git a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp index c18b259f..91fd3960 100644 --- a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp +++ b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp @@ -714,18 +714,61 @@ std::vector getTestCases() { }} } })"))), }, { - "test11-bad-literals", + "test11-bad-numeric-literals", R"(project relation literalexamples { expression 1; expression 1.5; expression 1_potato; - expression "data"_potato; expression null; - expression "ddb287e8"_uuid; - expression "nothex"_uuid; expression 42_decimal; expression 42_decimal; expression 42_decimal<-5,-4>; + })", + HasErrors({ + "6:34 → mismatched input 'r5' expecting NUMBER", + "6:36 → mismatched input ',' expecting 'FILTER'", + "7:34 → mismatched input 'r' expecting NUMBER", + "7:35 → mismatched input ',' expecting 'FILTER'", + "4:25 → Unable to recognize requested type.", + "6:26 → Failed to decode type.", + "7:26 → Failed to decode type.", + "2:23 → Literals should include a type.", + "3:23 → Literals should include a type.", + "4:25 → Unable to 
recognize requested type.", + "5:23 → Null literals require type.", + "6:26 → Failed to decode type.", + "6:34 → Best effort and post join are the only two legal filter behavior choices. You may also not provide one which will result to the default filter behavior.", + "6:34 → Best effort and post join are the only two legal filter behavior choices. You may also not provide one which will result to the default filter behavior.", + "6:34 → Filters are not permitted for this kind of relation.", + "7:26 → Failed to decode type.", + "7:34 → Best effort and post join are the only two legal filter behavior choices. You may also not provide one which will result to the default filter behavior.", + "7:34 → Best effort and post join are the only two legal filter behavior choices. You may also not provide one which will result to the default filter behavior.", + "7:34 → Filters are not permitted for this kind of relation.", + "8:23 → Could not parse literal as decimal.", + }), + }, + { + "test11-bad-stringlike-literals", + R"(project relation literalexamples { + expression "data"_potato; + expression "ddb287e8"_uuid; + expression "nothex"_uuid; + expression "unknown\escape"_string; + expression "abcde"_fixedchar; + })", + HasErrors({ + "2:30 → Unable to recognize requested type.", + "6:31 → Unable to recognize requested type.", + "2:30 → Unable to recognize requested type.", + "3:23 → UUIDs are 128 bits long and thus should be specified with exactly 32 hexadecimal digits.", + "4:23 → UUIDs should be be specified with hexadecimal characters with optional dashes only.", + "5:31 → Unknown slash escape sequence.", + "6:31 → Unable to recognize requested type.", + }), + }, + { + "test11-bad-complex-literals", + R"(project relation literalexamples { expression {}_list?; expression {}_struct; expression {}_struct<>; @@ -734,46 +777,31 @@ std::vector getTestCases() { expression {}_map<,string>; expression {}_map<,>; expression {}_list<>; - expression "unknown\escape"_string; 
expression {123_i8}_map; expression {123}_map; - expression "abcde"_fixedchar; })", HasErrors({ - "9:34 → mismatched input 'r5' expecting NUMBER", - "9:36 → mismatched input ',' expecting 'FILTER'", - "10:34 → mismatched input 'r' expecting NUMBER", - "10:35 → mismatched input ',' expecting 'FILTER'", - "12:38 → extraneous input '?' expecting ';'", - "2:23 → Literals should include a type.", - "3:23 → Literals should include a type.", - "4:25 → Unable to recognize requested type.", - "5:30 → Unable to recognize requested type.", - "6:23 → Null literals require type.", - "7:23 → UUIDs are 128 bits long and thus should be specified with exactly 32 hexadecimal digits.", - "8:23 → UUIDs should be be specified with hexadecimal characters with optional dashes only.", - "9:26 → Failed to decode type.", - "9:34 → Best effort and post join are the only two legal filter behavior choices. You may also not provide one which will result to the default filter behavior.", - "9:34 → Best effort and post join are the only two legal filter behavior choices. You may also not provide one which will result to the default filter behavior.", - "9:34 → Filters are not permitted for this kind of relation.", - "10:26 → Failed to decode type.", - "10:34 → Best effort and post join are the only two legal filter behavior choices. You may also not provide one which will result to the default filter behavior.", - "10:34 → Best effort and post join are the only two legal filter behavior choices. 
You may also not provide one which will result to the default filter behavior.", - "10:34 → Filters are not permitted for this kind of relation.", - "11:23 → Could not parse literal as decimal.", - "13:26 → Unable to recognize requested type.", - "14:26 → Unable to recognize requested type.", - "15:26 → Maps require both a key and a value type.", - "15:23 → Unsupported type 0.", - "16:26 → Maps require both a key and a value type.", - "16:23 → Unsupported type 0.", - "17:26 → Unable to recognize requested type.", - "18:26 → Unable to recognize requested type.", - "18:26 → Unable to recognize requested type.", - "19:26 → Unable to recognize requested type.", - "20:31 → Unknown slash escape sequence.", - "21:23 → Map literals require pairs of values separated by colons.", - "22:23 → Map literals require pairs of values separated by colons.", + "2:38 → extraneous input '?' expecting ';'", + "3:26 → Unable to recognize requested type.", + "4:26 → Unable to recognize requested type.", + "5:26 → Maps require both a key and a value type.", + "6:26 → Maps require both a key and a value type.", + "7:26 → Unable to recognize requested type.", + "8:26 → Unable to recognize requested type.", + "8:26 → Unable to recognize requested type.", + "9:26 → Unable to recognize requested type.", + "3:26 → Unable to recognize requested type.", + "4:26 → Unable to recognize requested type.", + "5:26 → Maps require both a key and a value type.", + "5:23 → Unsupported type 0.", + "6:26 → Maps require both a key and a value type.", + "6:23 → Unsupported type 0.", + "7:26 → Unable to recognize requested type.", + "8:26 → Unable to recognize requested type.", + "8:26 → Unable to recognize requested type.", + "9:26 → Unable to recognize requested type.", + "10:23 → Map literals require pairs of values separated by colons.", + "11:23 → Map literals require pairs of values separated by colons.", }), }, { From fafc97254efda51c12a4eb15378052af2519c0e4 Mon Sep 17 00:00:00 2001 From: David Sisson 
Date: Sat, 3 Jun 2023 01:47:50 -0700 Subject: [PATCH 15/33] Now detects when functions are missing type declarations. --- .../textplan/parser/SubstraitPlanVisitor.cpp | 13 ++++++++++++- .../textplan/parser/tests/TextPlanParserTest.cpp | 2 +- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/substrait/textplan/parser/SubstraitPlanVisitor.cpp b/src/substrait/textplan/parser/SubstraitPlanVisitor.cpp index d52ed537..a6c8cd96 100644 --- a/src/substrait/textplan/parser/SubstraitPlanVisitor.cpp +++ b/src/substrait/textplan/parser/SubstraitPlanVisitor.cpp @@ -96,6 +96,16 @@ std::any SubstraitPlanVisitor::visitFunction( referenceName = ""; } + // We do not yet examine the type of functions but we look for presence. + if (ctx->name() != nullptr) { + auto colonPos = ctx->name()->getText().find_first_of(':'); + if (colonPos == std::string::npos || + ctx->name()->getText().substr(colonPos+1).empty()) { + errorListener_->addError( + ctx->getStart(), "Functions should have an associated type."); + } + } + symbolTable_->defineSymbol( referenceName, Location(ctx), @@ -125,7 +135,8 @@ std::any SubstraitPlanVisitor::visitSchema_definition( defaultResult(), defaultResult()); - // Mark all of the schema items so we can find the ones related to this schema. + // Mark all of the schema items so we can find the ones related to this + // schema. 
for (const auto& item : ctx->schema_item()) { auto symbol = ANY_CAST(SymbolInfo*, visitSchema_item(item)); if (symbol == nullptr) { diff --git a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp index 91fd3960..5637c387 100644 --- a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp +++ b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp @@ -842,7 +842,7 @@ std::vector getTestCases() { R"(extension_space blah.yaml { function sum: as sum; })", - HasErrors({"Functions should have an associated type."}), + HasErrors({"2:12 → Functions should have an associated type."}), }, { "test14-three-node-pipeline-with-fields", From 2187faf2834ed3a663f1f2b3c5dbb36ff15a9514 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Mon, 5 Jun 2023 01:05:52 -0700 Subject: [PATCH 16/33] Removed one case of duplicate error messages. --- .../textplan/parser/SubstraitPlanVisitor.cpp | 3 ++- .../textplan/parser/tests/TextPlanParserTest.cpp | 13 ------------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/src/substrait/textplan/parser/SubstraitPlanVisitor.cpp b/src/substrait/textplan/parser/SubstraitPlanVisitor.cpp index a6c8cd96..226742cf 100644 --- a/src/substrait/textplan/parser/SubstraitPlanVisitor.cpp +++ b/src/substrait/textplan/parser/SubstraitPlanVisitor.cpp @@ -259,7 +259,8 @@ std::any SubstraitPlanVisitor::visitStruct_literal( std::any SubstraitPlanVisitor::visitConstant( SubstraitPlanParser::ConstantContext* ctx) { - return visitChildren(ctx); + // No need to examine these just yet, we will do this in the next pass. 
+ return defaultResult(); } std::any SubstraitPlanVisitor::visitColumn_name( diff --git a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp index 5637c387..6ff2b9c5 100644 --- a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp +++ b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp @@ -729,9 +729,6 @@ std::vector getTestCases() { "6:36 → mismatched input ',' expecting 'FILTER'", "7:34 → mismatched input 'r' expecting NUMBER", "7:35 → mismatched input ',' expecting 'FILTER'", - "4:25 → Unable to recognize requested type.", - "6:26 → Failed to decode type.", - "7:26 → Failed to decode type.", "2:23 → Literals should include a type.", "3:23 → Literals should include a type.", "4:25 → Unable to recognize requested type.", @@ -757,8 +754,6 @@ std::vector getTestCases() { expression "abcde"_fixedchar; })", HasErrors({ - "2:30 → Unable to recognize requested type.", - "6:31 → Unable to recognize requested type.", "2:30 → Unable to recognize requested type.", "3:23 → UUIDs are 128 bits long and thus should be specified with exactly 32 hexadecimal digits.", "4:23 → UUIDs should be be specified with hexadecimal characters with optional dashes only.", @@ -785,14 +780,6 @@ std::vector getTestCases() { "3:26 → Unable to recognize requested type.", "4:26 → Unable to recognize requested type.", "5:26 → Maps require both a key and a value type.", - "6:26 → Maps require both a key and a value type.", - "7:26 → Unable to recognize requested type.", - "8:26 → Unable to recognize requested type.", - "8:26 → Unable to recognize requested type.", - "9:26 → Unable to recognize requested type.", - "3:26 → Unable to recognize requested type.", - "4:26 → Unable to recognize requested type.", - "5:26 → Maps require both a key and a value type.", "5:23 → Unsupported type 0.", "6:26 → Maps require both a key and a value type.", "6:23 → Unsupported type 0.", From 29374fdd6e836d9ef10952b7954bbc02573abf4d Mon 
Sep 17 00:00:00 2001 From: David Sisson Date: Mon, 5 Jun 2023 11:09:18 -0700 Subject: [PATCH 17/33] Fixed bad merge. --- .../parser/SubstraitPlanRelationVisitor.cpp | 63 ++++++++++++------- .../parser/tests/TextPlanParserTest.cpp | 14 ++--- 2 files changed, 47 insertions(+), 30 deletions(-) diff --git a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp index 3c22b163..268ba48e 100644 --- a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp +++ b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp @@ -351,10 +351,12 @@ std::any SubstraitPlanRelationVisitor::visitRelationFilter( "specified."); break; } +#if 1 if (result.type() != typeid(::substrait::proto::Expression)) { // MEGAHACK - expression not of the right type needs to be returned return defaultResult(); } +#endif *parentRelationData->relation.mutable_filter()->mutable_condition() = ANY_CAST(::substrait::proto::Expression, result); } else { @@ -664,20 +666,32 @@ std::any SubstraitPlanRelationVisitor::visitRelationSourceReference( auto parentRelationData = ANY_CAST(std::shared_ptr, parentSymbol.blob); auto parentRelationType = ANY_CAST(RelationType, parentSymbol.subtype); - switch (parentRelationType) { - case RelationType::kRead: - *parentRelationData->relation.mutable_read()->mutable_base_schema() = - constructSchema(parentSymbol); - break; - default: - errorListener_->addError( - ctx->getStart(), - "Only read relations support base_schema properties."); - break; + + if (parentRelationType == RelationType::kRead) { + auto sourceName = ctx->source_reference()->id()->getText(); + auto* symbol = symbolTable_->lookupSymbolByName(sourceName); + if (symbol != nullptr) { + auto* source = + parentRelationData->relation.mutable_read()->mutable_named_table(); + for (const auto& sym : *symbolTable_) { + if (sym.type != SymbolType::kSourceDetail) { + continue; + } + if (sym.location != symbol->location) { + continue; + } 
+ source->add_names(sym.name); + } + } + } else { + errorListener_->addError( + ctx->getStart(), + "Source references are not defined for this kind of relation."); } return defaultResult(); } + std::any SubstraitPlanRelationVisitor::visitRelationSort( SubstraitPlanParser::RelationSortContext* ctx) { auto parentSymbol = symbolTable_->lookupSymbolByLocation( @@ -701,19 +715,22 @@ std::any SubstraitPlanRelationVisitor::visitRelationSort( std::any SubstraitPlanRelationVisitor::visitExpression( SubstraitPlanParser::ExpressionContext* ctx) { - if (dynamic_cast(ctx)) { - return visitExpressionFunctionUse( - dynamic_cast(ctx)); - } else if (dynamic_cast( - ctx)) { - return visitExpressionConstant( - dynamic_cast(ctx)); - } else if (dynamic_cast(ctx)) { - return visitExpressionColumn( - dynamic_cast(ctx)); - } else if (dynamic_cast(ctx)) { - return visitExpressionCast( - dynamic_cast(ctx)); + if (auto* funcUseCtx = + dynamic_cast( + ctx)) { + return visitExpressionFunctionUse(funcUseCtx); + } else if ( + auto* constantCtx = + dynamic_cast(ctx)) { + return visitExpressionConstant(constantCtx); + } else if ( + auto* columnCtx = + dynamic_cast(ctx)) { + return visitExpressionColumn(columnCtx); + } else if ( + auto* castCtx = + dynamic_cast(ctx)) { + return visitExpressionCast(castCtx); } return defaultResult(); } diff --git a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp index 6ff2b9c5..a425b33c 100644 --- a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp +++ b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp @@ -918,9 +918,9 @@ std::vector getTestCases() { names: "product_id" names: "count" struct { - types { i32 { } } - types { i32 { } } - types { i64 { } } } + types { i32 { nullability: NULLABILITY_REQUIRED } } + types { i32 { nullability: NULLABILITY_REQUIRED } } + types { i64 { nullability: NULLABILITY_REQUIRED } } } } named_table { names: "#1" } } @@ -931,8 +931,8 @@ 
std::vector getTestCases() { names: "product_id" names: "cost" struct { - types { i32 { } } - types { fp32 { } } } + types { i32 { nullability: NULLABILITY_REQUIRED } } + types { fp32 { nullability: NULLABILITY_REQUIRED } } } } named_table { names: "#2" } } @@ -963,8 +963,8 @@ std::vector getTestCases() { names: "company" names: "order_id" struct { - types { string { } } - types { i32 { } } + types { string { nullability: NULLABILITY_REQUIRED } } + types { i32 { nullability: NULLABILITY_REQUIRED } } } } named_table { names: "#3" } From fa7d5aab16fa420295fa547721f4f4a1fdf40f62 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Mon, 5 Jun 2023 11:27:11 -0700 Subject: [PATCH 18/33] Fixed sort desc nulls first. --- src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp index 268ba48e..5965f131 100644 --- a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp +++ b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp @@ -1522,7 +1522,7 @@ int32_t SubstraitPlanRelationVisitor::visitSortDirection( return ::substrait::proto::SortField::SORT_DIRECTION_ASC_NULLS_FIRST; } else if (id == "ascnullslast") { return ::substrait::proto::SortField::SORT_DIRECTION_ASC_NULLS_LAST; - } else if (id == "descnullslast") { + } else if (id == "descnullsfirst") { return ::substrait::proto::SortField::SORT_DIRECTION_DESC_NULLS_FIRST; } else if (id == "descnullslast") { return ::substrait::proto::SortField::SORT_DIRECTION_DESC_NULLS_LAST; From 4d67bc64dbf1b8c2d694cc37bbbaa5916caa70b9 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Mon, 5 Jun 2023 11:27:53 -0700 Subject: [PATCH 19/33] Now warns if roundtrip testing is off. 
--- src/substrait/textplan/tests/CMakeLists.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/substrait/textplan/tests/CMakeLists.txt b/src/substrait/textplan/tests/CMakeLists.txt index 0e7ba329..0657a24d 100644 --- a/src/substrait/textplan/tests/CMakeLists.txt +++ b/src/substrait/textplan/tests/CMakeLists.txt @@ -61,4 +61,12 @@ message( STATUS "test data will be here: ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data" ) +else() + +message( + STATUS + "Round trip testing is turned off. Add SUBSTRAIT_CPP_ROUNDTRIP_TESTING=on to enable." +) + endif() +unset(SUBSTRAIT_CPP_ROUNDTRIP_TESTING CACHE) From 5cbc9c0f79b2b4aaa86d85c6dbb5c488a1fe00b6 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Tue, 6 Jun 2023 13:34:51 -0700 Subject: [PATCH 20/33] Addressed review comments. --- .../parser/SubstraitPlanRelationVisitor.cpp | 4 +++- .../textplan/parser/tests/TextPlanParserTest.cpp | 15 ++++++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp index 5965f131..3006cfc5 100644 --- a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp +++ b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp @@ -790,7 +790,9 @@ std::any SubstraitPlanRelationVisitor::visitExpressionColumn( expr.mutable_selection() ->mutable_direct_reference() ->mutable_struct_field() - ->set_field(fieldReference); + ->set_field(static_cast( + (currentFieldNumber - relationData->fieldReferences.begin()) & + std::numeric_limits::max)); } visitChildren(ctx); diff --git a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp index a425b33c..5fc70f8d 100644 --- a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp +++ b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp @@ -797,11 +797,16 @@ std::vector getTestCases() { expression 123_i8 AS i32; expression 
123_i8 AS i32 AS i64; })", - AsBinaryPlan(Partially(EqualsProto<::substrait::proto::Plan>( - R"(relations { root { input { project { - expressions { cast { type { i32 {} } input { literal { i8: 123 } } } } - expressions { cast { type { i64 {} } input { cast { type { i32 {} } input { literal { i8: 123 } } } } } } - } } } })"))), + AllOf( + HasErrors({}), + AsBinaryPlan(EqualsProto<::substrait::proto::Plan>( + R"(relations { root { input { project { + expressions { cast { type { i32 {} } + input { literal { i8: 123 } } } } + expressions { cast { type { i64 {} } + input { cast { type { i32 {} } + input { literal { i8: 123 } } } } } } + } } } })"))), }, { "test13-functions", From 21c9c1b247a3a6820350d3f4a76c6b9baf3d3c73 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Tue, 6 Jun 2023 13:49:18 -0700 Subject: [PATCH 21/33] Bug fix. --- .../textplan/parser/SubstraitPlanRelationVisitor.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp index 3006cfc5..5965f131 100644 --- a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp +++ b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp @@ -790,9 +790,7 @@ std::any SubstraitPlanRelationVisitor::visitExpressionColumn( expr.mutable_selection() ->mutable_direct_reference() ->mutable_struct_field() - ->set_field(static_cast( - (currentFieldNumber - relationData->fieldReferences.begin()) & - std::numeric_limits::max)); + ->set_field(fieldReference); } visitChildren(ctx); From 2eb897e6f476be9890ca1b294d07f5292795573b Mon Sep 17 00:00:00 2001 From: David Sisson Date: Tue, 6 Jun 2023 14:47:01 -0700 Subject: [PATCH 22/33] Updated --- .../textplan/parser/tests/TextPlanParserTest.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp 
b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp index 5fc70f8d..13e8eef6 100644 --- a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp +++ b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp @@ -801,10 +801,13 @@ std::vector getTestCases() { HasErrors({}), AsBinaryPlan(EqualsProto<::substrait::proto::Plan>( R"(relations { root { input { project { - expressions { cast { type { i32 {} } + expressions { cast { type { i32 { + nullability: NULLABILITY_REQUIRED } } input { literal { i8: 123 } } } } - expressions { cast { type { i64 {} } - input { cast { type { i32 {} } + expressions { cast { type { i64 { + nullability: NULLABILITY_REQUIRED } } + input { cast { type { i32 { + nullability: NULLABILITY_REQUIRED } } input { literal { i8: 123 } } } } } } } } } })"))), }, From e3dc336eaa7cfa41719eaa23732636e309c644f8 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Tue, 6 Jun 2023 17:05:34 -0700 Subject: [PATCH 23/33] Remove unused code. --- .../textplan/parser/SubstraitPlanRelationVisitor.cpp | 12 ------------ .../textplan/parser/SubstraitPlanRelationVisitor.h | 2 -- 2 files changed, 14 deletions(-) diff --git a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp index 5965f131..7e105dd1 100644 --- a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp +++ b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp @@ -1534,16 +1534,4 @@ int32_t SubstraitPlanRelationVisitor::visitSortDirection( return ::substrait::proto::SortField::SORT_DIRECTION_UNSPECIFIED; } -::substrait::proto::NamedStruct SubstraitPlanRelationVisitor::constructSchema( - SymbolInfo info) { - ::substrait::proto::NamedStruct schema; - for (const auto& symbol : *symbolTable_) { - if (symbol.type != SymbolType::kSchemaColumn) { - continue; - } - *schema.add_names() = symbol.name; - } - return schema; -} - } // namespace io::substrait::textplan diff --git 
a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.h b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.h index 92b21294..a41e8e2b 100644 --- a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.h +++ b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.h @@ -162,8 +162,6 @@ class SubstraitPlanRelationVisitor : public SubstraitPlanTypeVisitor { const antlr4::tree::TerminalNode* node, const std::string& str); - ::substrait::proto::NamedStruct constructSchema(SymbolInfo info); - const SymbolInfo* currentRelationScope_{nullptr}; // Not owned. }; From 2a6d737af20c9df88ab88d098808420e1e378999 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Tue, 6 Jun 2023 17:20:50 -0700 Subject: [PATCH 24/33] Type fix for symbols found in the table by location. --- src/substrait/textplan/SymbolTable.cpp | 6 +- src/substrait/textplan/SymbolTable.h | 2 +- .../converter/InitialPlanProtoVisitor.cpp | 6 +- .../textplan/converter/PipelineVisitor.cpp | 78 +++++++++---------- .../textplan/converter/PlanPrinterVisitor.cpp | 18 ++--- .../parser/SubstraitPlanPipelineVisitor.cpp | 4 +- .../parser/SubstraitPlanRelationVisitor.cpp | 56 +++++++------ .../textplan/parser/SubstraitPlanVisitor.cpp | 4 +- 8 files changed, 86 insertions(+), 88 deletions(-) diff --git a/src/substrait/textplan/SymbolTable.cpp b/src/substrait/textplan/SymbolTable.cpp index e3de4380..bdab07ff 100644 --- a/src/substrait/textplan/SymbolTable.cpp +++ b/src/substrait/textplan/SymbolTable.cpp @@ -132,13 +132,13 @@ const SymbolInfo* SymbolTable::lookupSymbolByName( return symbols_[itr->second].get(); } -const SymbolInfo& SymbolTable::lookupSymbolByLocation( +const SymbolInfo* SymbolTable::lookupSymbolByLocation( const Location& location) const { auto itr = symbolsByLocation_.find(location); if (itr == symbolsByLocation_.end()) { - return SymbolInfo::kUnknown; + return nullptr; } - return *symbols_[itr->second]; + return symbols_[itr->second].get(); } const SymbolInfo& 
SymbolTable::nthSymbolByType(uint32_t n, SymbolType type) diff --git a/src/substrait/textplan/SymbolTable.h b/src/substrait/textplan/SymbolTable.h index f033be34..19e6d419 100644 --- a/src/substrait/textplan/SymbolTable.h +++ b/src/substrait/textplan/SymbolTable.h @@ -146,7 +146,7 @@ class SymbolTable { [[nodiscard]] const SymbolInfo* lookupSymbolByName( const std::string& name) const; - [[nodiscard]] const SymbolInfo& lookupSymbolByLocation( + [[nodiscard]] const SymbolInfo* lookupSymbolByLocation( const Location& location) const; [[nodiscard]] const SymbolInfo& nthSymbolByType(uint32_t n, SymbolType type) diff --git a/src/substrait/textplan/converter/InitialPlanProtoVisitor.cpp b/src/substrait/textplan/converter/InitialPlanProtoVisitor.cpp index aedfe9f7..53a5ce24 100644 --- a/src/substrait/textplan/converter/InitialPlanProtoVisitor.cpp +++ b/src/substrait/textplan/converter/InitialPlanProtoVisitor.cpp @@ -267,12 +267,12 @@ std::any InitialPlanProtoVisitor::visitNamedStruct( void InitialPlanProtoVisitor::addFieldsToRelation( const std::shared_ptr& relationData, const ::substrait::proto::Rel& relation) { - auto symbol = symbolTable_->lookupSymbolByLocation(PROTO_LOCATION(relation)); - if (symbol == SymbolInfo::kUnknown || symbol.type != SymbolType::kRelation) { + auto* symbol = symbolTable_->lookupSymbolByLocation(PROTO_LOCATION(relation)); + if (symbol == nullptr || symbol->type != SymbolType::kRelation) { return; } auto symbolRelationData = - ANY_CAST(std::shared_ptr, symbol.blob); + ANY_CAST(std::shared_ptr, symbol->blob); for (const auto& field : symbolRelationData->fieldReferences) { relationData->fieldReferences.push_back(field); } diff --git a/src/substrait/textplan/converter/PipelineVisitor.cpp b/src/substrait/textplan/converter/PipelineVisitor.cpp index bf4058c2..da5cd87f 100644 --- a/src/substrait/textplan/converter/PipelineVisitor.cpp +++ b/src/substrait/textplan/converter/PipelineVisitor.cpp @@ -9,11 +9,11 @@ namespace io::substrait::textplan { 
std::shared_ptr PipelineVisitor::getRelationData( const google::protobuf::Message& relation) { - auto symbol = symbolTable_->lookupSymbolByLocation(PROTO_LOCATION(relation)); - if (symbol == SymbolInfo::kUnknown) { + auto* symbol = symbolTable_->lookupSymbolByLocation(PROTO_LOCATION(relation)); + if (symbol == nullptr) { return nullptr; } - return ANY_CAST(std::shared_ptr, symbol.blob); + return ANY_CAST(std::shared_ptr, symbol->blob); } std::any PipelineVisitor::visitRelation( @@ -25,92 +25,92 @@ std::any PipelineVisitor::visitRelation( // No relations beyond this one. break; case ::substrait::proto::Rel::RelTypeCase::kFilter: { - const auto& inputSymbol = symbolTable_->lookupSymbolByLocation( + const auto* inputSymbol = symbolTable_->lookupSymbolByLocation( Location(&relation.filter().input())); - relationData->continuingPipeline = &inputSymbol; + relationData->continuingPipeline = inputSymbol; break; } case ::substrait::proto::Rel::RelTypeCase::kFetch: { - const auto& inputSymbol = symbolTable_->lookupSymbolByLocation( + const auto* inputSymbol = symbolTable_->lookupSymbolByLocation( Location(&relation.fetch().input())); - relationData->continuingPipeline = &inputSymbol; + relationData->continuingPipeline = inputSymbol; break; } case ::substrait::proto::Rel::RelTypeCase::kAggregate: { - const auto& inputSymbol = symbolTable_->lookupSymbolByLocation( + const auto* inputSymbol = symbolTable_->lookupSymbolByLocation( Location(&relation.aggregate().input())); - relationData->continuingPipeline = &inputSymbol; + relationData->continuingPipeline = inputSymbol; break; } case ::substrait::proto::Rel::RelTypeCase::kSort: { - const auto& inputSymbol = symbolTable_->lookupSymbolByLocation( + const auto* inputSymbol = symbolTable_->lookupSymbolByLocation( Location(&relation.sort().input())); - relationData->continuingPipeline = &inputSymbol; + relationData->continuingPipeline = inputSymbol; break; } case ::substrait::proto::Rel::RelTypeCase::kJoin: { - const auto& 
leftSymbol = symbolTable_->lookupSymbolByLocation( + const auto* leftSymbol = symbolTable_->lookupSymbolByLocation( Location(&relation.join().left())); - const auto& rightSymbol = symbolTable_->lookupSymbolByLocation( + const auto* rightSymbol = symbolTable_->lookupSymbolByLocation( Location(&relation.join().right())); - relationData->newPipelines.push_back(&leftSymbol); - relationData->newPipelines.push_back(&rightSymbol); + relationData->newPipelines.push_back(leftSymbol); + relationData->newPipelines.push_back(rightSymbol); break; } case ::substrait::proto::Rel::RelTypeCase::kProject: { - const auto& inputSymbol = symbolTable_->lookupSymbolByLocation( + const auto* inputSymbol = symbolTable_->lookupSymbolByLocation( Location(&relation.project().input())); - relationData->continuingPipeline = &inputSymbol; + relationData->continuingPipeline = inputSymbol; break; } case ::substrait::proto::Rel::RelTypeCase::kSet: for (const auto& rel : relation.set().inputs()) { - const auto& inputSymbol = + const auto* inputSymbol = symbolTable_->lookupSymbolByLocation(Location(&rel)); - relationData->newPipelines.push_back(&inputSymbol); + relationData->newPipelines.push_back(inputSymbol); } break; case ::substrait::proto::Rel::RelTypeCase::kExtensionSingle: { - const auto& inputSymbol = symbolTable_->lookupSymbolByLocation( + const auto* inputSymbol = symbolTable_->lookupSymbolByLocation( Location(&relation.extension_single().input())); - relationData->continuingPipeline = &inputSymbol; + relationData->continuingPipeline = inputSymbol; break; } case ::substrait::proto::Rel::RelTypeCase::kExtensionMulti: for (const auto& rel : relation.extension_multi().inputs()) { - const auto& inputSymbol = + const auto* inputSymbol = symbolTable_->lookupSymbolByLocation(Location(&rel)); - relationData->newPipelines.push_back(&inputSymbol); + relationData->newPipelines.push_back(inputSymbol); } break; case ::substrait::proto::Rel::RelTypeCase::kExtensionLeaf: // No children. 
break; case ::substrait::proto::Rel::RelTypeCase::kCross: { - const auto& leftSymbol = symbolTable_->lookupSymbolByLocation( + const auto* leftSymbol = symbolTable_->lookupSymbolByLocation( Location(&relation.cross().left())); - const auto& rightSymbol = symbolTable_->lookupSymbolByLocation( + const auto* rightSymbol = symbolTable_->lookupSymbolByLocation( Location(&relation.cross().right())); - relationData->newPipelines.push_back(&leftSymbol); - relationData->newPipelines.push_back(&rightSymbol); + relationData->newPipelines.push_back(leftSymbol); + relationData->newPipelines.push_back(rightSymbol); break; } case ::substrait::proto::Rel::RelTypeCase::kHashJoin: { - const auto& leftSymbol = symbolTable_->lookupSymbolByLocation( + const auto* leftSymbol = symbolTable_->lookupSymbolByLocation( Location(&relation.hash_join().left())); - const auto& rightSymbol = symbolTable_->lookupSymbolByLocation( + const auto* rightSymbol = symbolTable_->lookupSymbolByLocation( Location(&relation.hash_join().right())); - relationData->newPipelines.push_back(&leftSymbol); - relationData->newPipelines.push_back(&rightSymbol); + relationData->newPipelines.push_back(leftSymbol); + relationData->newPipelines.push_back(rightSymbol); break; } case ::substrait::proto::Rel::RelTypeCase::kMergeJoin: { - const auto& leftSymbol = symbolTable_->lookupSymbolByLocation( + const auto* leftSymbol = symbolTable_->lookupSymbolByLocation( Location(&relation.merge_join().left())); - const auto& rightSymbol = symbolTable_->lookupSymbolByLocation( + const auto* rightSymbol = symbolTable_->lookupSymbolByLocation( Location(&relation.merge_join().right())); - relationData->newPipelines.push_back(&leftSymbol); - relationData->newPipelines.push_back(&rightSymbol); + relationData->newPipelines.push_back(leftSymbol); + relationData->newPipelines.push_back(rightSymbol); break; } case ::substrait::proto::Rel::REL_TYPE_NOT_SET: @@ -122,19 +122,19 @@ std::any PipelineVisitor::visitRelation( std::any 
PipelineVisitor::visitPlanRelation( const ::substrait::proto::PlanRel& relation) { - auto symbol = symbolTable_->lookupSymbolByLocation(PROTO_LOCATION(relation)); - auto relationData = ANY_CAST(std::shared_ptr, symbol.blob); + auto* symbol = symbolTable_->lookupSymbolByLocation(PROTO_LOCATION(relation)); + auto relationData = ANY_CAST(std::shared_ptr, symbol->blob); switch (relation.rel_type_case()) { case ::substrait::proto::PlanRel::kRel: { const auto& relSymbol = symbolTable_->lookupSymbolByLocation(Location(&relation.rel())); - relationData->newPipelines.push_back(&relSymbol); + relationData->newPipelines.push_back(relSymbol); break; } case ::substrait::proto::PlanRel::kRoot: { const auto& inputSymbol = symbolTable_->lookupSymbolByLocation( Location(&relation.root().input())); - relationData->newPipelines.push_back(&inputSymbol); + relationData->newPipelines.push_back(inputSymbol); break; } case ::substrait::proto::PlanRel::REL_TYPE_NOT_SET: diff --git a/src/substrait/textplan/converter/PlanPrinterVisitor.cpp b/src/substrait/textplan/converter/PlanPrinterVisitor.cpp index ad1d04c6..a835dd90 100644 --- a/src/substrait/textplan/converter/PlanPrinterVisitor.cpp +++ b/src/substrait/textplan/converter/PlanPrinterVisitor.cpp @@ -628,10 +628,10 @@ std::any PlanPrinterVisitor::visitRelation( // Mark the current scope for any operations within this relation. 
auto previousScope = currentScope_; auto resetCurrentScope = finally([&]() { currentScope_ = previousScope; }); - const SymbolInfo& symbol = + const SymbolInfo* symbol = symbolTable_->lookupSymbolByLocation(PROTO_LOCATION(relation)); - if (symbol != SymbolInfo::kUnknown) { - currentScope_ = &symbol; + if (symbol != nullptr) { + currentScope_ = symbol; } auto result = BasePlanProtoVisitor::visitRelation(relation); @@ -660,17 +660,17 @@ std::any PlanPrinterVisitor::visitReadRelation( case ::substrait::proto::ReadRel::READ_TYPE_NOT_SET: return ""; } - const auto& symbol = + const auto* symbol = symbolTable_->lookupSymbolByLocation(PROTO_LOCATION(*msg)); - if (symbol != SymbolInfo::kUnknown) { - text << " source " << symbol.name << ";\n"; + if (symbol != nullptr) { + text << " source " << symbol->name << ";\n"; } if (relation.has_base_schema()) { - const auto& schemaSymbol = symbolTable_->lookupSymbolByLocation( + const auto* schemaSymbol = symbolTable_->lookupSymbolByLocation( PROTO_LOCATION(relation.base_schema())); - if (schemaSymbol != SymbolInfo::kUnknown) { - text << " base_schema " << schemaSymbol.name << ";\n"; + if (schemaSymbol != nullptr) { + text << " base_schema " << schemaSymbol->name << ";\n"; } } if (relation.has_filter()) { diff --git a/src/substrait/textplan/parser/SubstraitPlanPipelineVisitor.cpp b/src/substrait/textplan/parser/SubstraitPlanPipelineVisitor.cpp index 7cdbe3d2..92577557 100644 --- a/src/substrait/textplan/parser/SubstraitPlanPipelineVisitor.cpp +++ b/src/substrait/textplan/parser/SubstraitPlanPipelineVisitor.cpp @@ -95,13 +95,13 @@ std::any SubstraitPlanPipelineVisitor::visitPipeline( const SymbolInfo* leftSymbol = &SymbolInfo::kUnknown; if (ctx->pipeline() != nullptr) { leftSymbol = - &symbolTable_->lookupSymbolByLocation(PARSER_LOCATION(ctx->pipeline())); + symbolTable_->lookupSymbolByLocation(PARSER_LOCATION(ctx->pipeline())); } const SymbolInfo* rightSymbol = &SymbolInfo::kUnknown; if (dynamic_cast(ctx->parent)->getRuleIndex() == 
SubstraitPlanParser::RulePipeline) { rightSymbol = - &symbolTable_->lookupSymbolByLocation(PARSER_LOCATION(ctx->parent)); + symbolTable_->lookupSymbolByLocation(PARSER_LOCATION(ctx->parent)); } const SymbolInfo* rightmostSymbol = rightSymbol; if (*rightSymbol != SymbolInfo::kUnknown) { diff --git a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp index 7e105dd1..e7c01db3 100644 --- a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp +++ b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp @@ -20,7 +20,6 @@ #include "substrait/textplan/Location.h" #include "substrait/textplan/StructuredSymbolData.h" #include "substrait/textplan/SymbolTable.h" -#include "substrait/type/Type.h" namespace io::substrait::textplan { @@ -224,25 +223,25 @@ std::any SubstraitPlanRelationVisitor::aggregateResult( std::any SubstraitPlanRelationVisitor::visitRelation( SubstraitPlanParser::RelationContext* ctx) { // Create the relation before visiting our children, so they can update it. - auto symbol = symbolTable_->lookupSymbolByLocation(Location(ctx)); - if (symbol == SymbolInfo::kUnknown) { + auto* symbol = symbolTable_->lookupSymbolByLocation(Location(ctx)); + if (symbol == nullptr) { // This error has been previously dealt with thus we can safely skip it. return defaultResult(); } - auto relationData = ANY_CAST(std::shared_ptr, symbol.blob); + auto relationData = ANY_CAST(std::shared_ptr, symbol->blob); ::substrait::proto::Rel relation; - auto relationType = ANY_CAST(RelationType, symbol.subtype); + auto relationType = ANY_CAST(RelationType, symbol->subtype); setRelationType(relationType, &relation); relationData->relation = relation; - symbolTable_->updateLocation(symbol, PROTO_LOCATION(relationData->relation)); + symbolTable_->updateLocation(*symbol, PROTO_LOCATION(relationData->relation)); // Mark the current scope for any operations within this relation. 
auto previousScope = currentRelationScope_; auto resetCurrentScope = finally([&]() { currentRelationScope_ = previousScope; }); - currentRelationScope_ = &symbol; + currentRelationScope_ = symbol; visitChildren(ctx); @@ -281,12 +280,12 @@ std::any SubstraitPlanRelationVisitor::visitRelationFilter( visitRelation_filter_behavior(ctx->relation_filter_behavior())); } - auto parentSymbol = symbolTable_->lookupSymbolByLocation( + auto* parentSymbol = symbolTable_->lookupSymbolByLocation( Location(dynamic_cast(ctx->parent))); auto parentRelationData = - ANY_CAST(std::shared_ptr, parentSymbol.blob); + ANY_CAST(std::shared_ptr, parentSymbol->blob); auto result = SubstraitPlanRelationVisitor::visitChildren(ctx); - auto parentRelationType = ANY_CAST(RelationType, parentSymbol.subtype); + auto parentRelationType = ANY_CAST(RelationType, parentSymbol->subtype); switch (parentRelationType) { case RelationType::kRead: switch (behavior) { @@ -376,11 +375,11 @@ std::any SubstraitPlanRelationVisitor::visitRelationFilter( std::any SubstraitPlanRelationVisitor::visitRelationUsesSchema( SubstraitPlanParser::RelationUsesSchemaContext* ctx) { - auto parentSymbol = symbolTable_->lookupSymbolByLocation( + auto* parentSymbol = symbolTable_->lookupSymbolByLocation( Location(dynamic_cast(ctx->parent))); auto parentRelationData = - ANY_CAST(std::shared_ptr, parentSymbol.blob); - auto parentRelationType = ANY_CAST(RelationType, parentSymbol.subtype); + ANY_CAST(std::shared_ptr, parentSymbol->blob); + auto parentRelationType = ANY_CAST(RelationType, parentSymbol->subtype); if (parentRelationType == RelationType::kRead) { auto schemaName = ctx->id()->getText(); @@ -412,12 +411,12 @@ std::any SubstraitPlanRelationVisitor::visitRelationUsesSchema( std::any SubstraitPlanRelationVisitor::visitRelationExpression( SubstraitPlanParser::RelationExpressionContext* ctx) { - auto parentSymbol = symbolTable_->lookupSymbolByLocation( + auto* parentSymbol = symbolTable_->lookupSymbolByLocation( 
Location(dynamic_cast(ctx->parent))); auto parentRelationData = - ANY_CAST(std::shared_ptr, parentSymbol.blob); + ANY_CAST(std::shared_ptr, parentSymbol->blob); auto result = SubstraitPlanRelationVisitor::visitChildren(ctx); - auto parentRelationType = ANY_CAST(RelationType, parentSymbol.subtype); + auto parentRelationType = ANY_CAST(RelationType, parentSymbol->subtype); switch (parentRelationType) { case RelationType::kJoin: if (parentRelationData->relation.join().has_expression()) { @@ -445,12 +444,12 @@ std::any SubstraitPlanRelationVisitor::visitRelationExpression( std::any SubstraitPlanRelationVisitor::visitRelationGrouping( SubstraitPlanParser::RelationGroupingContext* ctx) { - auto parentSymbol = symbolTable_->lookupSymbolByLocation( + auto* parentSymbol = symbolTable_->lookupSymbolByLocation( Location(dynamic_cast(ctx->parent))); auto parentRelationData = - ANY_CAST(std::shared_ptr, parentSymbol.blob); + ANY_CAST(std::shared_ptr, parentSymbol->blob); auto result = SubstraitPlanRelationVisitor::visitChildren(ctx); - auto parentRelationType = ANY_CAST(RelationType, parentSymbol.subtype); + auto parentRelationType = ANY_CAST(RelationType, parentSymbol->subtype); switch (parentRelationType) { case RelationType::kAggregate: { if (parentRelationData->relation.aggregate().groupings_size() == 0) { @@ -519,11 +518,11 @@ std::any SubstraitPlanRelationVisitor::visitRelationMeasure( } // Add it to our relation. 
- auto parentSymbol = symbolTable_->lookupSymbolByLocation( + auto* parentSymbol = symbolTable_->lookupSymbolByLocation( Location(dynamic_cast(ctx->parent))); auto parentRelationData = - ANY_CAST(std::shared_ptr, parentSymbol.blob); - auto parentRelationType = ANY_CAST(RelationType, parentSymbol.subtype); + ANY_CAST(std::shared_ptr, parentSymbol->blob); + auto parentRelationType = ANY_CAST(RelationType, parentSymbol->subtype); switch (parentRelationType) { case RelationType::kAggregate: *parentRelationData->relation.mutable_aggregate()->add_measures() = @@ -661,11 +660,11 @@ std::any SubstraitPlanRelationVisitor::visitMeasure_detail( std::any SubstraitPlanRelationVisitor::visitRelationSourceReference( SubstraitPlanParser::RelationSourceReferenceContext* ctx) { - auto parentSymbol = symbolTable_->lookupSymbolByLocation( + auto* parentSymbol = symbolTable_->lookupSymbolByLocation( Location(dynamic_cast(ctx->parent))); auto parentRelationData = - ANY_CAST(std::shared_ptr, parentSymbol.blob); - auto parentRelationType = ANY_CAST(RelationType, parentSymbol.subtype); + ANY_CAST(std::shared_ptr, parentSymbol->blob); + auto parentRelationType = ANY_CAST(RelationType, parentSymbol->subtype); if (parentRelationType == RelationType::kRead) { auto sourceName = ctx->source_reference()->id()->getText(); @@ -691,14 +690,13 @@ std::any SubstraitPlanRelationVisitor::visitRelationSourceReference( return defaultResult(); } - std::any SubstraitPlanRelationVisitor::visitRelationSort( SubstraitPlanParser::RelationSortContext* ctx) { - auto parentSymbol = symbolTable_->lookupSymbolByLocation( + auto* parentSymbol = symbolTable_->lookupSymbolByLocation( Location(dynamic_cast(ctx->parent))); auto parentRelationData = - ANY_CAST(std::shared_ptr, parentSymbol.blob); - auto parentRelationType = ANY_CAST(RelationType, parentSymbol.subtype); + ANY_CAST(std::shared_ptr, parentSymbol->blob); + auto parentRelationType = ANY_CAST(RelationType, parentSymbol->subtype); switch (parentRelationType) { 
case RelationType::kSort: *parentRelationData->relation.mutable_sort()->add_sorts() = ANY_CAST( diff --git a/src/substrait/textplan/parser/SubstraitPlanVisitor.cpp b/src/substrait/textplan/parser/SubstraitPlanVisitor.cpp index 226742cf..f10e07ea 100644 --- a/src/substrait/textplan/parser/SubstraitPlanVisitor.cpp +++ b/src/substrait/textplan/parser/SubstraitPlanVisitor.cpp @@ -70,9 +70,9 @@ std::any SubstraitPlanVisitor::visitExtensionspace( // Update the contained functions to belong in this space. for (auto func : ctx->function()) { - auto funcSymbol = symbolTable_->lookupSymbolByLocation(Location(func)); + auto* funcSymbol = symbolTable_->lookupSymbolByLocation(Location(func)); auto functionData = - ANY_CAST(std::shared_ptr, funcSymbol.blob); + ANY_CAST(std::shared_ptr, funcSymbol->blob); functionData->extensionUriReference = thisSpace; } From 102febff96aa3273df002b876073ce0a5f4493e9 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Tue, 6 Jun 2023 17:26:51 -0700 Subject: [PATCH 25/33] Added some more errors. 
--- .../textplan/parser/SubstraitPlanPipelineVisitor.cpp | 2 -- .../textplan/parser/SubstraitPlanRelationVisitor.cpp | 12 +++++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/substrait/textplan/parser/SubstraitPlanPipelineVisitor.cpp b/src/substrait/textplan/parser/SubstraitPlanPipelineVisitor.cpp index 92577557..a08c682e 100644 --- a/src/substrait/textplan/parser/SubstraitPlanPipelineVisitor.cpp +++ b/src/substrait/textplan/parser/SubstraitPlanPipelineVisitor.cpp @@ -2,10 +2,8 @@ #include "substrait/textplan/parser/SubstraitPlanPipelineVisitor.h" -#include #include -#include "SubstraitPlanLexer/SubstraitPlanLexer.h" #include "SubstraitPlanParser/SubstraitPlanParser.h" #include "substrait/textplan/Any.h" #include "substrait/textplan/Location.h" diff --git a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp index e7c01db3..5ba174d2 100644 --- a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp +++ b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp @@ -350,12 +350,11 @@ std::any SubstraitPlanRelationVisitor::visitRelationFilter( "specified."); break; } -#if 1 if (result.type() != typeid(::substrait::proto::Expression)) { - // MEGAHACK - expression not of the right type needs to be returned + errorListener_->addError( + ctx->getStart(), "Could not parse as an expression."); return defaultResult(); } -#endif *parentRelationData->relation.mutable_filter()->mutable_condition() = ANY_CAST(::substrait::proto::Expression, result); } else { @@ -633,7 +632,9 @@ std::any SubstraitPlanRelationVisitor::visitMeasure_detail( visitAggregationPhase(ctx->id()))); } } else { - // MEGAHACK -- Raise an error as this is not a function use. 
+ errorListener_->addError( + ctx->id()->getStart(), + "Expected an expression utilizing a function here."); } return measure; @@ -752,7 +753,8 @@ std::any SubstraitPlanRelationVisitor::visitExpressionFunctionUse( for (const auto& exp : ctx->expression()) { auto result = visitExpression(exp); if (result.type() != typeid(::substrait::proto::Expression)) { - // MEGAHACK -- Add an error for a bad type. + errorListener_->addError( + ctx->id()->getStart(), "Could not parse as an expression."); return expr; } auto newExpr = ANY_CAST(::substrait::proto::Expression, result); From b5c4de57c053cbf0d2d4920be907f5b80804414d Mon Sep 17 00:00:00 2001 From: David Sisson Date: Tue, 6 Jun 2023 17:44:12 -0700 Subject: [PATCH 26/33] More refactoring. --- .../parser/SubstraitPlanRelationVisitor.cpp | 103 +++++++----------- 1 file changed, 42 insertions(+), 61 deletions(-) diff --git a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp index 5ba174d2..fc8ee51d 100644 --- a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp +++ b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp @@ -44,8 +44,8 @@ std::string toLower(const std::string& str) { } // Yields true if the string 'haystack' starts with the string 'needle'. -bool startsWith(const std::string& haystack, const std::string& needle) { - return strncmp(haystack.c_str(), needle.c_str(), needle.size()) == 0; +bool startsWith(const std::string& haystack, std::string_view needle) { + return strncmp(haystack.c_str(), needle.data(), needle.size()) == 0; } void setNullable(::substrait::proto::Type* type) { @@ -208,6 +208,27 @@ void setRelationType( } } +std::string normalizeProtoEnum(std::string_view text, std::string_view prefix) { + std::string result{text}; + // Remove non-alphabetic characters. 
+ result.erase( + std::remove_if( + result.begin(), + result.end(), + [](auto const& c) -> bool { return !std::isalpha(c); }), + result.end()); + // Lowercase. + std::transform( + result.begin(), result.end(), result.begin(), [](unsigned char c) { + return std::tolower(c); + }); + // Remove the prefix if it exists. + if (startsWith(result, prefix)) { + result = result.substr(prefix.length()); + } + return result; +} + } // namespace std::any SubstraitPlanRelationVisitor::aggregateResult( @@ -538,26 +559,14 @@ std::any SubstraitPlanRelationVisitor::visitRelationMeasure( int32_t SubstraitPlanRelationVisitor::visitAggregationInvocation( SubstraitPlanParser::IdContext* ctx) { - std::string id = ctx->getText(); - id.erase( - std::remove_if( - id.begin(), - id.end(), - [](auto const& c) -> bool { return !std::isalpha(c); }), - id.end()); - std::transform(id.begin(), id.end(), id.begin(), [](unsigned char c) { - return std::tolower(c); - }); - if (startsWith(id, kAggregationInvocationPrefix)) { - id = id.substr(kAggregationInvocationPrefix.length()); - } - // TODO -- Replace this with a handcrafted function or a trie. 
- if (id == "unspecified") { + std::string text = + normalizeProtoEnum(ctx->getText(), kAggregationInvocationPrefix); + if (text == "unspecified") { return ::substrait::proto::AggregateFunction:: AGGREGATION_INVOCATION_UNSPECIFIED; - } else if (id == "all") { + } else if (text == "all") { return ::substrait::proto::AggregateFunction::AGGREGATION_INVOCATION_ALL; - } else if (id == "distinct") { + } else if (text == "distinct") { return ::substrait::proto::AggregateFunction:: AGGREGATION_INVOCATION_DISTINCT; } @@ -570,29 +579,17 @@ int32_t SubstraitPlanRelationVisitor::visitAggregationInvocation( int32_t SubstraitPlanRelationVisitor::visitAggregationPhase( SubstraitPlanParser::IdContext* ctx) { - std::string id = ctx->getText(); - id.erase( - std::remove_if( - id.begin(), - id.end(), - [](auto const& c) -> bool { return !std::isalpha(c); }), - id.end()); - std::transform(id.begin(), id.end(), id.begin(), [](unsigned char c) { - return std::tolower(c); - }); - if (startsWith(id, kAggregationPhasePrefix)) { - id = id.substr(kAggregationPhasePrefix.length()); - } - // TODO -- Replace this with a handcrafted function or a trie. 
- if (id == "unspecified") { + std::string text = + normalizeProtoEnum(ctx->getText(), kAggregationPhasePrefix); + if (text == "unspecified") { return ::substrait::proto::AGGREGATION_PHASE_UNSPECIFIED; - } else if (id == "initialtointermediate") { + } else if (text == "initialtointermediate") { return ::substrait::proto::AGGREGATION_PHASE_INITIAL_TO_INTERMEDIATE; - } else if (id == "intermediatetointermediate") { + } else if (text == "intermediatetointermediate") { return ::substrait::proto::AGGREGATION_PHASE_INTERMEDIATE_TO_INTERMEDIATE; - } else if (id == "initialtoresult") { + } else if (text == "initialtoresult") { return ::substrait::proto::AGGREGATION_PHASE_INITIAL_TO_RESULT; - } else if (id == "intermediatetoresult") { + } else if (text == "intermediatetoresult") { return ::substrait::proto::AGGREGATION_PHASE_INTERMEDIATE_TO_RESULT; } this->errorListener_->addError( @@ -1499,34 +1496,18 @@ std::any SubstraitPlanRelationVisitor::visitSort_field( int32_t SubstraitPlanRelationVisitor::visitSortDirection( SubstraitPlanParser::IdContext* ctx) { - std::string id = ctx->getText(); -#if 1 - // MEGAHACK -- Turn this common code into a subfunction. - id.erase( - std::remove_if( - id.begin(), - id.end(), - [](auto const& c) -> bool { return !std::isalpha(c); }), - id.end()); - std::transform(id.begin(), id.end(), id.begin(), [](unsigned char c) { - return std::tolower(c); - }); - if (startsWith(id, kSortDirectionPrefix)) { - id = id.substr(kSortDirectionPrefix.length()); - } -#endif - // TODO -- Replace this with a handcrafted function or a trie. 
- if (id == "unspecified") { + std::string text = normalizeProtoEnum(ctx->getText(), kSortDirectionPrefix); + if (text == "unspecified") { return ::substrait::proto::SortField::SORT_DIRECTION_UNSPECIFIED; - } else if (id == "ascnullsfirst") { + } else if (text == "ascnullsfirst") { return ::substrait::proto::SortField::SORT_DIRECTION_ASC_NULLS_FIRST; - } else if (id == "ascnullslast") { + } else if (text == "ascnullslast") { return ::substrait::proto::SortField::SORT_DIRECTION_ASC_NULLS_LAST; - } else if (id == "descnullsfirst") { + } else if (text == "descnullsfirst") { return ::substrait::proto::SortField::SORT_DIRECTION_DESC_NULLS_FIRST; - } else if (id == "descnullslast") { + } else if (text == "descnullslast") { return ::substrait::proto::SortField::SORT_DIRECTION_DESC_NULLS_LAST; - } else if (id == "clustered") { + } else if (text == "clustered") { return ::substrait::proto::SortField::SORT_DIRECTION_CLUSTERED; } this->errorListener_->addError( From eced5ca17a89e62d2317d6aee714dfac270cee44 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Wed, 7 Jun 2023 12:21:12 -0700 Subject: [PATCH 27/33] Ran the reformatter. 
--- .../textplan/converter/CMakeLists.txt | 8 +- .../textplan/parser/SubstraitPlanVisitor.cpp | 2 +- src/substrait/textplan/tests/CMakeLists.txt | 79 +++++++++---------- 3 files changed, 45 insertions(+), 44 deletions(-) diff --git a/src/substrait/textplan/converter/CMakeLists.txt b/src/substrait/textplan/converter/CMakeLists.txt index 9ac41c27..44594716 100644 --- a/src/substrait/textplan/converter/CMakeLists.txt +++ b/src/substrait/textplan/converter/CMakeLists.txt @@ -17,8 +17,12 @@ set(TEXTPLAN_SRCS add_library(substrait_textplan_converter ${TEXTPLAN_SRCS}) target_link_libraries( - substrait_textplan_converter substrait_common substrait_expression - substrait_proto symbol_table error_listener + substrait_textplan_converter + substrait_common + substrait_expression + substrait_proto + symbol_table + error_listener date::date) if(${SUBSTRAIT_CPP_BUILD_TESTING}) diff --git a/src/substrait/textplan/parser/SubstraitPlanVisitor.cpp b/src/substrait/textplan/parser/SubstraitPlanVisitor.cpp index f10e07ea..33657ad3 100644 --- a/src/substrait/textplan/parser/SubstraitPlanVisitor.cpp +++ b/src/substrait/textplan/parser/SubstraitPlanVisitor.cpp @@ -100,7 +100,7 @@ std::any SubstraitPlanVisitor::visitFunction( if (ctx->name() != nullptr) { auto colonPos = ctx->name()->getText().find_first_of(':'); if (colonPos == std::string::npos || - ctx->name()->getText().substr(colonPos+1).empty()) { + ctx->name()->getText().substr(colonPos + 1).empty()) { errorListener_->addError( ctx->getStart(), "Functions should have an associated type."); } diff --git a/src/substrait/textplan/tests/CMakeLists.txt b/src/substrait/textplan/tests/CMakeLists.txt index 0657a24d..f444773e 100644 --- a/src/substrait/textplan/tests/CMakeLists.txt +++ b/src/substrait/textplan/tests/CMakeLists.txt @@ -19,54 +19,51 @@ add_test_case( gtest gtest_main) -option( - SUBSTRAIT_CPP_ROUNDTRIP_TESTING - "Enable substrait-cpp textplan roundtrip tests." 
- OFF) +option(SUBSTRAIT_CPP_ROUNDTRIP_TESTING + "Enable substrait-cpp textplan roundtrip tests." OFF) if(${SUBSTRAIT_CPP_ROUNDTRIP_TESTING}) -add_test_case( - round_trip_test - SOURCES - RoundtripTest.cpp - EXTRA_LINK_LIBS - substrait_textplan_converter - substrait_textplan_loader - substrait_common - substrait_proto - parse_result_matchers - protobuf-matchers - fmt::fmt-header-only - absl::strings - gmock - gtest - gtest_main) + add_test_case( + round_trip_test + SOURCES + RoundtripTest.cpp + EXTRA_LINK_LIBS + substrait_textplan_converter + substrait_textplan_loader + substrait_common + substrait_proto + parse_result_matchers + protobuf-matchers + fmt::fmt-header-only + absl::strings + gmock + gtest + gtest_main) -cmake_path(GET CMAKE_CURRENT_SOURCE_DIR PARENT_PATH TEXTPLAN_SOURCE_DIR) + cmake_path(GET CMAKE_CURRENT_SOURCE_DIR PARENT_PATH TEXTPLAN_SOURCE_DIR) -add_custom_command( - TARGET round_trip_test - POST_BUILD - COMMAND ${CMAKE_COMMAND} -E echo "Copying unit test data.." - COMMAND ${CMAKE_COMMAND} -E make_directory - "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data" - COMMAND - ${CMAKE_COMMAND} -E copy - "${TEXTPLAN_SOURCE_DIR}/converter/data/q6_first_stage.json" - "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data/q6_first_stage.json" - COMMAND ${CMAKE_COMMAND} -E copy "${TEXTPLAN_SOURCE_DIR}/data/*.json" - "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data/") + add_custom_command( + TARGET round_trip_test + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E echo "Copying unit test data.." 
+ COMMAND ${CMAKE_COMMAND} -E make_directory + "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data" + COMMAND + ${CMAKE_COMMAND} -E copy + "${TEXTPLAN_SOURCE_DIR}/converter/data/q6_first_stage.json" + "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data/q6_first_stage.json" + COMMAND ${CMAKE_COMMAND} -E copy "${TEXTPLAN_SOURCE_DIR}/data/*.json" + "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data/") -message( - STATUS - "test data will be here: ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data" -) + message( + STATUS + "test data will be here: ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/tests/data") else() -message( - STATUS - "Round trip testing is turned off. Add SUBSTRAIT_CPP_ROUNDTRIP_TESTING=on to enable." -) + message( + STATUS + "Round trip testing is turned off. Add SUBSTRAIT_CPP_ROUNDTRIP_TESTING=on to enable." + ) endif() unset(SUBSTRAIT_CPP_ROUNDTRIP_TESTING CACHE) From 7921f5962905b20784b3157ec26fc6fc4af90b6f Mon Sep 17 00:00:00 2001 From: David Sisson Date: Wed, 7 Jun 2023 12:56:36 -0700 Subject: [PATCH 28/33] Clang tidy fix. 
--- src/substrait/textplan/parser/SubstraitPlanVisitor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/substrait/textplan/parser/SubstraitPlanVisitor.h b/src/substrait/textplan/parser/SubstraitPlanVisitor.h index 2bfb0fc7..84f36b3e 100644 --- a/src/substrait/textplan/parser/SubstraitPlanVisitor.h +++ b/src/substrait/textplan/parser/SubstraitPlanVisitor.h @@ -14,7 +14,7 @@ class SubstraitPlanVisitor : public SubstraitPlanTypeVisitor { SubstraitPlanVisitor( const SymbolTable& symbolTable, std::shared_ptr errorListener) - : SubstraitPlanTypeVisitor(symbolTable, errorListener) {} + : SubstraitPlanTypeVisitor(symbolTable, std::move(errorListener)) {} [[nodiscard]] std::shared_ptr getSymbolTable() const { return symbolTable_; From 299eb0c5fbcb6d6d5832826822aab5e6581c7114 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Wed, 7 Jun 2023 16:05:48 -0700 Subject: [PATCH 29/33] Add roundtrip testing to the clang tidy script so it can find all the headers. --- scripts/run-clang-tidy.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/run-clang-tidy.sh b/scripts/run-clang-tidy.sh index d3e9b3ac..293ef869 100755 --- a/scripts/run-clang-tidy.sh +++ b/scripts/run-clang-tidy.sh @@ -4,7 +4,7 @@ SCRIPTDIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) WORKDIR="$( cd $SCRIPTDIR/.. && pwd )" # Make compile_command.json -rm -rf tmp && mkdir tmp && cmake -Btmp -DCMAKE_EXPORT_COMPILE_COMMANDS=ON +rm -rf tmp && mkdir tmp && cmake -Btmp -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DSUBSTRAIT_CPP_ROUNDTRIP_TESTING=ON # Build substrait protobuf pushd tmp/src/substrait/proto && make -j && popd || exit # Build textplan grammar From 926ebce6859950a9636136c190e3f7df1b858c60 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Thu, 8 Jun 2023 12:32:30 -0700 Subject: [PATCH 30/33] Updated based on review notes. 
--- src/substrait/textplan/converter/PlanPrinterVisitor.cpp | 6 +++++- src/substrait/textplan/parser/ParseText.cpp | 1 + src/substrait/textplan/parser/SubstraitPlanTypeVisitor.h | 4 ++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/substrait/textplan/converter/PlanPrinterVisitor.cpp b/src/substrait/textplan/converter/PlanPrinterVisitor.cpp index a835dd90..d9bef6a0 100644 --- a/src/substrait/textplan/converter/PlanPrinterVisitor.cpp +++ b/src/substrait/textplan/converter/PlanPrinterVisitor.cpp @@ -52,8 +52,12 @@ std::string invocationToString( return "distinct"; case ::substrait::proto:: AggregateFunction_AggregationInvocation_AGGREGATION_INVOCATION_UNSPECIFIED: - default: return "unspecified"; + case ::substrait::proto:: + AggregateFunction_AggregationInvocation_AggregateFunction_AggregationInvocation_INT_MIN_SENTINEL_DO_NOT_USE_: + case ::substrait::proto:: + AggregateFunction_AggregationInvocation_AggregateFunction_AggregationInvocation_INT_MAX_SENTINEL_DO_NOT_USE_: + break; } } diff --git a/src/substrait/textplan/parser/ParseText.cpp b/src/substrait/textplan/parser/ParseText.cpp index bfea7682..29d9b396 100644 --- a/src/substrait/textplan/parser/ParseText.cpp +++ b/src/substrait/textplan/parser/ParseText.cpp @@ -87,6 +87,7 @@ ParseResult parseStream(antlr4::ANTLRInputStream stream) { try { relationVisitor->visitPlan(tree); } catch (std::invalid_argument ex) { + // Catches the any_cast exception and logs a useful error message. 
errorListener.syntaxError( &parser, nullptr, diff --git a/src/substrait/textplan/parser/SubstraitPlanTypeVisitor.h b/src/substrait/textplan/parser/SubstraitPlanTypeVisitor.h index 51d0efd8..6370b2be 100644 --- a/src/substrait/textplan/parser/SubstraitPlanTypeVisitor.h +++ b/src/substrait/textplan/parser/SubstraitPlanTypeVisitor.h @@ -37,6 +37,10 @@ class SubstraitPlanTypeVisitor : public SubstraitPlanParserBaseVisitor { const antlr4::ParserRuleContext* ctx, const ParameterizedType& decodedType); + // Identifies whether the given context has a parent node of a constant + // including a struct. This allows {3years, 1month, + // 2days}_interval_year_month_day to have the optional label tags which are + // not real types. bool insideStructLiteralWithExternalType(const antlr4::RuleContext* ctx); std::shared_ptr symbolTable_; From 2e98f093a4a674129d6dcc8d634b1cd86ec6c03c Mon Sep 17 00:00:00 2001 From: David Sisson Date: Thu, 8 Jun 2023 12:34:16 -0700 Subject: [PATCH 31/33] Cleaned up includes. 
--- src/substrait/textplan/converter/PlanPrinterVisitor.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/substrait/textplan/converter/PlanPrinterVisitor.cpp b/src/substrait/textplan/converter/PlanPrinterVisitor.cpp index d9bef6a0..e59a2bc6 100644 --- a/src/substrait/textplan/converter/PlanPrinterVisitor.cpp +++ b/src/substrait/textplan/converter/PlanPrinterVisitor.cpp @@ -3,16 +3,13 @@ #include "substrait/textplan/converter/PlanPrinterVisitor.h" #include -#include #include #include #include "date/date.h" -#include "fmt/format.h" #include "substrait/expression/DecimalLiteral.h" #include "substrait/proto/ProtoUtils.h" #include "substrait/proto/algebra.pb.h" -#include "substrait/proto/plan.pb.h" #include "substrait/textplan/Any.h" #include "substrait/textplan/Finally.h" #include "substrait/textplan/StructuredSymbolData.h" From 889baf8c39b1c8bbae69cca53315a37587200621 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Mon, 12 Jun 2023 09:09:52 -0700 Subject: [PATCH 32/33] Replaced iostream with sstream to keep included header size down. 
--- src/substrait/textplan/ParseResult.cpp | 3 +-- src/substrait/textplan/converter/PlanPrinterVisitor.cpp | 1 - src/substrait/textplan/converter/Tool.cpp | 2 +- src/substrait/textplan/parser/ParseText.cpp | 3 +-- src/substrait/textplan/parser/Tool.cpp | 2 +- 5 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/substrait/textplan/ParseResult.cpp b/src/substrait/textplan/ParseResult.cpp index 75be2807..41ac974d 100644 --- a/src/substrait/textplan/ParseResult.cpp +++ b/src/substrait/textplan/ParseResult.cpp @@ -2,8 +2,7 @@ #include "substrait/textplan/ParseResult.h" -#include -#include +#include namespace io::substrait::textplan { diff --git a/src/substrait/textplan/converter/PlanPrinterVisitor.cpp b/src/substrait/textplan/converter/PlanPrinterVisitor.cpp index e59a2bc6..aeaf8e07 100644 --- a/src/substrait/textplan/converter/PlanPrinterVisitor.cpp +++ b/src/substrait/textplan/converter/PlanPrinterVisitor.cpp @@ -2,7 +2,6 @@ #include "substrait/textplan/converter/PlanPrinterVisitor.h" -#include #include #include diff --git a/src/substrait/textplan/converter/Tool.cpp b/src/substrait/textplan/converter/Tool.cpp index 44842055..9c3d652b 100644 --- a/src/substrait/textplan/converter/Tool.cpp +++ b/src/substrait/textplan/converter/Tool.cpp @@ -4,7 +4,7 @@ #include #endif -#include +#include #include "substrait/textplan/SymbolTablePrinter.h" #include "substrait/textplan/converter/LoadBinary.h" diff --git a/src/substrait/textplan/parser/ParseText.cpp b/src/substrait/textplan/parser/ParseText.cpp index 29d9b396..d54ac15d 100644 --- a/src/substrait/textplan/parser/ParseText.cpp +++ b/src/substrait/textplan/parser/ParseText.cpp @@ -5,12 +5,11 @@ #include #include #include -#include +#include #include #include "SubstraitPlanLexer/SubstraitPlanLexer.h" #include "SubstraitPlanParser/SubstraitPlanParser.h" -#include "substrait/textplan/Any.h" #include "substrait/textplan/StructuredSymbolData.h" #include "substrait/textplan/parser/SubstraitParserErrorListener.h" 
#include "substrait/textplan/parser/SubstraitPlanPipelineVisitor.h" diff --git a/src/substrait/textplan/parser/Tool.cpp b/src/substrait/textplan/parser/Tool.cpp index 9c980ec6..84302d9c 100644 --- a/src/substrait/textplan/parser/Tool.cpp +++ b/src/substrait/textplan/parser/Tool.cpp @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: Apache-2.0 */ #include -#include +#include #include "substrait/textplan/SymbolTablePrinter.h" #include "substrait/textplan/parser/ParseText.h" From 68fea3b5289f237f1e2c01a6af5e5e5ddb93a071 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Mon, 12 Jun 2023 09:16:18 -0700 Subject: [PATCH 33/33] A few clang fixes. --- src/substrait/textplan/converter/PlanPrinterVisitor.cpp | 2 ++ src/substrait/textplan/parser/ParseText.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/substrait/textplan/converter/PlanPrinterVisitor.cpp b/src/substrait/textplan/converter/PlanPrinterVisitor.cpp index aeaf8e07..dd6e869c 100644 --- a/src/substrait/textplan/converter/PlanPrinterVisitor.cpp +++ b/src/substrait/textplan/converter/PlanPrinterVisitor.cpp @@ -55,6 +55,8 @@ std::string invocationToString( AggregateFunction_AggregationInvocation_AggregateFunction_AggregationInvocation_INT_MAX_SENTINEL_DO_NOT_USE_: break; } + // We shouldn't reach here but return something to make the compiler happy. + return "unspecified"; } } // namespace diff --git a/src/substrait/textplan/parser/ParseText.cpp b/src/substrait/textplan/parser/ParseText.cpp index d54ac15d..554e4a26 100644 --- a/src/substrait/textplan/parser/ParseText.cpp +++ b/src/substrait/textplan/parser/ParseText.cpp @@ -5,8 +5,8 @@ #include #include #include -#include #include +#include #include "SubstraitPlanLexer/SubstraitPlanLexer.h" #include "SubstraitPlanParser/SubstraitPlanParser.h"