diff --git a/src/substrait/textplan/ParseResult.h b/src/substrait/textplan/ParseResult.h index fab7ddd0..12ff6ccf 100644 --- a/src/substrait/textplan/ParseResult.h +++ b/src/substrait/textplan/ParseResult.h @@ -46,6 +46,10 @@ class ParseResult { return errors; } + void addErrors(const std::vector& errors) { + syntaxErrors_.insert(syntaxErrors_.end(), errors.begin(), errors.end()); + } + // Add the capability for ::testing::PrintToString to print ParseResult. friend std::ostream& operator<<(std::ostream& os, const ParseResult& result); diff --git a/src/substrait/textplan/StructuredSymbolData.h b/src/substrait/textplan/StructuredSymbolData.h index 676fa211..98c09cc9 100644 --- a/src/substrait/textplan/StructuredSymbolData.h +++ b/src/substrait/textplan/StructuredSymbolData.h @@ -24,6 +24,8 @@ struct RelationData { const SymbolInfo* continuingPipeline{nullptr}; // The next nodes in the pipelines that this node starts. std::vector newPipelines; + // Expressions in this relation consume subqueries with these symbols. + std::vector subQueryPipelines; // The information corresponding to the relation without any references to // other relations or inputs. @@ -46,6 +48,10 @@ struct RelationData { // references to the symbol would use the alias.) std::map generatedFieldReferenceAlternativeExpression; + // Temporary storage for global aliases for expressions. Used during the + // construction of a relation. + std::map generatedFieldReferenceAliases; + // If populated, supersedes the combination of fieldReferences and // generatedFieldReferences for the field symbols exposed by this relation. 
std::vector outputFieldReferences; diff --git a/src/substrait/textplan/SubstraitErrorListener.h b/src/substrait/textplan/SubstraitErrorListener.h index 81d8356f..1f414352 100644 --- a/src/substrait/textplan/SubstraitErrorListener.h +++ b/src/substrait/textplan/SubstraitErrorListener.h @@ -30,6 +30,10 @@ class SubstraitErrorListener { addError(-1, -1, msg); }; + void addErrorInstances(const std::vector& errors) { + errors_.insert(errors_.end(), errors.begin(), errors.end()); + } + const std::vector& getErrors() { return errors_; }; diff --git a/src/substrait/textplan/SymbolTable.cpp b/src/substrait/textplan/SymbolTable.cpp index fdfef0bb..66d25579 100644 --- a/src/substrait/textplan/SymbolTable.cpp +++ b/src/substrait/textplan/SymbolTable.cpp @@ -61,7 +61,8 @@ const SymbolInfo SymbolInfo::kUnknown = { std::nullopt}; bool operator==(const SymbolInfo& left, const SymbolInfo& right) { - return (left.name == right.name) && (left.location == right.location) && + return (left.name == right.name) && + (left.sourceLocation == right.sourceLocation) && (left.type == right.type); } @@ -118,13 +119,31 @@ size_t SymbolTable::findSymbolIndex(const SymbolInfo& symbol) { return symbols_.size(); } -void SymbolTable::updateLocation( +void SymbolTable::addPermanentLocation( const SymbolInfo& symbol, const Location& location) { auto index = findSymbolIndex(symbol); + symbols_[index]->permanentLocation = location; symbolsByLocation_.insert(std::make_pair(location, index)); } +void SymbolTable::setParentQueryLocation( + const io::substrait::textplan::SymbolInfo& symbol, + const io::substrait::textplan::Location& location) { + auto index = findSymbolIndex(symbol); + symbols_[index]->parentQueryLocation = location; + + int highestIndex = -1; + for (const auto& sym : symbols_) { + if (sym->parentQueryLocation == location) { + if (sym->parentQueryIndex > highestIndex) { + highestIndex = sym->parentQueryIndex; + } + } + } + symbols_[index]->parentQueryIndex = highestIndex + 1; +} + void 
SymbolTable::addAlias(const std::string& alias, const SymbolInfo* symbol) { auto index = findSymbolIndex(*symbol); symbols_[index]->alias = alias; @@ -169,6 +188,19 @@ const SymbolInfo* SymbolTable::lookupSymbolByLocationAndTypes( return nullptr; } +const SymbolInfo* SymbolTable::lookupSymbolByParentQueryAndType( + const Location& location, + int index, + SymbolType type) const { + for (const auto& symbol : symbols_) { + if (symbol->parentQueryLocation == location && + symbol->parentQueryIndex == index && symbol->type == type) { + return symbol.get(); + } + } + return nullptr; +} + const SymbolInfo& SymbolTable::nthSymbolByType(uint32_t n, SymbolType type) const { int count = 0; @@ -200,7 +232,11 @@ std::string SymbolTable::toDebugString() const { } auto relationData = ANY_CAST(std::shared_ptr, symbol->blob); result << std::left << std::setw(4) << relationCount++; - result << std::left << std::setw(20) << symbol->name << std::endl; + result << std::left << std::setw(20) << symbol->name; + if (!relationData->subQueryPipelines.empty()) { + result << " SQC=" << relationData->subQueryPipelines.size(); + } + result << std::endl; int32_t fieldNum = 0; for (const auto& field : relationData->fieldReferences) { diff --git a/src/substrait/textplan/SymbolTable.h b/src/substrait/textplan/SymbolTable.h index 97e54879..67acb78a 100644 --- a/src/substrait/textplan/SymbolTable.h +++ b/src/substrait/textplan/SymbolTable.h @@ -33,7 +33,7 @@ enum class SymbolType { }; enum class RelationType { - // Logical plans + // Logical kUnknown = 0, kRead = 1, kProject = 2, @@ -45,11 +45,11 @@ enum class RelationType { kFilter = 8, kSet = 9, - // Physical plans + // Physical kHashJoin = 31, kMergeJoin = 32, - // Write relations, currently unreachable in Plan protos. + // Write kExchange = 50, kDdl = 51, kWrite = 52, @@ -79,7 +79,10 @@ struct SymbolInfo { std::string name; std::string alias{}; // If present, use this instead of name. 
const SymbolInfo* schema{nullptr}; // The related schema symbol if present. - Location location; + Location sourceLocation; + Location permanentLocation{Location::kUnknownLocation}; + Location parentQueryLocation{Location::kUnknownLocation}; + int parentQueryIndex{-1}; SymbolType type; std::any subtype; std::any blob; @@ -91,7 +94,7 @@ struct SymbolInfo { std::any newSubtype, std::any newBlob) : name(std::move(newName)), - location(newLocation), + sourceLocation(newLocation), type(newType), subtype(std::move(newSubtype)), blob(std::move(newBlob)){}; @@ -145,8 +148,13 @@ class SymbolTable { const std::any& subtype, const std::any& blob); - // Changes the location for a specified existing symbol. - void updateLocation(const SymbolInfo& symbol, const Location& location); + // Changes the permanent location for a specified existing symbol. + void addPermanentLocation(const SymbolInfo& symbol, const Location& location); + + // Sets the location of the parent query. + void setParentQueryLocation( + const SymbolInfo& symbol, + const Location& location); // Adds an alias to the given symbol. 
void addAlias(const std::string& alias, const SymbolInfo* symbol); @@ -165,6 +173,9 @@ class SymbolTable { const Location& location, std::unordered_set types) const; + [[nodiscard]] const SymbolInfo* lookupSymbolByParentQueryAndType( + const Location& location, int index, SymbolType type) const; + [[nodiscard]] const SymbolInfo& nthSymbolByType(uint32_t n, SymbolType type) const; diff --git a/src/substrait/textplan/SymbolTablePrinter.cpp b/src/substrait/textplan/SymbolTablePrinter.cpp index 1b460624..66329d96 100644 --- a/src/substrait/textplan/SymbolTablePrinter.cpp +++ b/src/substrait/textplan/SymbolTablePrinter.cpp @@ -7,7 +7,6 @@ #include "substrait/common/Exceptions.h" #include "substrait/proto/algebra.pb.h" -#include "substrait/proto/extensions/extensions.pb.h" #include "substrait/textplan/Any.h" #include "substrait/textplan/StructuredSymbolData.h" #include "substrait/textplan/SymbolTable.h" @@ -81,7 +80,8 @@ std::string typeToText(const ::substrait::proto::Type& type) { std::string relationToText( const SymbolTable& symbolTable, - const SymbolInfo& info) { + const SymbolInfo& info, + SubstraitErrorListener* errorListener) { auto relationData = ANY_CAST(std::shared_ptr, info.blob); if (relationData->relation.rel_type_case() == ::substrait::proto::Rel::REL_TYPE_NOT_SET) { @@ -90,7 +90,19 @@ std::string relationToText( } PlanPrinterVisitor printer(symbolTable); - return printer.printRelation(info); + auto result = printer.printRelation(info); + errorListener->addErrorInstances(printer.getErrorListener()->getErrors()); + return result; +} + +const SymbolInfo* getNextSinglePipeline( + const std::shared_ptr& relationData) { + if (relationData->continuingPipeline != nullptr) { + return relationData->continuingPipeline; + } else if (relationData->newPipelines.size() == 1) { + return relationData->newPipelines[0]; + } + return nullptr; } std::vector pipelineToPath( @@ -134,6 +146,22 @@ std::string outputPipelinesSection(const SymbolTable& symbolTable) { text << 
";\n"; hasPreviousText = true; } + for (auto pipelineStart : relationData->subQueryPipelines) { + auto pipeline = pipelineToPath(symbolTable, pipelineStart); + // No need to include this node since the subquery already points to the + // end of this pipeline. + bool isFirstPipe = true; + for (auto pipe = pipeline.rbegin(); pipe != pipeline.rend(); pipe++) { + if (isFirstPipe) { + text << " " << *pipe; + } else { + text << " -> " << *pipe; + } + isFirstPipe = false; + } + text << ";\n"; + hasPreviousText = true; + } } if (hasPreviousText) { return "pipelines {\n" + text.str() + "}\n"; @@ -141,7 +169,9 @@ std::string outputPipelinesSection(const SymbolTable& symbolTable) { return text.str(); } -std::string outputRelationsSection(const SymbolTable& symbolTable) { +std::string outputRelationsSection( + const SymbolTable& symbolTable, + SubstraitErrorListener* errorListener) { std::stringstream text; bool hasPreviousText = false; for (const SymbolInfo& info : symbolTable) { @@ -152,7 +182,7 @@ std::string outputRelationsSection(const SymbolTable& symbolTable) { if (hasPreviousText) { text << "\n"; } - text << relationToText(symbolTable, info); + text << relationToText(symbolTable, info, errorListener); hasPreviousText = true; } return text.str(); @@ -260,7 +290,7 @@ std::string outputSourcesSection(const SymbolTable& symbolTable) { text << "source named_table " << info.name << " {\n"; text << " names = [\n"; for (const auto& sym : - symbolTable.lookupSymbolsByLocation(info.location)) { + symbolTable.lookupSymbolsByLocation(info.sourceLocation)) { if (sym->type == SymbolType::kSourceDetail) { text << " \"" << sym->name << "\",\n"; } @@ -459,7 +489,9 @@ void outputFunctionsToBinaryPlan( } // namespace // TODO -- Update so that errors occurring during printing are captured. 
-std::string SymbolTablePrinter::outputToText(const SymbolTable& symbolTable) { +std::string SymbolTablePrinter::outputToText( + const SymbolTable& symbolTable, + SubstraitErrorListener* errorListener) { std::stringstream text; bool hasPreviousText = false; @@ -469,7 +501,7 @@ std::string SymbolTablePrinter::outputToText(const SymbolTable& symbolTable) { hasPreviousText = true; } - newText = outputRelationsSection(symbolTable); + newText = outputRelationsSection(symbolTable, errorListener); if (!newText.empty()) { if (hasPreviousText) { text << "\n"; @@ -516,9 +548,11 @@ std::string SymbolTablePrinter::outputToText(const SymbolTable& symbolTable) { } void SymbolTablePrinter::addInputsToRelation( + const SymbolTable& symbolTable, const SymbolInfo& symbolInfo, ::substrait::proto::Rel* relation) { auto relationData = ANY_CAST(std::shared_ptr, symbolInfo.blob); + int consumedPipelines = 0; // Connect up the incoming inputs in the relation data to the appropriate // input/left/right/inputs of this relation (which recursively also needs to @@ -532,24 +566,36 @@ void SymbolTablePrinter::addInputsToRelation( case RelationType::kRead: // No inputs to add. 
break; - case RelationType::kProject: - if (relationData->continuingPipeline != nullptr) { - auto continuingRelationData = ANY_CAST( - std::shared_ptr, - relationData->continuingPipeline->blob); + case RelationType::kProject: { + auto nextPipeline = getNextSinglePipeline(relationData); + if (nextPipeline != nullptr) { + auto nextRelationData = + ANY_CAST(std::shared_ptr, nextPipeline->blob); *relation->mutable_project()->mutable_input() = - continuingRelationData->relation; + nextRelationData->relation; addInputsToRelation( - *relationData->continuingPipeline, + symbolTable, + *nextPipeline, relation->mutable_project()->mutable_input()); + for (auto& expr : *relation->mutable_project()->mutable_expressions()) { + addInputsToExpression( + symbolTable, + relationData->subQueryPipelines, + &expr, + &consumedPipelines); + } + } else { + SUBSTRAIT_FAIL("Internal error: Incorrect number of pipelines."); } break; + } case RelationType::kJoin: if (relationData->newPipelines.size() == kBinaryRelationInputCount) { auto leftRelationData = ANY_CAST( std::shared_ptr, relationData->newPipelines[0]->blob); *relation->mutable_join()->mutable_left() = leftRelationData->relation; addInputsToRelation( + symbolTable, *relationData->newPipelines[0], relation->mutable_join()->mutable_left()); @@ -558,16 +604,32 @@ void SymbolTablePrinter::addInputsToRelation( *relation->mutable_join()->mutable_right() = rightRelationData->relation; addInputsToRelation( + symbolTable, *relationData->newPipelines[1], relation->mutable_join()->mutable_right()); + if (relation->join().has_expression()) { + addInputsToExpression( + symbolTable, + relationData->subQueryPipelines, + relation->mutable_join()->mutable_expression(), + &consumedPipelines); + } + if (relation->join().has_post_join_filter()) { + addInputsToExpression( + symbolTable, + relationData->subQueryPipelines, + relation->mutable_join()->mutable_post_join_filter(), + &consumedPipelines); + } + break; } - break; case RelationType::kCross: if 
(relationData->newPipelines.size() == kBinaryRelationInputCount) { auto leftRelationData = ANY_CAST( std::shared_ptr, relationData->newPipelines[0]->blob); *relation->mutable_cross()->mutable_left() = leftRelationData->relation; addInputsToRelation( + symbolTable, *relationData->newPipelines[0], relation->mutable_cross()->mutable_left()); @@ -576,74 +638,137 @@ void SymbolTablePrinter::addInputsToRelation( *relation->mutable_cross()->mutable_right() = rightRelationData->relation; addInputsToRelation( + symbolTable, *relationData->newPipelines[1], relation->mutable_cross()->mutable_right()); } break; - case RelationType::kFetch: - if (relationData->continuingPipeline != nullptr) { - auto continuingRelationData = ANY_CAST( - std::shared_ptr, - relationData->continuingPipeline->blob); + case RelationType::kFetch: { + auto nextPipeline = getNextSinglePipeline(relationData); + if (nextPipeline != nullptr) { + auto nextRelationData = + ANY_CAST(std::shared_ptr, nextPipeline->blob); *relation->mutable_fetch()->mutable_input() = - continuingRelationData->relation; + nextRelationData->relation; addInputsToRelation( - *relationData->continuingPipeline, + symbolTable, + *nextPipeline, relation->mutable_fetch()->mutable_input()); + } else { + SUBSTRAIT_FAIL("Internal error: Incorrect number of pipelines."); } break; - case RelationType::kAggregate: - if (relationData->continuingPipeline != nullptr) { - auto continuingRelationData = ANY_CAST( - std::shared_ptr, - relationData->continuingPipeline->blob); + } + case RelationType::kAggregate: { + auto nextPipeline = getNextSinglePipeline(relationData); + if (nextPipeline != nullptr) { + auto nextRelationData = + ANY_CAST(std::shared_ptr, nextPipeline->blob); *relation->mutable_aggregate()->mutable_input() = - continuingRelationData->relation; + nextRelationData->relation; addInputsToRelation( - *relationData->continuingPipeline, + symbolTable, + *nextPipeline, relation->mutable_aggregate()->mutable_input()); + } else { + 
SUBSTRAIT_FAIL("Internal error: Incorrect number of pipelines."); + } + for (auto& grouping : + *relation->mutable_aggregate()->mutable_groupings()) { + for (auto& groupExpression : *grouping.mutable_grouping_expressions()) { + addInputsToExpression( + symbolTable, + relationData->subQueryPipelines, + &groupExpression, + &consumedPipelines); + } + } + for (auto& measure : *relation->mutable_aggregate()->mutable_measures()) { + if (measure.has_measure()) { + for (auto& argument : + *measure.mutable_measure()->mutable_arguments()) { + if (argument.arg_type_case() == + ::substrait::proto::FunctionArgument::kValue) { + addInputsToExpression( + symbolTable, + relationData->subQueryPipelines, + argument.mutable_value(), + &consumedPipelines); + } + } + } + if (measure.has_filter()) { + addInputsToExpression( + symbolTable, + relationData->subQueryPipelines, + measure.mutable_filter(), + &consumedPipelines); + } } break; - case RelationType::kSort: - if (relationData->continuingPipeline != nullptr) { - auto continuingRelationData = ANY_CAST( - std::shared_ptr, - relationData->continuingPipeline->blob); - *relation->mutable_sort()->mutable_input() = - continuingRelationData->relation; + } + case RelationType::kSort: { + auto nextPipeline = getNextSinglePipeline(relationData); + if (nextPipeline != nullptr) { + auto nextRelationData = + ANY_CAST(std::shared_ptr, nextPipeline->blob); + *relation->mutable_sort()->mutable_input() = nextRelationData->relation; addInputsToRelation( - *relationData->continuingPipeline, + symbolTable, + *nextPipeline, relation->mutable_sort()->mutable_input()); + for (auto& sort : *relation->mutable_sort()->mutable_sorts()) { + addInputsToExpression( + symbolTable, + relationData->subQueryPipelines, + sort.mutable_expr(), + &consumedPipelines); + } + } else { + SUBSTRAIT_FAIL("Internal error: Incorrect number of pipelines."); } break; - case RelationType::kFilter: - if (relationData->continuingPipeline != nullptr) { - auto continuingRelationData = 
ANY_CAST( - std::shared_ptr, - relationData->continuingPipeline->blob); + } + case RelationType::kFilter: { + auto nextPipeline = getNextSinglePipeline(relationData); + if (nextPipeline != nullptr) { + auto nextRelationData = + ANY_CAST(std::shared_ptr, nextPipeline->blob); *relation->mutable_filter()->mutable_input() = - continuingRelationData->relation; + nextRelationData->relation; addInputsToRelation( - *relationData->continuingPipeline, + symbolTable, + *nextPipeline, relation->mutable_filter()->mutable_input()); + if (relation->filter().has_condition()) { + addInputsToExpression( + symbolTable, + relationData->subQueryPipelines, + relation->mutable_filter()->mutable_condition(), + &consumedPipelines); + } + } else { + SUBSTRAIT_FAIL("Internal error: Incorrect number of pipelines."); } break; + } case RelationType::kSet: for (const auto& pipeline : relationData->newPipelines) { auto inputRelationData = ANY_CAST(std::shared_ptr, pipeline->blob); auto* input = relation->mutable_set()->add_inputs(); *input = inputRelationData->relation; - addInputsToRelation(*pipeline, input); + addInputsToRelation(symbolTable, *pipeline, input); } break; - case RelationType::kHashJoin: + case RelationType::kHashJoin: { if (relationData->newPipelines.size() == kBinaryRelationInputCount) { auto leftRelationData = ANY_CAST( std::shared_ptr, relationData->newPipelines[0]->blob); *relation->mutable_hash_join()->mutable_left() = leftRelationData->relation; addInputsToRelation( + symbolTable, *relationData->newPipelines[0], relation->mutable_hash_join()->mutable_left()); @@ -652,17 +777,27 @@ void SymbolTablePrinter::addInputsToRelation( *relation->mutable_hash_join()->mutable_right() = rightRelationData->relation; addInputsToRelation( + symbolTable, *relationData->newPipelines[1], relation->mutable_hash_join()->mutable_right()); } + if (relation->hash_join().has_post_join_filter()) { + addInputsToExpression( + symbolTable, + relationData->subQueryPipelines, + 
relation->mutable_hash_join()->mutable_post_join_filter(), + &consumedPipelines); + } break; - case RelationType::kMergeJoin: + } + case RelationType::kMergeJoin: { if (relationData->newPipelines.size() == kBinaryRelationInputCount) { auto leftRelationData = ANY_CAST( std::shared_ptr, relationData->newPipelines[0]->blob); *relation->mutable_merge_join()->mutable_left() = leftRelationData->relation; addInputsToRelation( + symbolTable, *relationData->newPipelines[0], relation->mutable_merge_join()->mutable_left()); @@ -671,36 +806,50 @@ void SymbolTablePrinter::addInputsToRelation( *relation->mutable_merge_join()->mutable_right() = rightRelationData->relation; addInputsToRelation( + symbolTable, *relationData->newPipelines[1], relation->mutable_merge_join()->mutable_right()); } + if (relation->merge_join().has_post_join_filter()) { + addInputsToExpression( + symbolTable, + relationData->subQueryPipelines, + relation->mutable_merge_join()->mutable_post_join_filter(), + &consumedPipelines); + } break; + } case RelationType::kExchange: case RelationType::kDdl: case RelationType::kWrite: // Not yet possible to reach these relations in plans. break; case RelationType::kExtensionLeaf: + // Nothing to do here. 
break; - case RelationType::kExtensionSingle: - if (relationData->continuingPipeline != nullptr) { - auto continuingRelationData = ANY_CAST( - std::shared_ptr, - relationData->continuingPipeline->blob); + case RelationType::kExtensionSingle: { + auto nextPipeline = getNextSinglePipeline(relationData); + if (nextPipeline != nullptr) { + auto nextRelationData = + ANY_CAST(std::shared_ptr, nextPipeline->blob); *relation->mutable_extension_single()->mutable_input() = - continuingRelationData->relation; + nextRelationData->relation; addInputsToRelation( - *relationData->continuingPipeline, + symbolTable, + *nextPipeline, relation->mutable_extension_single()->mutable_input()); + } else { + SUBSTRAIT_FAIL("Internal error: Incorrect number of pipelines."); } break; + } case RelationType::kExtensionMulti: for (const auto& pipeline : relationData->newPipelines) { auto inputRelationData = ANY_CAST(std::shared_ptr, pipeline->blob); auto* input = relation->mutable_extension_multi()->add_inputs(); *input = inputRelationData->relation; - addInputsToRelation(*pipeline, input); + addInputsToRelation(symbolTable, *pipeline, input); } break; case RelationType::kUnknown: @@ -708,13 +857,237 @@ void SymbolTablePrinter::addInputsToRelation( } } +void SymbolTablePrinter::addInputsToExpression( + const SymbolTable& symbolTable, + const std::vector& symbolInfos, + ::substrait::proto::Expression* expression, + int* consumedPipelines) { + switch (expression->rex_type_case()) { + case ::substrait::proto::Expression::kLiteral: + case ::substrait::proto::Expression::kSelection: + return; + case ::substrait::proto::Expression::kScalarFunction: { + for (auto& arg : + *expression->mutable_scalar_function()->mutable_arguments()) { + if (arg.arg_type_case() == + ::substrait::proto::FunctionArgument::kValue) { + addInputsToExpression( + symbolTable, symbolInfos, arg.mutable_value(), consumedPipelines); + } + } + break; + } + case ::substrait::proto::Expression::kWindowFunction: + for (auto& arg : + 
*expression->mutable_window_function()->mutable_arguments()) { + if (arg.arg_type_case() == + ::substrait::proto::FunctionArgument::kValue) { + addInputsToExpression( + symbolTable, symbolInfos, arg.mutable_value(), consumedPipelines); + } + } + for (auto& partition : + *expression->mutable_window_function()->mutable_partitions()) { + addInputsToExpression( + symbolTable, symbolInfos, &partition, consumedPipelines); + } + break; + case ::substrait::proto::Expression::kIfThen: + for (auto& ifThen : *expression->mutable_if_then()->mutable_ifs()) { + addInputsToExpression( + symbolTable, symbolInfos, ifThen.mutable_if_(), consumedPipelines); + addInputsToExpression( + symbolTable, symbolInfos, ifThen.mutable_then(), consumedPipelines); + } + if (expression->if_then().has_else_()) { + addInputsToExpression( + symbolTable, + symbolInfos, + expression->mutable_if_then()->mutable_else_(), + consumedPipelines); + } + break; + case ::substrait::proto::Expression::kSwitchExpression: + if (expression->switch_expression().has_match()) { + addInputsToExpression( + symbolTable, + symbolInfos, + expression->mutable_switch_expression()->mutable_match(), + consumedPipelines); + } + for (auto& ifValue : + *expression->mutable_switch_expression()->mutable_ifs()) { + addInputsToExpression( + symbolTable, + symbolInfos, + ifValue.mutable_then(), + consumedPipelines); + } + if (expression->switch_expression().has_else_()) { + addInputsToExpression( + symbolTable, + symbolInfos, + expression->mutable_switch_expression()->mutable_else_(), + consumedPipelines); + } + break; + case ::substrait::proto::Expression::kSingularOrList: + if (expression->singular_or_list().has_value()) { + addInputsToExpression( + symbolTable, + symbolInfos, + expression->mutable_singular_or_list()->mutable_value(), + consumedPipelines); + } + for (auto& option : + *expression->mutable_singular_or_list()->mutable_options()) { + addInputsToExpression( + symbolTable, symbolInfos, &option, consumedPipelines); + } + 
break; + case ::substrait::proto::Expression::kMultiOrList: + for (auto& value : + *expression->mutable_multi_or_list()->mutable_value()) { + addInputsToExpression( + symbolTable, symbolInfos, &value, consumedPipelines); + } + for (auto& option : + *expression->mutable_multi_or_list()->mutable_options()) { + for (auto& field : *option.mutable_fields()) { + addInputsToExpression( + symbolTable, symbolInfos, &field, consumedPipelines); + } + } + break; + case ::substrait::proto::Expression::kCast: + if (expression->cast().has_input()) { + addInputsToExpression( + symbolTable, + symbolInfos, + expression->mutable_cast()->mutable_input(), + consumedPipelines); + } + break; + case ::substrait::proto::Expression::kSubquery: + // Handled below. + break; + case ::substrait::proto::Expression::kNested: + // TODO -- Implement nested expressions. + break; + case ::substrait::proto::Expression::kEnum: + break; + case ::substrait::proto::Expression::REX_TYPE_NOT_SET: + return; + } + + switch (expression->subquery().subquery_type_case()) { + case ::substrait::proto::Expression_Subquery::kScalar: { + if (*consumedPipelines >= symbolInfos.size()) { + SUBSTRAIT_FAIL("Internal error: Ran out of subquery symbols."); + } + auto subquerySymbol = symbolInfos[(*consumedPipelines)++]; + if (subquerySymbol != nullptr) { + auto relationData = + ANY_CAST(std::shared_ptr, subquerySymbol->blob); + *expression->mutable_subquery()->mutable_scalar()->mutable_input() = + relationData->relation; + addInputsToRelation( + symbolTable, + *subquerySymbol, + expression->mutable_subquery()->mutable_scalar()->mutable_input()); + } + SUBSTRAIT_FAIL("Internal Error: Known symbol is missing."); + } + case ::substrait::proto::Expression_Subquery::kInPredicate: { + // First visit the needle expressions. 
+ for (auto& expr : *expression->mutable_subquery() + ->mutable_in_predicate() + ->mutable_needles()) { + addInputsToExpression( + symbolTable, symbolInfos, &expr, consumedPipelines); + } + // Now visit the haystack. + if (expression->subquery().in_predicate().has_haystack()) { + if (*consumedPipelines >= symbolInfos.size()) { + SUBSTRAIT_FAIL("Internal error: Ran out of subquery symbols."); + } + auto subquerySymbol = symbolInfos[(*consumedPipelines)++]; + if (subquerySymbol != nullptr) { + auto relationData = + ANY_CAST(std::shared_ptr, subquerySymbol->blob); + *expression->mutable_subquery() + ->mutable_in_predicate() + ->mutable_haystack() = relationData->relation; + addInputsToRelation( + symbolTable, + *subquerySymbol, + expression->mutable_subquery() + ->mutable_in_predicate() + ->mutable_haystack()); + } + SUBSTRAIT_FAIL("Internal Error: Known symbol is missing."); + } + break; + } + case ::substrait::proto::Expression_Subquery::kSetPredicate: { + if (*consumedPipelines >= symbolInfos.size()) { + SUBSTRAIT_FAIL("Internal error: Ran out of subquery symbols."); + } + auto subquerySymbol = symbolInfos[(*consumedPipelines)++]; + if (subquerySymbol != nullptr) { + auto relationData = + ANY_CAST(std::shared_ptr, subquerySymbol->blob); + *expression->mutable_subquery() + ->mutable_set_predicate() + ->mutable_tuples() = relationData->relation; + addInputsToRelation( + symbolTable, + *subquerySymbol, + expression->mutable_subquery() + ->mutable_set_predicate() + ->mutable_tuples()); + } + SUBSTRAIT_FAIL("Internal Error: Known symbol is missing."); + } + case ::substrait::proto::Expression_Subquery::kSetComparison: { + addInputsToExpression( + symbolTable, + symbolInfos, + expression->mutable_subquery() + ->mutable_set_comparison() + ->mutable_left(), + consumedPipelines); + if (*consumedPipelines >= symbolInfos.size()) { + SUBSTRAIT_FAIL("Internal error: Ran out of subquery symbols."); + } + auto subquerySymbol = symbolInfos[(*consumedPipelines)++]; + if 
(subquerySymbol != nullptr) { + auto relationData = + ANY_CAST(std::shared_ptr, subquerySymbol->blob); + *expression->mutable_subquery() + ->mutable_set_comparison() + ->mutable_right() = relationData->relation; + addInputsToRelation( + symbolTable, + *subquerySymbol, + expression->mutable_subquery() + ->mutable_set_comparison() + ->mutable_right()); + } + } + case ::substrait::proto::Expression_Subquery::SUBQUERY_TYPE_NOT_SET: + break; + } +} + ::substrait::proto::Plan SymbolTablePrinter::outputToBinaryPlan( const SymbolTable& symbolTable) { ::substrait::proto::Plan plan; outputExtensionSpacesToBinaryPlan(symbolTable, &plan); outputFunctionsToBinaryPlan(symbolTable, &plan); for (const SymbolInfo& info : symbolTable) { - if (info.type != SymbolType::kRelation) { + if (info.type != SymbolType::kRelation || + info.parentQueryLocation != Location::kUnknownLocation) { continue; } auto relationData = ANY_CAST(std::shared_ptr, info.blob); @@ -736,6 +1109,7 @@ ::substrait::proto::Plan SymbolTablePrinter::outputToBinaryPlan( inputRelationData->relation; addInputsToRelation( + symbolTable, *relationData->newPipelines[0], relation->mutable_root()->mutable_input()); } diff --git a/src/substrait/textplan/SymbolTablePrinter.h b/src/substrait/textplan/SymbolTablePrinter.h index 262eca07..f23e3a4d 100644 --- a/src/substrait/textplan/SymbolTablePrinter.h +++ b/src/substrait/textplan/SymbolTablePrinter.h @@ -5,8 +5,10 @@ #include #include "SymbolTable.h" +#include "SubstraitErrorListener.h" namespace substrait::proto { +class Expression; class Plan; class Rel; } // namespace substrait::proto @@ -15,15 +17,24 @@ namespace io::substrait::textplan { class SymbolTablePrinter { public: - static std::string outputToText(const SymbolTable& symbolTable); + static std::string outputToText( + const SymbolTable& symbolTable, + SubstraitErrorListener* errorListener); static ::substrait::proto::Plan outputToBinaryPlan( const SymbolTable& symbolTable); private: static void addInputsToRelation( + 
const SymbolTable& symbolTable, const SymbolInfo& symbolInfo, ::substrait::proto::Rel* relation); + + static void addInputsToExpression( + const SymbolTable& symbolTable, + const std::vector& symbolInfos, + ::substrait::proto::Expression* expression, + int* consumedPipelines); }; } // namespace io::substrait::textplan diff --git a/src/substrait/textplan/converter/InitialPlanProtoVisitor.cpp b/src/substrait/textplan/converter/InitialPlanProtoVisitor.cpp index 7fd3eb0a..8f11c5c5 100644 --- a/src/substrait/textplan/converter/InitialPlanProtoVisitor.cpp +++ b/src/substrait/textplan/converter/InitialPlanProtoVisitor.cpp @@ -137,6 +137,52 @@ ::google::protobuf::RepeatedField getOutputMapping( } // namespace +std::any InitialPlanProtoVisitor::visitExpression( + const ::substrait::proto::Expression& expression) { + if (expression.rex_type_case() == + ::substrait::proto::Expression::RexTypeCase::kSubquery) { + outerRelations_.push_back(currentRelationScope_); + auto resetRelationScope = finally([&]() { outerRelations_.pop_back(); }); + + auto result = visitSubquery(expression.subquery()); + + const ::substrait::proto::Rel* subqueryRelation; + switch (expression.subquery().subquery_type_case()) { + case ::substrait::proto::Expression_Subquery::kScalar: + subqueryRelation = + &expression.subquery().scalar().input(); + break; + case ::substrait::proto::Expression_Subquery::kInPredicate: + subqueryRelation = + &expression.subquery().in_predicate().haystack(); + break; + case ::substrait::proto::Expression_Subquery::kSetPredicate: + subqueryRelation = + &expression.subquery().set_predicate().tuples(); + break; + case ::substrait::proto::Expression_Subquery::kSetComparison: + subqueryRelation = + &expression.subquery().set_comparison().right(); + break; + case ::substrait::proto::Expression_Subquery::SUBQUERY_TYPE_NOT_SET: + errorListener_->addError("Subquery type not set."); + return result; + } + if (subqueryRelation == nullptr) { + errorListener_->addError("Unrecognized 
subquery type."); + return result; + } + + const SymbolInfo* symbol = symbolTable_->lookupSymbolByLocationAndType( + PROTO_LOCATION(*subqueryRelation), SymbolType::kRelation); + symbolTable_->setParentQueryLocation( + *symbol, PROTO_LOCATION(*currentRelationScope_)); + + return result; + } + return BasePlanProtoVisitor::visitExpression(expression); +} + std::any InitialPlanProtoVisitor::visitExtension( const ::substrait::proto::extensions::SimpleExtensionDeclaration& extension) { @@ -199,13 +245,13 @@ std::any InitialPlanProtoVisitor::visitRelation( currentRelationScope_ = previousRelationScope; }); - BasePlanProtoVisitor::visitRelation(relation); - // Create a reduced copy of the relation for use in the symbol table. auto relationData = std::make_shared(); relationData->relation = relation; eraseInputs(&relationData->relation); + BasePlanProtoVisitor::visitRelation(relation); + // Update the relation data for long term use. updateLocalSchema(relationData, relation, relationData->relation); if (readRelationSources_.find(currentRelationScope_) != @@ -225,7 +271,8 @@ std::any InitialPlanProtoVisitor::visitRelation( SymbolType::kRelation, relation.rel_type_case(), relationData); - symbolTable_->updateLocation(*symbol, PROTO_LOCATION(relationData->relation)); + symbolTable_->addPermanentLocation( + *symbol, PROTO_LOCATION(relationData->relation)); return std::nullopt; } @@ -433,7 +480,6 @@ void InitialPlanProtoVisitor::updateLocalSchema( SymbolType::kMeasure, SourceType::kUnknown, std::nullopt); - symbol->location = PROTO_LOCATION(measure); relationData->generatedFieldReferences.emplace_back(symbol); } // Aggregate relations are different in that they alter the emitted fields diff --git a/src/substrait/textplan/converter/InitialPlanProtoVisitor.h b/src/substrait/textplan/converter/InitialPlanProtoVisitor.h index 520e8a63..33966283 100644 --- a/src/substrait/textplan/converter/InitialPlanProtoVisitor.h +++ b/src/substrait/textplan/converter/InitialPlanProtoVisitor.h @@ 
-31,6 +31,9 @@ class InitialPlanProtoVisitor : public BasePlanProtoVisitor { }; private: + std::any visitExpression( + const ::substrait::proto::Expression& expression) override; + std::any visitExtensionUri( const ::substrait::proto::extensions::SimpleExtensionURI& uri) override; std::any visitExtension( @@ -57,6 +60,7 @@ class InitialPlanProtoVisitor : public BasePlanProtoVisitor { std::any visitNamedStruct( const ::substrait::proto::NamedStruct& named) override; + // Populates the input schema from the relations that come before. void updateLocalSchema( const std::shared_ptr& relationData, const ::substrait::proto::Rel& relation, @@ -92,6 +96,7 @@ class InitialPlanProtoVisitor : public BasePlanProtoVisitor { std::shared_ptr errorListener_; const ::substrait::proto::Rel* currentRelationScope_{nullptr}; // Not owned. + std::vector outerRelations_; std::map readRelationSources_; diff --git a/src/substrait/textplan/converter/PipelineVisitor.cpp b/src/substrait/textplan/converter/PipelineVisitor.cpp index 57d5dcea..70a6c63a 100644 --- a/src/substrait/textplan/converter/PipelineVisitor.cpp +++ b/src/substrait/textplan/converter/PipelineVisitor.cpp @@ -3,16 +3,77 @@ #include "substrait/textplan/converter/PipelineVisitor.h" #include "substrait/textplan/Any.h" +#include "substrait/textplan/Finally.h" #include "substrait/textplan/StructuredSymbolData.h" #include "substrait/textplan/SymbolTable.h" namespace io::substrait::textplan { +std::any PipelineVisitor::visitExpression( + const ::substrait::proto::Expression& expression) { + if (expression.rex_type_case() == + ::substrait::proto::Expression::RexTypeCase::kSubquery) { + auto result = visitSubquery(expression.subquery()); + + const ::substrait::proto::Rel* subqueryRelation; + switch (expression.subquery().subquery_type_case()) { + case ::substrait::proto::Expression_Subquery::kScalar: + subqueryRelation = + &expression.subquery().scalar().input(); + break; + case ::substrait::proto::Expression_Subquery::kInPredicate: + 
subqueryRelation = + &expression.subquery().in_predicate().haystack(); + break; + case ::substrait::proto::Expression_Subquery::kSetPredicate: + subqueryRelation = + &expression.subquery().set_predicate().tuples(); + break; + case ::substrait::proto::Expression_Subquery::kSetComparison: + subqueryRelation = + &expression.subquery().set_comparison().right(); + break; + case ::substrait::proto::Expression_Subquery::SUBQUERY_TYPE_NOT_SET: + // No need to raise as this would have been exposed earlier. + return result; + } + if (subqueryRelation == nullptr) { + // No need to raise as this would have been caught earlier. + return result; + } + + auto subquerySymbol = symbolTable_->lookupSymbolByLocationAndType( + PROTO_LOCATION(*subqueryRelation), SymbolType::kRelation); + auto currentRelationData = ANY_CAST(std::shared_ptr, currentRelationScope_->blob); + currentRelationData->subQueryPipelines.push_back(subquerySymbol); + + // Populate the start of the pipeline for easy later access. + const SymbolInfo* current; + auto thisRelationData = + ANY_CAST(std::shared_ptr, subquerySymbol->blob); + thisRelationData->pipelineStart = subquerySymbol; + while (thisRelationData->continuingPipeline != nullptr) { + current = thisRelationData->continuingPipeline; + thisRelationData = ANY_CAST(std::shared_ptr, current->blob); + thisRelationData->pipelineStart = subquerySymbol; + } + return result; + } + return BasePlanProtoVisitor::visitExpression(expression); +} + std::any PipelineVisitor::visitRelation( const ::substrait::proto::Rel& relation) { auto symbol = symbolTable_->lookupSymbolByLocationAndType( PROTO_LOCATION(relation), SymbolType::kRelation); auto relationData = ANY_CAST(std::shared_ptr, symbol->blob); + + auto previousRelationScope = currentRelationScope_; + currentRelationScope_ = symbol; + auto resetRelationScope = finally([this, &previousRelationScope]() { + currentRelationScope_ = previousRelationScope; + }); + switch (relation.rel_type_case()) { case 
::substrait::proto::Rel::RelTypeCase::kRead: // No relations beyond this one. diff --git a/src/substrait/textplan/converter/PipelineVisitor.h b/src/substrait/textplan/converter/PipelineVisitor.h index 24c069e9..5fe4a810 100644 --- a/src/substrait/textplan/converter/PipelineVisitor.h +++ b/src/substrait/textplan/converter/PipelineVisitor.h @@ -19,8 +19,8 @@ class PipelineVisitor : public BasePlanProtoVisitor { }; private: - std::shared_ptr getRelationData( - const google::protobuf::Message& relation); + std::any visitExpression( + const ::substrait::proto::Expression& expression) override; std::any visitRelation(const ::substrait::proto::Rel& relation) override; @@ -28,6 +28,8 @@ class PipelineVisitor : public BasePlanProtoVisitor { const ::substrait::proto::PlanRel& relation) override; std::shared_ptr symbolTable_; + + const SymbolInfo* currentRelationScope_{nullptr}; }; } // namespace io::substrait::textplan diff --git a/src/substrait/textplan/converter/PlanPrinterVisitor.cpp b/src/substrait/textplan/converter/PlanPrinterVisitor.cpp index 3243769d..f6e3fbe7 100644 --- a/src/substrait/textplan/converter/PlanPrinterVisitor.cpp +++ b/src/substrait/textplan/converter/PlanPrinterVisitor.cpp @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: Apache-2.0 */ #include "substrait/textplan/converter/PlanPrinterVisitor.h" +#include #include #include @@ -65,38 +66,6 @@ std::string visitEnumArgument(const std::string& str) { return text.str(); } -int32_t expressionCount(const ::substrait::proto::Rel& relation) { - switch (relation.rel_type_case()) { - case ::substrait::proto::Rel::kProject: - return relation.project().expressions().size(); - default: - // No support for any other types besides project at the moment. 
- break; - } - return 0; -} - -const ::substrait::proto::Expression* getExpressionByNumber( - const ::substrait::proto::Rel& relation, - int num) { - switch (relation.rel_type_case()) { - case ::substrait::proto::Rel::kProject: - return &relation.project().expressions(num); - default: - // No support for any other types besides project at the moment. - break; - } - return nullptr; -} - -bool isDirectFieldReference(const ::substrait::proto::Expression& expr) { - if (expr.selection().reference_type_case() == - ::substrait::proto::Expression::FieldReference::kDirectReference) { - return expr.selection().direct_reference().has_struct_field(); - } - return false; -} - } // namespace std::string PlanPrinterVisitor::printRelation(const SymbolInfo& symbol) { @@ -132,33 +101,68 @@ std::string PlanPrinterVisitor::typeToText( return ANY_CAST(std::string, visitType(type)); } +Location getParentQueryLocation(const SymbolInfo* scope) { + if (scope->parentQueryLocation != Location::kUnknownLocation) { + return scope->parentQueryLocation; + } + auto relationData = ANY_CAST(std::shared_ptr, scope->blob); + auto nextPipeline = relationData->pipelineStart; + if (nextPipeline != nullptr) { + if (nextPipeline->parentQueryLocation != Location::kUnknownLocation) { + return nextPipeline->parentQueryLocation; + } + } + return Location::kUnknownLocation; +} + // TODO -- Refactor this to return the symbol for later display decisions. 
std::string PlanPrinterVisitor::lookupFieldReference( - uint32_t field_reference, + uint32_t fieldReference, + const SymbolInfo* currentScope, + uint32_t stepsOut, bool needFullyQualified) { - if (*currentScope_ == SymbolInfo::kUnknown) { + if (currentScope == nullptr || *currentScope_ == SymbolInfo::kUnknown) { errorListener_->addError( - "Field number " + std::to_string(field_reference) + + "Field number " + std::to_string(fieldReference) + " mysteriously requested outside of a relation."); - return "field#" + std::to_string(field_reference); + return "field#" + std::to_string(fieldReference); + } + auto actualScope = currentScope; + if (stepsOut > 0) { + for (auto stepsLeft = stepsOut; stepsLeft > 0; stepsLeft--) { + auto actualParentQueryLocation = getParentQueryLocation(actualScope); + if (actualParentQueryLocation == Location::kUnknownLocation) { + errorListener_->addError( + "Requested steps out of " + std::to_string(stepsOut) + + " but not within subquery depth that high."); + return "field#" + std::to_string(fieldReference); + } + actualScope = symbolTable_->lookupSymbolByLocationAndType( + actualParentQueryLocation, SymbolType::kRelation); + if (actualScope == nullptr) { + errorListener_->addError( + "Internal error: Missing previously encountered parent query symbol."); + return "field#" + std::to_string(fieldReference); + } + } } auto relationData = - ANY_CAST(std::shared_ptr, currentScope_->blob); + ANY_CAST(std::shared_ptr, actualScope->blob); auto fieldReferencesSize = relationData->fieldReferences.size(); const SymbolInfo* symbol{nullptr}; - if (field_reference < fieldReferencesSize) { - symbol = relationData->fieldReferences[field_reference]; + if (fieldReference < fieldReferencesSize) { + symbol = relationData->fieldReferences[fieldReference]; } else if ( - field_reference < + fieldReference < fieldReferencesSize + relationData->generatedFieldReferences.size()) { symbol = relationData - ->generatedFieldReferences[field_reference - 
fieldReferencesSize]; + ->generatedFieldReferences[fieldReference - fieldReferencesSize]; } else { errorListener_->addError( "Encountered field reference out of range: " + - std::to_string(field_reference)); - return "field#" + std::to_string(field_reference); + std::to_string(fieldReference)); + return "field#" + std::to_string(fieldReference); } if (!symbol->alias.empty()) { return symbol->alias; @@ -185,6 +189,160 @@ std::string PlanPrinterVisitor::lookupFunctionReference( return "functionref#" + std::to_string(function_reference); } +std::any PlanPrinterVisitor::visitSubqueryScalar( + const ::substrait::proto::Expression_Subquery_Scalar& query) { + std::stringstream result; + result << "SUBQUERY "; + if (query.has_input()) { + const SymbolInfo* symbol = symbolTable_->lookupSymbolByParentQueryAndType( + currentScope_->sourceLocation, + ++currentScopeIndex_, + SymbolType::kRelation); + if (symbol != nullptr) { + result << symbol->name; + } else { + errorListener_->addError( + "Internal error -- could not find subquery symbol."); + } + } else { + errorListener_->addError( + "No subquery input provided to scalar subquery operation."); + } + return result.str(); +} + +std::any PlanPrinterVisitor::visitSubqueryInPredicate( + const ::substrait::proto::Expression_Subquery_InPredicate& query) { + std::stringstream result; + result << "("; + bool hadPreviousNeedle = false; + for (const auto& needle : query.needles()) { + if (hadPreviousNeedle) { + result << ", "; + } + result << ANY_CAST(std::string, visitExpression(needle)); + hadPreviousNeedle = true; + } + result << ") IN SUBQUERY "; + if (query.has_haystack()) { + const SymbolInfo* symbol = symbolTable_->lookupSymbolByParentQueryAndType( + currentScope_->sourceLocation, + ++currentScopeIndex_, + SymbolType::kRelation); + if (symbol != nullptr) { + result << symbol->name; + } else { + errorListener_->addError( + "Internal error -- could not find subquery symbol."); + } + } else { + errorListener_->addError("No 
haystack defined for in predicate subquery."); + } + return result.str(); +} + +std::any PlanPrinterVisitor::visitSubquerySetPredicate( + const ::substrait::proto::Expression_Subquery_SetPredicate& query) { + std::stringstream result; + switch (query.predicate_op()) { + case ::substrait::proto:: + Expression_Subquery_SetPredicate_PredicateOp_PREDICATE_OP_EXISTS: + result << "EXISTS IN "; + break; + case ::substrait::proto:: + Expression_Subquery_SetPredicate_PredicateOp_PREDICATE_OP_UNIQUE: + result << "UNIQUE IN "; + break; + case ::substrait::proto:: + Expression_Subquery_SetPredicate_PredicateOp_PREDICATE_OP_UNSPECIFIED: + case ::substrait::proto:: + Expression_Subquery_SetPredicate_PredicateOp_Expression_Subquery_SetPredicate_PredicateOp_INT_MIN_SENTINEL_DO_NOT_USE_: + case ::substrait::proto:: + Expression_Subquery_SetPredicate_PredicateOp_Expression_Subquery_SetPredicate_PredicateOp_INT_MAX_SENTINEL_DO_NOT_USE_: + errorListener_->addError("Did not recognize the subquery predicate."); + return std::string("UNSUPPORTED SUBQUERY"); + } + result << "SUBQUERY "; + if (query.has_tuples()) { + const SymbolInfo* symbol = symbolTable_->lookupSymbolByParentQueryAndType( + currentScope_->sourceLocation, + ++currentScopeIndex_, + SymbolType::kRelation); + if (symbol != nullptr) { + result << symbol->name; + } else { + errorListener_->addError( + "Internal error -- could not find subquery symbol."); + } + } else { + errorListener_->addError( + "No subquery defined for set predicate operation."); + } + return result.str(); +} + +std::any PlanPrinterVisitor::visitSubquerySetComparison( + const ::substrait::proto::Expression_Subquery_SetComparison& query) { + std::stringstream result; + if (query.has_left()) { + result << ANY_CAST(std::string, visitExpression(query.left())); + } else { + errorListener_->addError( + "No expression defined for set comparison operation."); + } + switch (query.comparison_op()) { + case ::substrait::proto:: + 
Expression_Subquery_SetComparison_ComparisonOp_COMPARISON_OP_LE: + result << "LE "; + break; + case ::substrait::proto:: + Expression_Subquery_SetComparison_ComparisonOp_COMPARISON_OP_GE: + result << "GE "; + break; + case ::substrait::proto:: + Expression_Subquery_SetComparison_ComparisonOp_COMPARISON_OP_EQ: + result << "EQ "; + break; + case ::substrait::proto:: + Expression_Subquery_SetComparison_ComparisonOp_COMPARISON_OP_NE: + result << "NE "; + break; + case ::substrait::proto:: + Expression_Subquery_SetComparison_ComparisonOp_COMPARISON_OP_LT: + result << "LT "; + break; + case ::substrait::proto:: + Expression_Subquery_SetComparison_ComparisonOp_COMPARISON_OP_GT: + result << "GT "; + break; + case ::substrait::proto:: + Expression_Subquery_SetComparison_ComparisonOp_COMPARISON_OP_UNSPECIFIED: + case ::substrait::proto:: + Expression_Subquery_SetComparison_ComparisonOp_Expression_Subquery_SetComparison_ComparisonOp_INT_MIN_SENTINEL_DO_NOT_USE_: + case ::substrait::proto:: + Expression_Subquery_SetComparison_ComparisonOp_Expression_Subquery_SetComparison_ComparisonOp_INT_MAX_SENTINEL_DO_NOT_USE_: + errorListener_->addError("Did not recognize the subquery comparison."); + return std::string("UNSUPPORTED SUBQUERY"); + } + result << "ALL SUBQUERY "; + if (query.has_right()) { + const SymbolInfo* symbol = symbolTable_->lookupSymbolByParentQueryAndType( + currentScope_->sourceLocation, + ++currentScopeIndex_, + SymbolType::kRelation); + if (symbol != nullptr) { + result << symbol->name; + } else { + errorListener_->addError( + "Internal error -- could not find subquery symbol."); + } + } else { + errorListener_->addError( + "No subquery defined for set comparison operation."); + } + return result.str(); +} + std::any PlanPrinterVisitor::visitSelect( const ::substrait::proto::Expression_MaskExpression_Select& select) { errorListener_->addError("MaskExpression Select not yet implemented."); @@ -410,10 +568,18 @@ std::any PlanPrinterVisitor::visitFieldReference( 
const ::substrait::proto::Expression::FieldReference& ref) { // TODO -- Move this logic into visitDirectReference and visitMaskedReference. switch (ref.reference_type_case()) { - case ::substrait::proto::Expression::FieldReference::kDirectReference: + case ::substrait::proto::Expression::FieldReference::kDirectReference: { + uint32_t stepsOut{0}; + if (ref.has_outer_reference()) { + stepsOut = ref.outer_reference().steps_out(); + } // TODO -- Figure out when fully qualified names aren't needed. return lookupFieldReference( - ref.direct_reference().struct_field().field(), true); + ref.direct_reference().struct_field().field(), + currentScope_, + stepsOut, + true); + } case ::substrait::proto::Expression::FieldReference::kMaskedReference: errorListener_->addError( "Masked reference not yet supported: " + ref.ShortDebugString()); @@ -427,13 +593,6 @@ std::any PlanPrinterVisitor::visitFieldReference( return "FIELD_REF_TYPE_NOT_SUPPORTED"; } -// The visitor should be tolerant of older plans. This requires calling -// deprecated APIs. 
-#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wdeprecated-declarations" -#pragma gcc diagnostic push -#pragma gcc diagnostic ignored "-Wdeprecated-declarations" - std::any PlanPrinterVisitor::visitScalarFunction( const ::substrait::proto::Expression::ScalarFunction& function) { std::stringstream text; @@ -490,9 +649,6 @@ std::any PlanPrinterVisitor::visitScalarFunction( return text.str(); } -#pragma clang diagnostic pop -#pragma gcc diagnostic pop - std::any PlanPrinterVisitor::visitWindowFunction( const ::substrait::proto::Expression::WindowFunction& function) { errorListener_->addError( @@ -564,14 +720,6 @@ std::any PlanPrinterVisitor::visitCast( return text.str(); } -std::any PlanPrinterVisitor::visitSubquery( - const ::substrait::proto::Expression_Subquery& query) { - errorListener_->addError( - "Subquery expressions are not yet supported: " + - query.ShortDebugString()); - return std::string("SUBQUERY_NOT_YET_IMPLEMENTED"); -} - std::any PlanPrinterVisitor::visitNested( const ::substrait::proto::Expression_Nested& structure) { errorListener_->addError( @@ -641,7 +789,10 @@ std::any PlanPrinterVisitor::visitRelationCommon( text << "\n"; } for (const auto& mapping : common.emit().output_mapping()) { - text << " emit " << lookupFieldReference(mapping, true) << ";\n"; + text << " emit " + << lookupFieldReference( + mapping, currentScope_, /* stepsOut= */ 0, true) + << ";\n"; } return text.str(); } @@ -726,12 +877,17 @@ std::any PlanPrinterVisitor::visitRelation( const ::substrait::proto::Rel& relation) { // Mark the current scope for any operations within this relation. 
auto previousScope = currentScope_; - auto resetCurrentScope = finally([&]() { currentScope_ = previousScope; }); + auto previousScopeIndex = currentScopeIndex_; + auto resetCurrentScope = finally([&]() { + currentScope_ = previousScope; + currentScopeIndex_ = previousScopeIndex; + }); const SymbolInfo* symbol = symbolTable_->lookupSymbolByLocationAndType( PROTO_LOCATION(relation), SymbolType::kRelation); if (symbol != nullptr) { currentScope_ = symbol; } + currentScopeIndex_ = -1; auto result = BasePlanProtoVisitor::visitRelation(relation); diff --git a/src/substrait/textplan/converter/PlanPrinterVisitor.h b/src/substrait/textplan/converter/PlanPrinterVisitor.h index f817af58..2b598b5b 100644 --- a/src/substrait/textplan/converter/PlanPrinterVisitor.h +++ b/src/substrait/textplan/converter/PlanPrinterVisitor.h @@ -35,10 +35,23 @@ class PlanPrinterVisitor : public BasePlanProtoVisitor { private: std::string lookupFieldReference( - uint32_t field_reference, + uint32_t fieldReference, + const SymbolInfo* currentScope, + uint32_t stepsOut, bool needFullyQualified); std::string lookupFunctionReference(uint32_t function_reference); + std::any visitSubqueryScalar( + const ::substrait::proto::Expression_Subquery_Scalar& query) override; + std::any visitSubqueryInPredicate( + const ::substrait::proto::Expression_Subquery_InPredicate& query) + override; + std::any visitSubquerySetPredicate( + const ::substrait::proto::Expression_Subquery_SetPredicate& query) + override; + std::any visitSubquerySetComparison( + const ::substrait::proto::Expression_Subquery_SetComparison& query) + override; std::any visitSelect( const ::substrait::proto::Expression_MaskExpression_Select& select) override; @@ -64,8 +77,6 @@ class PlanPrinterVisitor : public BasePlanProtoVisitor { std::any visitMultiOrList( const ::substrait::proto::Expression::MultiOrList& expression) override; std::any visitCast(const ::substrait::proto::Expression::Cast& cast) override; - std::any visitSubquery( - const 
::substrait::proto::Expression_Subquery& query) override; std::any visitNested( const ::substrait::proto::Expression_Nested& structure) override; std::any visitEnum(const ::substrait::proto::Expression_Enum& value) override; @@ -121,6 +132,7 @@ class PlanPrinterVisitor : public BasePlanProtoVisitor { std::shared_ptr symbolTable_; std::shared_ptr errorListener_; const SymbolInfo* currentScope_; /* not owned */ + int currentScopeIndex_{-1}; int functionDepth_; }; diff --git a/src/substrait/textplan/converter/ReferenceNormalizer.cpp b/src/substrait/textplan/converter/ReferenceNormalizer.cpp index ec667c1f..61754a61 100644 --- a/src/substrait/textplan/converter/ReferenceNormalizer.cpp +++ b/src/substrait/textplan/converter/ReferenceNormalizer.cpp @@ -29,10 +29,15 @@ bool compareExtensionFunctions( return ord(a) < ord(b); } +// Forward references void normalizeFunctionsForExpression( ::substrait::proto::Expression* expr, const std::map& functionReferenceMapping); +void normalizeFunctionsForRelation( + ::substrait::proto::Rel* relation, + const std::map& functionReferenceMapping); + void normalizeFunctionsForArgument( ::substrait::proto::FunctionArgument& argument, const std::map& functionReferenceMapping) { @@ -73,6 +78,41 @@ void normalizeFunctionsForExpression( normalizeFunctionsForExpression( expr->mutable_if_then()->mutable_else_(), functionReferenceMapping); } + } else if (expr->has_subquery()) { + switch (expr->subquery().subquery_type_case()) { + case ::substrait::proto::Expression_Subquery::kScalar: + normalizeFunctionsForRelation( + expr->mutable_subquery()->mutable_scalar()->mutable_input(), + functionReferenceMapping); + break; + case ::substrait::proto::Expression_Subquery::kInPredicate: + for (auto& needle : *expr->mutable_subquery() + ->mutable_in_predicate() + ->mutable_needles()) { + normalizeFunctionsForExpression(&needle, functionReferenceMapping); + } + normalizeFunctionsForRelation( + expr->mutable_subquery() + ->mutable_in_predicate() + 
->mutable_haystack(), + functionReferenceMapping); + break; + case ::substrait::proto::Expression_Subquery::kSetPredicate: + normalizeFunctionsForRelation( + expr->mutable_subquery()->mutable_set_predicate()->mutable_tuples(), + functionReferenceMapping); + break; + case ::substrait::proto::Expression_Subquery::kSetComparison: + normalizeFunctionsForExpression( + expr->mutable_subquery()->mutable_set_comparison()->mutable_left(), + functionReferenceMapping); + normalizeFunctionsForRelation( + expr->mutable_subquery()->mutable_set_comparison()->mutable_right(), + functionReferenceMapping); + break; + case ::substrait::proto::Expression_Subquery::SUBQUERY_TYPE_NOT_SET: + break; + } } } diff --git a/src/substrait/textplan/converter/SaveBinary.cpp b/src/substrait/textplan/converter/SaveBinary.cpp index c8dd6c07..d4cde817 100644 --- a/src/substrait/textplan/converter/SaveBinary.cpp +++ b/src/substrait/textplan/converter/SaveBinary.cpp @@ -78,11 +78,18 @@ absl::Status savePlanToText( if (!errors.empty()) { return absl::UnknownError(absl::StrJoin(errors, "")); } - stream << SymbolTablePrinter::outputToText(result.getSymbolTable()); + SubstraitErrorListener errorListener; + stream << SymbolTablePrinter::outputToText( + result.getSymbolTable(), &errorListener); stream.close(); if (stream.fail()) { return absl::UnknownError("Failed to write the plan as text."); } + if (!errorListener.getErrorMessages().empty()) { + return absl::UnknownError(fmt::format( + "Errors while writing to text: {}", + absl::StrJoin(errorListener.getErrorMessages(), "\n"))); + } return absl::OkStatus(); } diff --git a/src/substrait/textplan/converter/Tool.cpp b/src/substrait/textplan/converter/Tool.cpp index bdcc707a..8c22bb24 100644 --- a/src/substrait/textplan/converter/Tool.cpp +++ b/src/substrait/textplan/converter/Tool.cpp @@ -22,13 +22,17 @@ void convertPlanToText(const char* filename) { } auto result = parseBinaryPlan(*planOrError); + SubstraitErrorListener errorListener; + auto textResult = 
SymbolTablePrinter::outputToText(result.getSymbolTable(), + &errorListener); + result.addErrors(errorListener.getErrorMessages()); auto errors = result.getAllErrors(); if (!errors.empty()) { for (const auto& err : errors) { std::cerr << err << std::endl; } } - std::cout << SymbolTablePrinter::outputToText(result.getSymbolTable()); + std::cout << textResult; } } // namespace diff --git a/src/substrait/textplan/data/tpch-plan02.json b/src/substrait/textplan/data/tpch-plan02.json new file mode 100644 index 00000000..85b44c47 --- /dev/null +++ b/src/substrait/textplan/data/tpch-plan02.json @@ -0,0 +1,1333 @@ +# select s.s_acctbal, s.s_name, n.n_name, p.p_partkey, p.p_mfgr, s.s_address, s.s_phone, s.s_comment from "part" p, "supplier" s, "partsupp" ps, "nation" n, "region" r where p.p_partkey = ps.ps_partkey and s.s_suppkey = ps.ps_suppkey and p.p_size = 41 and p.p_type like '%NICKEL' and s.s_nationkey = n.n_nationkey and n.n_regionkey = r.r_regionkey and r.r_name = 'EUROPE' and ps.ps_supplycost = ( select min(ps.ps_supplycost) from "partsupp" ps, "supplier" s, "nation" n, "region" r where p.p_partkey = ps.ps_partkey and s.s_suppkey = ps.ps_suppkey and s.s_nationkey = n.n_nationkey and n.n_regionkey = r.r_regionkey and r.r_name = 'EUROPE' ) order by s.s_acctbal desc, n.n_name, s.s_name, p.p_partkey limit 100 +{ + "extensionUris": [{ + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + }, { + "extensionUriAnchor": 3, + "uri": "/functions_string.yaml" + }, { + "extensionUriAnchor": 4, + "uri": "/functions_arithmetic_decimal.yaml" + }, { + "extensionUriAnchor": 2, + "uri": "/functions_comparison.yaml" + }], + "extensions": [{ + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "and:bool" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 1, + "name": "equal:any_any" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 2, + "name": "like:vchar_vchar" + } + }, { 
+ "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 3, + "name": "min:dec" + } + }], + "relations": [{ + "root": { + "input": { + "fetch": { + "common": { + "direct": { + } + }, + "input": { + "sort": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [28, 29, 30, 31, 32, 33, 34, 35] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["P_PARTKEY", "P_NAME", "P_MFGR", "P_BRAND", "P_TYPE", "P_SIZE", "P_CONTAINER", "P_RETAILPRICE", "P_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "varchar": { + "length": 55, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 23, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, 
+ "namedTable": { + "names": ["PART"] + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["S_SUPPKEY", "S_NAME", "S_ADDRESS", "S_NATIONKEY", "S_PHONE", "S_ACCTBAL", "S_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 40, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 101, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["SUPPLIER"] + } + } + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["PS_PARTKEY", "PS_SUPPKEY", "PS_AVAILQTY", "PS_SUPPLYCOST", "PS_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 199, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + 
"names": ["PARTSUPP"] + } + } + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["N_NATIONKEY", "N_NAME", "N_REGIONKEY", "N_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "varchar": { + "length": 152, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["NATION"] + } + } + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["R_REGIONKEY", "R_NAME", "R_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 152, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["REGION"] + } + } + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + 
"structField": { + "field": 16 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 9 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 17 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i32": 41, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "varchar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "%NICKEL", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + 
"selection": { + "directReference": { + "structField": { + "field": 12 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 21 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 23 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 25 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 26 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "EUROPE", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 19 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "subquery": { + "scalar": { + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + 
"outputMapping": [19] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["PS_PARTKEY", "PS_SUPPKEY", "PS_AVAILQTY", "PS_SUPPLYCOST", "PS_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 199, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["PARTSUPP"] + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["S_SUPPKEY", "S_NAME", "S_ADDRESS", "S_NATIONKEY", "S_PHONE", "S_ACCTBAL", "S_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 40, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, 
{ + "varchar": { + "length": 101, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["SUPPLIER"] + } + } + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["N_NATIONKEY", "N_NAME", "N_REGIONKEY", "N_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "varchar": { + "length": 152, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["NATION"] + } + } + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["R_REGIONKEY", "R_NAME", "R_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 152, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["REGION"] + } + } + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, 
+ "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "outerReference": { + "stepsOut": 1 + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 8 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 12 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 14 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 16 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": 
"NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 17 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "EUROPE", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [] + }], + "measures": [{ + "measure": { + "functionReference": 3, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }] + } + } + } + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 14 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, { + "selection": { + 
"directReference": { + "structField": { + "field": 11 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 13 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 15 + } + }, + "rootReference": { + } + } + }] + } + }, + "sorts": [{ + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_DESC_NULLS_FIRST" + }, { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + }, { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + }, { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + }] + } + }, + "offset": "0", + "count": "100" + } + }, + "names": ["S_ACCTBAL", "S_NAME", "N_NAME", "P_PARTKEY", "P_MFGR", "S_ADDRESS", "S_PHONE", "S_COMMENT"] + } + }], + "expectedTypeUrls": [] +} diff --git a/src/substrait/textplan/data/tpch-plan04.json b/src/substrait/textplan/data/tpch-plan04.json new file mode 100644 index 00000000..28288187 --- /dev/null +++ b/src/substrait/textplan/data/tpch-plan04.json @@ -0,0 +1,548 @@ +# select o.o_orderpriority, count(*) as order_count from "orders" o where o.o_orderdate >= date '1996-10-01' and o.o_orderdate < date '1996-10-01' + interval '3' month and exists ( select smoke.sh tpch_smoke.sh from "lineitem" l where l.l_orderkey = o.o_orderkey and l.l_commitdate < l.l_receiptdate ) group by o.o_orderpriority order by o.o_orderpriority +{ + "extensionUris": [{ + "extensionUriAnchor": 4, + "uri": "/functions_aggregate_generic.yaml" + }, { + "extensionUriAnchor": 1, + 
"uri": "/functions_boolean.yaml" + }, { + "extensionUriAnchor": 2, + "uri": "/functions_datetime.yaml" + }, { + "extensionUriAnchor": 3, + "uri": "/functions_comparison.yaml" + }], + "extensions": [{ + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "and:bool" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 1, + "name": "gte:date_date" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 2, + "name": "lt:date_date" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 3, + "name": "add:date_year" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 4, + "name": "equal:any_any" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 5, + "name": "count:any" + } + }], + "relations": [{ + "root": { + "input": { + "sort": { + "common": { + "direct": { + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [9] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["O_ORDERKEY", "O_CUSTKEY", "O_ORDERSTATUS", "O_TOTALPRICE", "O_ORDERDATE", "O_ORDERPRIORITY", "O_CLERK", "O_SHIPPRIORITY", "O_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 
15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 79, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["ORDERS"] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "date": 9770, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "literal": { + "date": 9770, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "literal": { + "intervalYearToMonth": { + "years": 0, + "months": 3 + }, + "nullable": false, + 
"typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + }, { + "value": { + "subquery": { + "setPredicate": { + "predicateOp": "PREDICATE_OP_EXISTS", + "tuples": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": 
"NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["LINEITEM"] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "outerReference": { + "stepsOut": 1 + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 11 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 12 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + } + } + } + } + } + }], + "options": [] + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + }] + } + }, + "groupings": [{ + 
"groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }] + }], + "measures": [{ + "measure": { + "functionReference": 5, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [], + "options": [] + } + }] + } + }, + "sorts": [{ + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + }] + } + }, + "names": ["O_ORDERPRIORITY", "ORDER_COUNT"] + } + }], + "expectedTypeUrls": [] +} diff --git a/src/substrait/textplan/data/tpch-plan11.json b/src/substrait/textplan/data/tpch-plan11.json new file mode 100644 index 00000000..7944238d --- /dev/null +++ b/src/substrait/textplan/data/tpch-plan11.json @@ -0,0 +1,1012 @@ +# select ps.ps_partkey, sum(ps.ps_supplycost smoke.sh tpch_smoke.sh ps.ps_availqty) as "value" from "partsupp" ps, "supplier" s, "nation" n where ps.ps_suppkey = s.s_suppkey and s.s_nationkey = n.n_nationkey and n.n_name = 'JAPAN' group by ps.ps_partkey having sum(ps.ps_supplycost smoke.sh tpch_smoke.sh ps.ps_availqty) > ( select sum(ps.ps_supplycost smoke.sh tpch_smoke.sh ps.ps_availqty) smoke.sh tpch_smoke.sh 0.0001000000 from "partsupp" ps, "supplier" s, "nation" n where ps.ps_suppkey = s.s_suppkey and s.s_nationkey = n.n_nationkey and n.n_name = 'JAPAN' ) order by "value" desc +{ + "extensionUris": [{ + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + }, { + "extensionUriAnchor": 3, + "uri": "/functions_arithmetic_decimal.yaml" + }, { + "extensionUriAnchor": 2, + "uri": "/functions_comparison.yaml" + }], + "extensions": [{ + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "and:bool" + } + }, { + "extensionFunction": 
{ + "extensionUriReference": 2, + "functionAnchor": 1, + "name": "equal:any_any" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 2, + "name": "multiply:dec_dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 3, + "name": "sum:dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 4, + "name": "gt:any_any" + } + }], + "relations": [{ + "root": { + "input": { + "sort": { + "common": { + "direct": { + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [16, 17] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["PS_PARTKEY", "PS_SUPPKEY", "PS_AVAILQTY", "PS_SUPPLYCOST", "PS_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 199, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["PARTSUPP"] + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["S_SUPPKEY", "S_NAME", "S_ADDRESS", "S_NATIONKEY", "S_PHONE", "S_ACCTBAL", "S_COMMENT"], + "struct": { + "types": [{ + 
"i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 40, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 101, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["SUPPLIER"] + } + } + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["N_NATIONKEY", "N_NAME", "N_REGIONKEY", "N_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "varchar": { + "length": 152, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["NATION"] + } + } + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + 
"nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 8 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 12 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 13 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "JAPAN", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + 
"directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }] + }], + "measures": [{ + "measure": { + "functionReference": 3, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }] + } + }, + "condition": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "subquery": { + "scalar": { + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [1] + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [16] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "cross": { + "common": { + "direct": { + } + }, + "left": 
{ + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["PS_PARTKEY", "PS_SUPPKEY", "PS_AVAILQTY", "PS_SUPPLYCOST", "PS_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 199, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["PARTSUPP"] + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["S_SUPPKEY", "S_NAME", "S_ADDRESS", "S_NATIONKEY", "S_PHONE", "S_ACCTBAL", "S_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 40, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 101, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["SUPPLIER"] + } + } + } + } + }, + "right": { + "read": { + 
"common": { + "direct": { + } + }, + "baseSchema": { + "names": ["N_NATIONKEY", "N_NAME", "N_REGIONKEY", "N_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "varchar": { + "length": 152, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["NATION"] + } + } + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 8 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 12 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + 
"outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 13 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "JAPAN", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + } + }, + "expressions": [{ + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [] + }], + "measures": [{ + "measure": { + "functionReference": 3, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }] + } + }, + 
"expressions": [{ + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "decimal": { + "scale": 10, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "decimal": { + "value": "QEIPAAAAAAAAAAAAAAAAAA==", + "precision": 11, + "scale": 10 + }, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + }] + } + } + } + } + } + }], + "options": [] + } + } + } + }, + "sorts": [{ + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_DESC_NULLS_FIRST" + }] + } + }, + "names": ["PS_PARTKEY", "value"] + } + }], + "expectedTypeUrls": [] +} diff --git a/src/substrait/textplan/data/tpch-plan16.json b/src/substrait/textplan/data/tpch-plan16.json new file mode 100644 index 00000000..c59d71a9 --- /dev/null +++ b/src/substrait/textplan/data/tpch-plan16.json @@ -0,0 +1,1016 @@ +# select p.p_brand, p.p_type, p.p_size, count(distinct ps.ps_suppkey) as supplier_cnt from "partsupp" ps, "part" p where p.p_partkey = ps.ps_partkey and p.p_brand <> 'Brand#21' and p.p_type not like 'MEDIUM PLATED%' and p.p_size in (38, 2, 8, 31, 44, 5, 14, 24) and ps.ps_suppkey not in ( select s.s_suppkey from "supplier" s where s.s_comment like '%Customer%Complaints%' ) group by p.p_brand, p.p_type, p.p_size order by supplier_cnt desc, p.p_brand, p.p_type, p.p_size +{ + "extensionUris": [{ + "extensionUriAnchor": 4, + "uri": "/functions_aggregate_generic.yaml" + }, { + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + }, { + "extensionUriAnchor": 3, + "uri": "/functions_string.yaml" + }, { + "extensionUriAnchor": 2, + "uri": "/functions_comparison.yaml" + }], + "extensions": [{ + "extensionFunction": { + 
"extensionUriReference": 1, + "functionAnchor": 0, + "name": "and:bool" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 1, + "name": "equal:any_any" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 2, + "name": "not_equal:any_any" + } + }, { + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 3, + "name": "not:bool" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 4, + "name": "like:vchar_vchar" + } + }, { + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 5, + "name": "or:bool" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 6, + "name": "count:any" + } + }], + "relations": [{ + "root": { + "input": { + "sort": { + "common": { + "direct": { + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [14, 15, 16, 17] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["PS_PARTKEY", "PS_SUPPKEY", "PS_AVAILQTY", "PS_SUPPLYCOST", "PS_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 199, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["PARTSUPP"] + } + } + }, 
+ "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["P_PARTKEY", "P_NAME", "P_MFGR", "P_BRAND", "P_TYPE", "P_SIZE", "P_CONTAINER", "P_RETAILPRICE", "P_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "varchar": { + "length": 55, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 23, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["PART"] + } + } + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + 
} + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 8 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "Brand#21", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 9 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "varchar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "MEDIUM PLATED%", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 5, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + 
"outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i32": 38, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i32": 8, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i32": 31, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + 
} + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i32": 44, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i32": 5, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i32": 14, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i32": 24, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "cast": { + "type": { + 
"bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "subquery": { + "inPredicate": { + "needles": [{ + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }], + "haystack": { + "project": { + "common": { + "emit": { + "outputMapping": [7] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["S_SUPPKEY", "S_NAME", "S_ADDRESS", "S_NATIONKEY", "S_PHONE", "S_ACCTBAL", "S_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 40, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 101, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["SUPPLIER"] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "varchar": { + "length": 101, + "typeVariationReference": 0, + "nullability": 
"NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "%Customer%Complaints%", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }] + } + } + } + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 8 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 9 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }] + }], + "measures": [{ + "measure": { + "functionReference": 6, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "invocation": "AGGREGATION_INVOCATION_DISTINCT", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }] + } + }, + "sorts": [{ + "expr": { + "selection": { + "directReference": { + 
"structField": { + "field": 3 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_DESC_NULLS_FIRST" + }, { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + }, { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + }, { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + }] + } + }, + "names": ["P_BRAND", "P_TYPE", "P_SIZE", "SUPPLIER_CNT"] + } + }], + "expectedTypeUrls": [] +} diff --git a/src/substrait/textplan/data/tpch-plan17.json b/src/substrait/textplan/data/tpch-plan17.json new file mode 100644 index 00000000..d178f018 --- /dev/null +++ b/src/substrait/textplan/data/tpch-plan17.json @@ -0,0 +1,824 @@ +# select sum(l.l_extendedprice) / 7.0 as avg_yearly from "lineitem" l, "part" p where p.p_partkey = l.l_partkey and p.p_brand = 'Brand#13' and p.p_container = 'JUMBO CAN' and l.l_quantity < ( select 0.2 * avg(l2.l_quantity) from "lineitem" l2 where l2.l_partkey = p.p_partkey ) +{ + "extensionUris": [{ + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + }, { + "extensionUriAnchor": 3, + "uri": "/functions_arithmetic_decimal.yaml" + }, { + "extensionUriAnchor": 2, + "uri": "/functions_comparison.yaml" + }], + "extensions": [{ + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "and:bool" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 1, + "name": "equal:any_any" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 2, + "name": "lt:any_any" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 3, + "name":
"avg:dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 4, + "name": "multiply:dec_dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 5, + "name": "sum:dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 6, + "name": "divide:dec_dec" + } + }], + "relations": [{ + "root": { + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [1] + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [25] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + 
} + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["LINEITEM"] + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["P_PARTKEY", "P_NAME", "P_MFGR", "P_BRAND", "P_TYPE", "P_SIZE", "P_CONTAINER", "P_RETAILPRICE", "P_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "varchar": { + "length": 55, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + 
"scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 23, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["PART"] + } + } + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 16 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 19 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "Brand#13", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + 
"selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "JUMBO CAN", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "subquery": { + "scalar": { + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [1] + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [16] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": 
"NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["LINEITEM"] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 16 + } + }, + "outerReference": { + "stepsOut": 1 + } + } + } + }], + "options": [] + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + 
"structField": { + "field": 4 + } + }, + "rootReference": { + } + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [] + }], + "measures": [{ + "measure": { + "functionReference": 3, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }] + } + }, + "expressions": [{ + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "decimal": { + "scale": 1, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "literal": { + "decimal": { + "value": "AgAAAAAAAAAAAAAAAAAAAA==", + "precision": 2, + "scale": 1 + }, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }] + } + } + } + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [] + }], + "measures": [{ + "measure": { + "functionReference": 5, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + 
}], + "options": [] + } + }] + } + }, + "expressions": [{ + "scalarFunction": { + "functionReference": 6, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "decimal": { + "value": "RgAAAAAAAAAAAAAAAAAAAA==", + "precision": 2, + "scale": 1 + }, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + }] + } + }, + "names": ["AVG_YEARLY"] + } + }], + "expectedTypeUrls": [] +} diff --git a/src/substrait/textplan/data/tpch-plan18.json b/src/substrait/textplan/data/tpch-plan18.json new file mode 100644 index 00000000..cee5c998 --- /dev/null +++ b/src/substrait/textplan/data/tpch-plan18.json @@ -0,0 +1,913 @@ +# select c.c_name, c.c_custkey, o.o_orderkey, o.o_orderdate, o.o_totalprice, sum(l.l_quantity) from "customer" c, "orders" o, "lineitem" l where o.o_orderkey in ( select l_orderkey from "lineitem" group by l_orderkey having sum(l_quantity) > 300 ) and c.c_custkey = o.o_custkey and o.o_orderkey = l.l_orderkey group by c.c_name, c.c_custkey, o.o_orderkey, o.o_orderdate, o.o_totalprice order by o.o_totalprice desc, o.o_orderdate limit 100 +{ + "extensionUris": [{ + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + }, { + "extensionUriAnchor": 2, + "uri": "/functions_arithmetic_decimal.yaml" + }, { + "extensionUriAnchor": 3, + "uri": "/functions_comparison.yaml" + }], + "extensions": [{ + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "and:bool" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 1, + "name": "sum:dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 2, + "name": "gt:any_any" + } + }, { + "extensionFunction": { + 
"extensionUriReference": 3, + "functionAnchor": 3, + "name": "equal:any_any" + } + }], + "relations": [{ + "root": { + "input": { + "fetch": { + "common": { + "direct": { + } + }, + "input": { + "sort": { + "common": { + "direct": { + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [33, 34, 35, 36, 37, 38] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["C_CUSTKEY", "C_NAME", "C_ADDRESS", "C_NATIONKEY", "C_PHONE", "C_ACCTBAL", "C_MKTSEGMENT", "C_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "varchar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 40, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 117, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["CUSTOMER"] + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["O_ORDERKEY", "O_CUSTKEY", "O_ORDERSTATUS", 
"O_TOTALPRICE", "O_ORDERDATE", "O_ORDERPRIORITY", "O_CLERK", "O_SHIPPRIORITY", "O_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 79, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["ORDERS"] + } + } + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + 
"typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["LINEITEM"] + } + } + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "cast": { + "type": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "subquery": { + "inPredicate": { + "needles": [{ + "selection": { + "directReference": { + "structField": { + "field": 8 + } + }, + "rootReference": { + } + } + }], + "haystack": { + 
"project": { + "common": { + "emit": { + "outputMapping": [2] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [16, 17] + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 
0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["LINEITEM"] + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }] + }], + "measures": [{ + "measure": { + "functionReference": 1, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }] + } + }, + "condition": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + 
"literal": { + "i32": 300, + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }] + } + } + } + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 9 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 8 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 17 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 8 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 12 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + 
"field": 11 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 21 + } + }, + "rootReference": { + } + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + }] + }], + "measures": [{ + "measure": { + "functionReference": 1, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }] + } + }, + "sorts": [{ + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_DESC_NULLS_FIRST" + }, { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + }] + } + }, + "offset": "0", + "count": "100" + } + }, + "names": ["C_NAME", "C_CUSTKEY", "O_ORDERKEY", "O_ORDERDATE", "O_TOTALPRICE", "EXPR$5"] + } + }], + "expectedTypeUrls": [] +} diff --git a/src/substrait/textplan/data/tpch-plan20.json b/src/substrait/textplan/data/tpch-plan20.json new 
file mode 100644 index 00000000..15b37eb4 --- /dev/null +++ b/src/substrait/textplan/data/tpch-plan20.json @@ -0,0 +1,1110 @@ +# select s.s_name, s.s_address from "supplier" s, "nation" n where s.s_suppkey in ( select ps.ps_suppkey from "partsupp" ps where ps. ps_partkey in ( select p.p_partkey from "part" p where p.p_name like 'antique%' ) and ps.ps_availqty > ( select 0.5 smoke.sh tpch_smoke.sh sum(l.l_quantity) from "lineitem" l where l.l_partkey = ps.ps_partkey and l.l_suppkey = ps.ps_suppkey and l.l_shipdate >= date '1993-01-01' and l.l_shipdate < date '1993-01-01' + interval '1' year ) ) and s.s_nationkey = n.n_nationkey and n.n_name = 'KENYA' order by s.s_name +{ + "extensionUris": [{ + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + }, { + "extensionUriAnchor": 2, + "uri": "/functions_string.yaml" + }, { + "extensionUriAnchor": 5, + "uri": "/functions_arithmetic_decimal.yaml" + }, { + "extensionUriAnchor": 4, + "uri": "/functions_datetime.yaml" + }, { + "extensionUriAnchor": 3, + "uri": "/functions_comparison.yaml" + }], + "extensions": [{ + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "and:bool" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 1, + "name": "like:vchar_vchar" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 2, + "name": "gt:any_any" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 3, + "name": "equal:any_any" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 4, + "name": "gte:date_date" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 5, + "name": "lt:date_date" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 6, + "name": "add:date_year" + } + }, { + "extensionFunction": { + "extensionUriReference": 5, + "functionAnchor": 7, + "name": "sum:dec" + } + }, { + 
"extensionFunction": { + "extensionUriReference": 5, + "functionAnchor": 8, + "name": "multiply:dec_dec" + } + }], + "relations": [{ + "root": { + "input": { + "sort": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [11, 12] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["S_SUPPKEY", "S_NAME", "S_ADDRESS", "S_NATIONKEY", "S_PHONE", "S_ACCTBAL", "S_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 40, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 101, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["SUPPLIER"] + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["N_NATIONKEY", "N_NAME", "N_REGIONKEY", "N_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": 
"NULLABILITY_REQUIRED" + } + }, { + "varchar": { + "length": 152, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["NATION"] + } + } + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "cast": { + "type": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "subquery": { + "inPredicate": { + "needles": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }], + "haystack": { + "project": { + "common": { + "emit": { + "outputMapping": [5] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["PS_PARTKEY", "PS_SUPPKEY", "PS_AVAILQTY", "PS_SUPPLYCOST", "PS_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 199, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["PARTSUPP"] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + 
"arguments": [{ + "value": { + "cast": { + "type": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "subquery": { + "inPredicate": { + "needles": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }], + "haystack": { + "project": { + "common": { + "emit": { + "outputMapping": [9] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["P_PARTKEY", "P_NAME", "P_MFGR", "P_BRAND", "P_TYPE", "P_SIZE", "P_CONTAINER", "P_RETAILPRICE", "P_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "varchar": { + "length": 55, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 23, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["PART"] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": 
"NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "varchar": { + "length": 55, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "antique%", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }] + } + } + } + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "cast": { + "type": { + "decimal": { + "scale": 1, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }, { + "value": { + "subquery": { + "scalar": { + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [1] + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [16] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", 
"L_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["LINEITEM"] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], 
+ "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "outerReference": { + "stepsOut": 1 + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "outerReference": { + "stepsOut": 1 + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "date": 8401, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 5, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + 
"scalarFunction": { + "functionReference": 6, + "args": [], + "outputType": { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "literal": { + "date": 8401, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "literal": { + "intervalYearToMonth": { + "years": 1, + "months": 0 + }, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [] + }], + "measures": [{ + "measure": { + "functionReference": 7, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }] + } + }, + "expressions": [{ + "scalarFunction": { + "functionReference": 8, + "args": [], + "outputType": { + "decimal": { + "scale": 1, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "literal": { + "decimal": { + "value": "BQAAAAAAAAAAAAAAAAAAAA==", + "precision": 2, + "scale": 1 + }, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }] + } + } + } + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + 
"structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }] + } + } + } + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 7 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 8 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "KENYA", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }] + } + }, + "sorts": [{ + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + }] + } + }, + "names": ["S_NAME", "S_ADDRESS"] + } + }], + "expectedTypeUrls": [] +} diff --git a/src/substrait/textplan/data/tpch-plan21.json 
b/src/substrait/textplan/data/tpch-plan21.json new file mode 100644 index 00000000..2b89f1f4 --- /dev/null +++ b/src/substrait/textplan/data/tpch-plan21.json @@ -0,0 +1,1227 @@ +# select s.s_name, count(*) as numwait from "supplier" s, "lineitem" l1, "orders" o, "nation" n where s.s_suppkey = l1.l_suppkey and o.o_orderkey = l1.l_orderkey and o.o_orderstatus = 'F' and l1.l_receiptdate > l1.l_commitdate and exists ( select * from "lineitem" l2 where l2.l_orderkey = l1.l_orderkey and l2.l_suppkey <> l1.l_suppkey ) and not exists ( select * from "lineitem" l3 where l3.l_orderkey = l1.l_orderkey and l3.l_suppkey <> l1.l_suppkey and l3.l_receiptdate > l3.l_commitdate ) and s.s_nationkey = n.n_nationkey and n.n_name = 'BRAZIL' group by s.s_name order by numwait desc, s.s_name limit 100 +{ + "extensionUris": [{ + "extensionUriAnchor": 4, + "uri": "/functions_aggregate_generic.yaml" + }, { + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + }, { + "extensionUriAnchor": 3, + "uri": "/functions_datetime.yaml" + }, { + "extensionUriAnchor": 2, + "uri": "/functions_comparison.yaml" + }], + "extensions": [{ + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "and:bool" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 1, + "name": "equal:any_any" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 2, + "name": "gt:date_date" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 3, + "name": "not_equal:any_any" + } + }, { + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 4, + "name": "not:bool" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 5, + "name": "count:any" + } + }], + "relations": [{ + "root": { + "input": { + "fetch": { + "common": { + "direct": { + } + }, + "input": { + "sort": { + "common": { + "direct": { + } + }, +
"input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [36] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "cross": { + "common": { + "direct": { + } + }, + "left": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["S_SUPPKEY", "S_NAME", "S_ADDRESS", "S_NATIONKEY", "S_PHONE", "S_ACCTBAL", "S_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 40, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 101, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["SUPPLIER"] + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + 
"typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["LINEITEM"] + } + } + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["O_ORDERKEY", "O_CUSTKEY", "O_ORDERSTATUS", "O_TOTALPRICE", "O_ORDERDATE", "O_ORDERPRIORITY", "O_CLERK", "O_SHIPPRIORITY", 
"O_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 79, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["ORDERS"] + } + } + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["N_NATIONKEY", "N_NAME", "N_REGIONKEY", "N_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "varchar": { + "length": 152, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["NATION"] + } + } + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + 
"typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 9 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 23 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 7 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 25 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "fixedChar": "F", + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 19 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + 
"field": 18 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "subquery": { + "setPredicate": { + "predicateOp": "PREDICATE_OP_EXISTS", + "tuples": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + 
}, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["LINEITEM"] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 7 + } + }, + "outerReference": { + "stepsOut": 1 + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 9 + } + }, + "outerReference": { + "stepsOut": 1 + } + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + } + } + } + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + 
"subquery": { + "setPredicate": { + "predicateOp": "PREDICATE_OP_EXISTS", + "tuples": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": 
"NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["LINEITEM"] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 7 + } + }, + "outerReference": { + "stepsOut": 1 + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 9 + } + }, + "outerReference": { + "stepsOut": 1 + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 12 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + 
"selection": { + "directReference": { + "structField": { + "field": 11 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + } + } + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 32 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 33 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "cast": { + "type": { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "BRAZIL", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }] + }], + "measures": [{ + "measure": { + "functionReference": 5, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "i64": { + "typeVariationReference": 0, + "nullability": 
"NULLABILITY_REQUIRED" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [], + "options": [] + } + }] + } + }, + "sorts": [{ + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_DESC_NULLS_FIRST" + }, { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + }] + } + }, + "offset": "0", + "count": "100" + } + }, + "names": ["S_NAME", "NUMWAIT"] + } + }], + "expectedTypeUrls": [] +} diff --git a/src/substrait/textplan/data/tpch-plan22.json b/src/substrait/textplan/data/tpch-plan22.json new file mode 100644 index 00000000..2908ad92 --- /dev/null +++ b/src/substrait/textplan/data/tpch-plan22.json @@ -0,0 +1,1819 @@ +# select cntrycode, count(*) as numcust, sum(c_acctbal) as totacctbal from ( select substring(c_phone from 1 for 2) as cntrycode, c_acctbal from "customer" c where substring(c_phone from 1 for 2) in ('24', '31', '11', '16', '21', '20', '34') and c_acctbal > ( select avg(c_acctbal) from "customer" where c_acctbal > 0.00 and substring(c_phone from 1 for 2) in ('24', '31', '11', '16', '21', '20', '34') ) and not exists ( select smoke.sh tpch_smoke.sh from "orders" o where o.o_custkey = c.c_custkey ) ) as custsale group by cntrycode order by cntrycode +{ + "extensionUris": [{ + "extensionUriAnchor": 5, + "uri": "/functions_aggregate_generic.yaml" + }, { + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + }, { + "extensionUriAnchor": 3, + "uri": "/functions_string.yaml" + }, { + "extensionUriAnchor": 4, + "uri": "/functions_arithmetic_decimal.yaml" + }, { + "extensionUriAnchor": 2, + "uri": "/functions_comparison.yaml" + }], + "extensions": [{ + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "and:bool" + } + }, { + "extensionFunction": { + "extensionUriReference": 1, + 
"functionAnchor": 1, + "name": "or:bool" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 2, + "name": "equal:any_any" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 3, + "name": "substring:fchar_i32_i32" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 4, + "name": "gt:any_any" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 5, + "name": "avg:dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 6, + "name": "not:bool" + } + }, { + "extensionFunction": { + "extensionUriReference": 5, + "functionAnchor": 7, + "name": "count:any" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 8, + "name": "sum:dec" + } + }], + "relations": [{ + "root": { + "input": { + "sort": { + "common": { + "direct": { + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [8, 9] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["C_CUSTKEY", "C_NAME", "C_ADDRESS", "C_NATIONKEY", "C_PHONE", "C_ACCTBAL", "C_MKTSEGMENT", "C_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "varchar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 40, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + 
"nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 117, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["CUSTOMER"] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "cast": { + "type": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "24", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + 
"scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "cast": { + "type": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "31", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "cast": { + "type": { + "varchar": { + 
"length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "11", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "cast": { + "type": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "16", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } 
+ } + } + }, { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "cast": { + "type": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "21", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "cast": { + "type": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "20", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + 
"scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "cast": { + "type": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "34", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "subquery": { + "scalar": { + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [8] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["C_CUSTKEY", "C_NAME", "C_ADDRESS", "C_NATIONKEY", "C_PHONE", "C_ACCTBAL", "C_MKTSEGMENT", "C_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "varchar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + 
"varchar": { + "length": 40, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 117, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["CUSTOMER"] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "decimal": { + "value": "AAAAAAAAAAAAAAAAAAAAAA==", + "precision": 3, + "scale": 2 + }, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + 
"scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "cast": { + "type": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "24", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "cast": { + "type": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "31", + "nullable": false, + "typeVariationReference": 0 + } + }, + 
"failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "cast": { + "type": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "11", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "literal": { + "i32": 2, + "nullable": false, + 
"typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "cast": { + "type": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "16", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "cast": { + "type": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "21", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + 
"arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "cast": { + "type": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "20", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }, { + "value": { + "cast": { + "type": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "fixedChar": "34", + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + } + }, + "expressions": [{ + "selection": { 
+ "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [] + }], + "measures": [{ + "measure": { + "functionReference": 5, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }] + } + } + } + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 6, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "subquery": { + "setPredicate": { + "predicateOp": "PREDICATE_OP_EXISTS", + "tuples": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["O_ORDERKEY", "O_CUSTKEY", "O_ORDERSTATUS", "O_TOTALPRICE", "O_ORDERDATE", "O_ORDERPRIORITY", "O_CLERK", "O_SHIPPRIORITY", "O_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + 
"length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 79, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["ORDERS"] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "outerReference": { + "stepsOut": 1 + } + } + } + }], + "options": [] + } + } + } + } + } + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + } + }, + "expressions": [{ + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "varchar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "literal": { + "i32": 2, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }] + }], + "measures": [{ + "measure": { + "functionReference": 7, + "args": 
[], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [], + "options": [] + } + }, { + "measure": { + "functionReference": 8, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }] + } + }, + "sorts": [{ + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + }] + } + }, + "names": ["CNTRYCODE", "NUMCUST", "TOTACCTBAL"] + } + }], + "expectedTypeUrls": [] +} diff --git a/src/substrait/textplan/parser/CMakeLists.txt b/src/substrait/textplan/parser/CMakeLists.txt index fcc01413..36e24027 100644 --- a/src/substrait/textplan/parser/CMakeLists.txt +++ b/src/substrait/textplan/parser/CMakeLists.txt @@ -12,6 +12,8 @@ add_library( SubstraitPlanPipelineVisitor.h SubstraitPlanRelationVisitor.cpp SubstraitPlanRelationVisitor.h + SubstraitPlanSubqueryRelationVisitor.cpp + SubstraitPlanSubqueryRelationVisitor.h SubstraitPlanTypeVisitor.cpp SubstraitPlanTypeVisitor.h LoadText.cpp diff --git a/src/substrait/textplan/parser/LoadText.cpp b/src/substrait/textplan/parser/LoadText.cpp index c76a7b31..44f3f5bc 100644 --- a/src/substrait/textplan/parser/LoadText.cpp +++ b/src/substrait/textplan/parser/LoadText.cpp @@ -13,7 +13,7 @@ namespace io::substrait::textplan { absl::StatusOr<::substrait::proto::Plan> loadFromText(const std::string& text) { auto stream = loadTextString(text); - auto parseResult = 
io::substrait::textplan::parseStream(stream); + auto parseResult = parseStream(&stream); if (!parseResult.successful()) { auto errors = parseResult.getAllErrors(); return absl::UnknownError(absl::StrJoin(errors, "")); diff --git a/src/substrait/textplan/parser/ParseText.cpp b/src/substrait/textplan/parser/ParseText.cpp index a4ba32fc..7367125c 100644 --- a/src/substrait/textplan/parser/ParseText.cpp +++ b/src/substrait/textplan/parser/ParseText.cpp @@ -14,6 +14,7 @@ #include "substrait/textplan/parser/SubstraitParserErrorListener.h" #include "substrait/textplan/parser/SubstraitPlanPipelineVisitor.h" #include "substrait/textplan/parser/SubstraitPlanRelationVisitor.h" +#include "substrait/textplan/parser/SubstraitPlanSubqueryRelationVisitor.h" #include "substrait/textplan/parser/SubstraitPlanVisitor.h" namespace io::substrait::textplan { @@ -36,10 +37,10 @@ antlr4::ANTLRInputStream loadTextString(std::string_view text) { return {text}; } -ParseResult parseStream(antlr4::ANTLRInputStream stream) { - io::substrait::textplan::SubstraitParserErrorListener errorListener; +ParseResult parseStream(antlr4::ANTLRInputStream* stream) { + SubstraitParserErrorListener errorListener; - SubstraitPlanLexer lexer(&stream); + SubstraitPlanLexer lexer(stream); lexer.removeErrorListeners(); lexer.addErrorListener(&errorListener); antlr4::CommonTokenStream tokens(&lexer); @@ -57,6 +58,15 @@ ParseResult parseStream(antlr4::ANTLRInputStream stream) { visitorSymbolTable, visitorErrorListener); try { visitor->visitPlan(tree); + } catch (std::invalid_argument& ex) { + // Catches the any_cast exception and logs a useful error message. + errorListener.syntaxError( + &parser, + nullptr, + /*line=*/1, + /*charPositionInLine=*/1, + ex.what(), + std::current_exception()); } catch (...) 
{ errorListener.syntaxError( &parser, @@ -71,7 +81,7 @@ ParseResult parseStream(antlr4::ANTLRInputStream stream) { *visitor->getSymbolTable(), visitor->getErrorListener()); try { pipelineVisitor->visitPlan(tree); - } catch (std::invalid_argument ex) { + } catch (std::invalid_argument& ex) { // Catches the any_cast exception and logs a useful error message. errorListener.syntaxError( &parser, @@ -94,7 +104,40 @@ ParseResult parseStream(antlr4::ANTLRInputStream stream) { *pipelineVisitor->getSymbolTable(), pipelineVisitor->getErrorListener()); try { relationVisitor->visitPlan(tree); - } catch (std::invalid_argument ex) { + } catch (std::invalid_argument& ex) { + // Catches the any_cast exception and logs a useful error message. + errorListener.syntaxError( + &parser, + nullptr, + /*line=*/1, + /*charPositionInLine=*/1, + ex.what(), + std::current_exception()); + } catch (...) { + errorListener.syntaxError( + &parser, + nullptr, + /*line=*/1, + /*charPositionInLine=*/1, + "uncaught parser relation exception encountered", + std::current_exception()); + } + + if (relationVisitor->getErrorListener()->hasErrors()) { + // We have enough errors that proceeding to the final step isn't useful. + return { + *relationVisitor->getSymbolTable(), + errorListener.getErrorMessages(), + relationVisitor->getErrorListener()->getErrorMessages()}; + } + + auto subQueryRelationVisitor = + std::make_shared( + *relationVisitor->getSymbolTable(), + relationVisitor->getErrorListener()); + try { + subQueryRelationVisitor->visitPlan(tree); + } catch (std::invalid_argument& ex) { // Catches the any_cast exception and logs a useful error message. 
errorListener.syntaxError( &parser, @@ -113,11 +156,11 @@ ParseResult parseStream(antlr4::ANTLRInputStream stream) { std::current_exception()); } - auto finalSymbolTable = relationVisitor->getSymbolTable(); + auto finalSymbolTable = subQueryRelationVisitor->getSymbolTable(); return { *finalSymbolTable, errorListener.getErrorMessages(), - relationVisitor->getErrorListener()->getErrorMessages()}; + subQueryRelationVisitor->getErrorListener()->getErrorMessages()}; } } // namespace io::substrait::textplan diff --git a/src/substrait/textplan/parser/ParseText.h b/src/substrait/textplan/parser/ParseText.h index caaf7b3b..b9e0dc1e 100644 --- a/src/substrait/textplan/parser/ParseText.h +++ b/src/substrait/textplan/parser/ParseText.h @@ -17,6 +17,6 @@ namespace io::substrait::textplan { std::optional loadTextFile(std::string_view filename); antlr4::ANTLRInputStream loadTextString(std::string_view text); -ParseResult parseStream(antlr4::ANTLRInputStream stream); +ParseResult parseStream(antlr4::ANTLRInputStream* stream); } // namespace io::substrait::textplan diff --git a/src/substrait/textplan/parser/README.md b/src/substrait/textplan/parser/README.md new file mode 100644 index 00000000..8748c709 --- /dev/null +++ b/src/substrait/textplan/parser/README.md @@ -0,0 +1,19 @@ +# Parser Phases + +There are four phases that the parser goes through to parse a textplan into its +internal representation (stored inside a ```SymbolTable```). + +* ```SubstraitPlanVisitor``` + * Finds the relations, processes functions, data files, and schemas. +* ```SubstraitPlanPipelineVisitor``` + * Stores the connections between the relations as determined by the pipelines + section. +* ```SubstraitPlanRelationVisitor``` + * Populates each relation's definition, expressions, and emitted fields. +* ```SubstraitPlanSubqueryRelationVisitor``` + * Determines the input fields for subqueries. + +```SubstraitPlanTypeVisitor``` is a common visitor used by all but +```SubstraitPlanPipelineVisitor``` to do type lookups. + +The phase order is implemented in ```ParseText.cpp```. 
diff --git a/src/substrait/textplan/parser/SubstraitPlanPipelineVisitor.cpp b/src/substrait/textplan/parser/SubstraitPlanPipelineVisitor.cpp index 5e0e1806..25de3151 100644 --- a/src/substrait/textplan/parser/SubstraitPlanPipelineVisitor.cpp +++ b/src/substrait/textplan/parser/SubstraitPlanPipelineVisitor.cpp @@ -6,6 +6,7 @@ #include "SubstraitPlanParser/SubstraitPlanParser.h" #include "substrait/textplan/Any.h" +#include "substrait/textplan/Finally.h" #include "substrait/textplan/Location.h" #include "substrait/textplan/StructuredSymbolData.h" #include "substrait/textplan/SymbolTable.h" @@ -47,7 +48,7 @@ void SubstraitPlanPipelineVisitor::updateRelationSymbol( } else { // Update the location on this symbol, so we can find it in its canonical // location. - symbolTable_->updateLocation(*symbol, Location(ctx)); + symbolTable_->addPermanentLocation(*symbol, Location(ctx)); } } @@ -146,4 +147,119 @@ std::any SubstraitPlanPipelineVisitor::visitPipeline( return result; } +// Remembers the relation being visited (and its location) so that subquery +// expressions nested inside it can be attributed to it. +std::any SubstraitPlanPipelineVisitor::visitRelation( + SubstraitPlanParser::RelationContext* ctx) { + // Mark the current scope for any operations within this relation. 
+ auto previousScope = currentRelationScope_; + auto previousScopeLocation = currentRelationScopeLocation_; + auto resetCurrentScopeLocation = finally([&]() { + currentRelationScope_ = previousScope; + currentRelationScopeLocation_ = previousScopeLocation; + }); + auto* symbol = symbolTable_->lookupSymbolByLocationAndType( + Location(ctx), SymbolType::kRelation); + currentRelationScope_ = symbol; + if (symbol == nullptr) { + errorListener_->addError( + ctx->getStart(), + "Internal error: Previously encountered symbol went missing."); + return defaultResult(); + } + currentRelationScopeLocation_ = Location(ctx); + + return SubstraitPlanParserBaseVisitor::visitRelation(ctx); +} + +// Registers the relation named by a scalar subquery as a subquery pipeline of +// the enclosing relation and records the enclosing relation's location as its +// parent query location. +std::any SubstraitPlanPipelineVisitor::visitExpressionScalarSubquery( + SubstraitPlanParser::ExpressionScalarSubqueryContext* ctx) { + const auto* symbol = + symbolTable_->lookupSymbolByName(ctx->relation_ref()->id(0)->getText()); + if (symbol == nullptr) { + errorListener_->addError( + ctx->relation_ref()->getStart(), + "Internal error: Previously encountered symbol went missing."); + return defaultResult(); + } + auto relationData = + ANY_CAST(std::shared_ptr<RelationData>, currentRelationScope_->blob); + relationData->subQueryPipelines.push_back(symbol); + symbolTable_->setParentQueryLocation(*symbol, currentRelationScopeLocation_); + return SubstraitPlanParserBaseVisitor::visitExpressionScalarSubquery(ctx); +} + +// Same bookkeeping as visitExpressionScalarSubquery, for the relation named by +// an IN predicate subquery. +std::any SubstraitPlanPipelineVisitor::visitExpressionInPredicateSubquery( + SubstraitPlanParser::ExpressionInPredicateSubqueryContext* ctx) { + const auto* symbol = + symbolTable_->lookupSymbolByName(ctx->relation_ref()->id(0)->getText()); + if (symbol == nullptr) { + errorListener_->addError( + ctx->relation_ref()->id(0)->getStart(), + "Internal error: Previously encountered symbol went missing."); + return defaultResult(); + } + auto relationData = + ANY_CAST(std::shared_ptr<RelationData>, currentRelationScope_->blob); + relationData->subQueryPipelines.push_back(symbol); + 
symbolTable_->setParentQueryLocation(*symbol, currentRelationScopeLocation_); + return SubstraitPlanParserBaseVisitor::visitExpressionInPredicateSubquery( + ctx); +} + +// Same bookkeeping for every relation named by a set predicate subquery. +// Unknown relations are reported and skipped; if any were unknown the subtree +// is not visited. +std::any SubstraitPlanPipelineVisitor::visitExpressionSetPredicateSubquery( + SubstraitPlanParser::ExpressionSetPredicateSubqueryContext* ctx) { + bool encounteredError = false; + for (auto ref : ctx->relation_ref()) { + const auto* symbol = + symbolTable_->lookupSymbolByName(ref->id(0)->getText()); + if (symbol == nullptr) { + errorListener_->addError( + ref->id(0)->getStart(), + "Internal error: Previously encountered symbol went missing."); + encounteredError = true; + // Nothing to record for an unknown relation; keep checking the rest. + continue; + } + auto relationData = + ANY_CAST(std::shared_ptr<RelationData>, currentRelationScope_->blob); + relationData->subQueryPipelines.push_back(symbol); + symbolTable_->setParentQueryLocation( + *symbol, currentRelationScopeLocation_); + } + if (encounteredError) { + return defaultResult(); + } + return SubstraitPlanParserBaseVisitor::visitExpressionSetPredicateSubquery( + ctx); +} + +// Same bookkeeping as visitExpressionScalarSubquery, for the relation named by +// a set comparison subquery. +std::any SubstraitPlanPipelineVisitor::visitExpressionSetComparisonSubquery( + SubstraitPlanParser::ExpressionSetComparisonSubqueryContext* ctx) { + const auto* symbol = + symbolTable_->lookupSymbolByName(ctx->relation_ref()->id(0)->getText()); + if (symbol == nullptr) { + errorListener_->addError( + ctx->relation_ref()->id(0)->getStart(), + "Internal error: Previously encountered symbol went missing."); + return defaultResult(); + } + auto relationData = + ANY_CAST(std::shared_ptr<RelationData>, currentRelationScope_->blob); + relationData->subQueryPipelines.push_back(symbol); + symbolTable_->setParentQueryLocation(*symbol, currentRelationScopeLocation_); + return SubstraitPlanParserBaseVisitor::visitExpressionSetComparisonSubquery( + ctx); +} + } // namespace io::substrait::textplan diff --git a/src/substrait/textplan/parser/SubstraitPlanPipelineVisitor.h b/src/substrait/textplan/parser/SubstraitPlanPipelineVisitor.h index 596d4e93..f7273153 100644 --- 
a/src/substrait/textplan/parser/SubstraitPlanPipelineVisitor.h +++ b/src/substrait/textplan/parser/SubstraitPlanPipelineVisitor.h @@ -30,6 +30,17 @@ class SubstraitPlanPipelineVisitor : public SubstraitPlanParserBaseVisitor { std::any visitPipelines(SubstraitPlanParser::PipelinesContext* ctx) override; std::any visitPipeline(SubstraitPlanParser::PipelineContext* ctx) override; + std::any visitRelation(SubstraitPlanParser::RelationContext* ctx) override; + std::any visitExpressionScalarSubquery( + SubstraitPlanParser::ExpressionScalarSubqueryContext* ctx) override; + std::any visitExpressionInPredicateSubquery( + SubstraitPlanParser::ExpressionInPredicateSubqueryContext* ctx) override; + std::any visitExpressionSetPredicateSubquery( + SubstraitPlanParser::ExpressionSetPredicateSubqueryContext* ctx) override; + std::any visitExpressionSetComparisonSubquery( + SubstraitPlanParser::ExpressionSetComparisonSubqueryContext* ctx) + override; + private: // Creates a symbol table entry if we don't already have one, then adds the // current location. 
@@ -39,6 +50,9 @@ class SubstraitPlanPipelineVisitor : public SubstraitPlanParserBaseVisitor { std::shared_ptr symbolTable_; std::shared_ptr errorListener_; + + const SymbolInfo* currentRelationScope_{nullptr}; + Location currentRelationScopeLocation_{Location::kUnknownLocation}; }; } // namespace io::substrait::textplan diff --git a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp index 4a6d5567..d7aeb10c 100644 --- a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp +++ b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp @@ -345,7 +345,8 @@ std::any SubstraitPlanRelationVisitor::visitRelation( setRelationType(relationType, &relation); relationData->relation = relation; - symbolTable_->updateLocation(*symbol, PROTO_LOCATION(relationData->relation)); + symbolTable_->addPermanentLocation( + *symbol, PROTO_LOCATION(relationData->relation)); // Mark the current scope for any operations within this relation. auto previousScope = currentRelationScope_; @@ -364,28 +365,31 @@ std::any SubstraitPlanRelationVisitor::visitRelation( addExpressionsToSchema(relationData); - // Now visit the emit details. - for (auto detail : ctx->relation_detail()) { - if (isRelationEmitDetail(detail)) { - visitRelationDetail(detail); + // We will handle subqueries in the next phase. + if (!isWithinSubquery(ctx)) { + // Now visit the emit details. + for (auto detail : ctx->relation_detail()) { + if (isRelationEmitDetail(detail)) { + visitRelationDetail(detail); + } } - } - // Aggregate relations are different in that they alter the emitted fields - // by default. 
- if (relationType == RelationType::kAggregate) { - relationData->outputFieldReferences.insert( - relationData->outputFieldReferences.end(), - relationData->generatedFieldReferences.begin(), - relationData->generatedFieldReferences.end()); - } + // Aggregate relations are different in that they alter the emitted fields + // by default. + if (relationType == RelationType::kAggregate) { + relationData->outputFieldReferences.insert( + relationData->outputFieldReferences.end(), + relationData->generatedFieldReferences.begin(), + relationData->generatedFieldReferences.end()); + } - applyOutputMappingToSchema(ctx->getStart(), relationType, relationData); + applyOutputMappingToSchema(ctx->getStart(), relationType, relationData); - // Emit one empty grouping for an aggregation relation not specifying any. - if (relationType == RelationType::kAggregate && - relationData->relation.aggregate().groupings_size() == 0) { - relationData->relation.mutable_aggregate()->add_groupings(); + // Emit one empty grouping for an aggregation relation not specifying any. 
+ if (relationType == RelationType::kAggregate && + relationData->relation.aggregate().groupings_size() == 0) { + relationData->relation.mutable_aggregate()->add_groupings(); + } } return defaultResult(); } @@ -549,7 +553,8 @@ std::any SubstraitPlanRelationVisitor::visitRelationFilter( } if (result.type() != typeid(::substrait::proto::Expression)) { errorListener_->addError( - ctx->getStart(), "Could not parse as an expression."); + ctx->getStart(), + "Could not parse as an expression (pass 1, spot 1)."); return defaultResult(); } *parentRelationData->relation.mutable_filter()->mutable_condition() = @@ -588,7 +593,7 @@ std::any SubstraitPlanRelationVisitor::visitRelationUsesSchema( if (sym.type != SymbolType::kSchemaColumn) { continue; } - if (sym.location != symbol->location) { + if (sym.sourceLocation != symbol->sourceLocation) { continue; } parentRelationData->outputFieldReferences.push_back(&sym); @@ -617,7 +622,7 @@ std::any SubstraitPlanRelationVisitor::visitRelationExpression( SymbolType::kRelation); auto parentRelationData = ANY_CAST(std::shared_ptr, parentSymbol->blob); - auto result = visitChildren(ctx); + auto result = visitExpression(ctx->expression()); auto parentRelationType = ANY_CAST(RelationType, parentSymbol->subtype); switch (parentRelationType) { case RelationType::kJoin: @@ -631,10 +636,20 @@ std::any SubstraitPlanRelationVisitor::visitRelationExpression( *parentRelationData->relation.mutable_join()->mutable_expression() = ANY_CAST(::substrait::proto::Expression, result); break; - case RelationType::kProject: + case RelationType::kProject: { *parentRelationData->relation.mutable_project()->add_expressions() = ANY_CAST(::substrait::proto::Expression, result); + std::string name; + if (ctx->id() != nullptr) { + name = ctx->id()->getText(); + } else { + name = symbolTable_->getUniqueName(kIntermediateNodeName); + } + parentRelationData->generatedFieldReferenceAliases + [parentRelationData->relation.project().expressions_size() - 1] = + name; 
break; + } default: errorListener_->addError( ctx->getStart(), @@ -813,7 +828,14 @@ std::any SubstraitPlanRelationVisitor::visitRelationEmit( ctx->getStart(), "Emits do not make sense for this kind of relation."); return defaultResult(); } - common->mutable_emit()->add_output_mapping(ANY_CAST(int32_t, result)); + typedef std::pair intPair; + auto [stepsOut, fieldReference] = ANY_CAST(intPair, result); + if (stepsOut > 0) { + errorListener_->addError( + ctx->getStart(), "Emitting outer references is not supported."); + return defaultResult(); + } + common->mutable_emit()->add_output_mapping(fieldReference); return defaultResult(); } @@ -962,7 +984,7 @@ std::any SubstraitPlanRelationVisitor::visitRelationSourceReference( if (sym.type != SymbolType::kSourceDetail) { continue; } - if (sym.location != symbol->location) { + if (sym.sourceLocation != symbol->sourceLocation) { continue; } *source->add_items() = *ANY_CAST( @@ -975,7 +997,7 @@ std::any SubstraitPlanRelationVisitor::visitRelationSourceReference( auto* source = parentRelationData->relation.mutable_read()->mutable_named_table(); for (const auto& sym : - symbolTable_->lookupSymbolsByLocation(symbol->location)) { + symbolTable_->lookupSymbolsByLocation(symbol->sourceLocation)) { if (sym->type != SymbolType::kSourceDetail) { continue; } @@ -1061,7 +1083,26 @@ std::any SubstraitPlanRelationVisitor::visitExpression( auto* castCtx = dynamic_cast(ctx)) { return visitExpressionCast(castCtx); + } else if ( + auto* scalarSubqueryCtx = + dynamic_cast( + ctx)) { + return visitExpressionScalarSubquery(scalarSubqueryCtx); + } else if ( + auto* inSubqueryCtx = dynamic_cast< + SubstraitPlanParser::ExpressionInPredicateSubqueryContext*>(ctx)) { + return visitExpressionInPredicateSubquery(inSubqueryCtx); + } else if ( + auto* setSubqueryCtx = dynamic_cast< + SubstraitPlanParser::ExpressionSetPredicateSubqueryContext*>(ctx)) { + return visitExpressionSetPredicateSubquery(setSubqueryCtx); + } else if ( + auto* setComparisonCtx = 
dynamic_cast< + SubstraitPlanParser::ExpressionSetComparisonSubqueryContext*>(ctx)) { + return visitExpressionSetComparisonSubquery(setComparisonCtx); } + errorListener_->addError( + ctx->getStart(), "Internal error: unsupported expression type."); return defaultResult(); } @@ -1117,14 +1158,23 @@ std::any SubstraitPlanRelationVisitor::visitExpressionFunctionUse( continue; } - auto result = visitExpression(exp); - if (result.type() != typeid(::substrait::proto::Expression)) { - errorListener_->addError( - ctx->id()->getStart(), "Could not parse as an expression."); - return expr; + if (hasSubquery(exp)) { + // Not ready to look at this function just yet. + ::substrait::proto::Expression newExpr; + *expr.mutable_scalar_function()->add_arguments()->mutable_value() = + newExpr; + } else { + auto result = visitExpression(exp); + if (result.type() != typeid(::substrait::proto::Expression)) { + errorListener_->addError( + ctx->id()->getStart(), + "Could not parse as an expression (pass 1, spot 2)."); + return expr; + } + auto newExpr = ANY_CAST(::substrait::proto::Expression, result); + *expr.mutable_scalar_function()->add_arguments()->mutable_value() = + newExpr; } - auto newExpr = ANY_CAST(::substrait::proto::Expression, result); - *expr.mutable_scalar_function()->add_arguments()->mutable_value() = newExpr; } if (ctx->literal_complex_type() != nullptr) { auto literalType = ANY_CAST( @@ -1143,14 +1193,33 @@ std::any SubstraitPlanRelationVisitor::visitExpressionConstant( return expr; } +std::any SubstraitPlanRelationVisitor::visitExpressionCast( + SubstraitPlanParser::ExpressionCastContext* ctx) { + ::substrait::proto::Expression expr; + auto origExpression = ANY_CAST( + ::substrait::proto::Expression, visitExpression(ctx->expression())); + auto literalType = ANY_CAST( + ::substrait::proto::Type, + visitLiteral_complex_type(ctx->literal_complex_type())); + *expr.mutable_cast()->mutable_type() = literalType; + *expr.mutable_cast()->mutable_input() = origExpression; + 
return expr; +} + std::any SubstraitPlanRelationVisitor::visitExpressionColumn( SubstraitPlanParser::ExpressionColumnContext* ctx) { auto relationData = ANY_CAST(std::shared_ptr, currentRelationScope_->blob); std::string symbolName = ctx->getText(); - int32_t fieldReference = - findFieldReferenceByName(ctx->getStart(), relationData, symbolName); + if (currentRelationScope_->parentQueryLocation != + Location::kUnknownLocation) { + // Skip evaluating expressions in subqueries in this pass. + ::substrait::proto::Expression expr; + return expr; + } + auto [stepsOut, fieldReference] = findFieldReferenceByName( + ctx->getStart(), currentRelationScope_, relationData, symbolName); ::substrait::proto::Expression expr; if (fieldReference != -1) { @@ -1158,23 +1227,26 @@ std::any SubstraitPlanRelationVisitor::visitExpressionColumn( ->mutable_direct_reference() ->mutable_struct_field() ->set_field(fieldReference); - // TODO -- Update the following when non-direct references are implemented. - expr.mutable_selection()->mutable_root_reference(); + // TODO -- Update this with other kinds of references. 
+ if (stepsOut > 0) { + expr.mutable_selection()->mutable_outer_reference()->set_steps_out( + stepsOut); + } else { + expr.mutable_selection()->mutable_root_reference(); + } } return expr; } -std::any SubstraitPlanRelationVisitor::visitExpressionCast( - SubstraitPlanParser::ExpressionCastContext* ctx) { - ::substrait::proto::Expression expr; - auto origExpression = ANY_CAST( - ::substrait::proto::Expression, visitExpression(ctx->expression())); - auto literalType = ANY_CAST( - ::substrait::proto::Type, - visitLiteral_complex_type(ctx->literal_complex_type())); - *expr.mutable_cast()->mutable_type() = literalType; - *expr.mutable_cast()->mutable_input() = origExpression; - return expr; +std::any SubstraitPlanRelationVisitor::visitExpression_list( + SubstraitPlanParser::Expression_listContext* ctx) { + std::vector<::substrait::proto::Expression> exprs; + for (auto exprCtx : ctx->expression()) { + auto expr = + ANY_CAST(::substrait::proto::Expression, visitExpression(exprCtx)); + exprs.push_back(expr); + } + return exprs; } std::any SubstraitPlanRelationVisitor::visitConstant( @@ -1252,7 +1324,7 @@ std::any SubstraitPlanRelationVisitor::visitColumn_name( auto relationData = ANY_CAST(std::shared_ptr, currentRelationScope_->blob); return findFieldReferenceByName( - ctx->getStart(), relationData, ctx->getText()); + ctx->getStart(), currentRelationScope_, relationData, ctx->getText()); } ::substrait::proto::Expression_Literal @@ -1892,7 +1964,8 @@ void SubstraitPlanRelationVisitor::addExpressionsToSchema( std::shared_ptr& relationData) { const auto& relation = relationData->relation; switch (relation.rel_type_case()) { - case ::substrait::proto::Rel::kProject: + case ::substrait::proto::Rel::kProject: { + int expressionNumber = 0; for (const auto& expr : relation.project().expressions()) { if (expr.selection().direct_reference().has_struct_field()) { if (expr.selection().direct_reference().struct_field().field() < @@ -1904,8 +1977,15 @@ void 
SubstraitPlanRelationVisitor::addExpressionsToSchema( .field()]); } } else { - const auto& uniqueName = - symbolTable_->getUniqueName(kIntermediateNodeName); + std::string uniqueName; + if (relationData->generatedFieldReferenceAliases.find( + expressionNumber) != + relationData->generatedFieldReferenceAliases.end()) { + uniqueName = + relationData->generatedFieldReferenceAliases[expressionNumber]; + } else { + uniqueName = symbolTable_->getUniqueName(kIntermediateNodeName); + } auto newSymbol = symbolTable_->defineSymbol( uniqueName, PROTO_LOCATION(expr), @@ -1914,8 +1994,10 @@ void SubstraitPlanRelationVisitor::addExpressionsToSchema( std::nullopt); relationData->generatedFieldReferences.push_back(newSymbol); } + expressionNumber++; } break; + } default: // Only project and aggregate relations affect the output mapping. break; @@ -1926,7 +2008,7 @@ std::string SubstraitPlanRelationVisitor::fullyQualifiedReference( const SymbolInfo* fieldReference) { for (const auto& symbol : symbolTable_->getSymbols()) { if (symbol->type == SymbolType::kSchema && - symbol->location == fieldReference->location) { + symbol->sourceLocation == fieldReference->sourceLocation) { auto fqn = symbol->name + "." 
+ fieldReference->name; return fqn; } @@ -1935,8 +2017,9 @@ std::string SubstraitPlanRelationVisitor::fullyQualifiedReference( return fieldReference->name; } -int SubstraitPlanRelationVisitor::findFieldReferenceByName( +std::pair SubstraitPlanRelationVisitor::findFieldReferenceByName( antlr4::Token* token, + const SymbolInfo* symbol, std::shared_ptr& relationData, const std::string& name) { auto fieldReferencesSize = relationData->fieldReferences.size(); @@ -1950,10 +2033,13 @@ int SubstraitPlanRelationVisitor::findFieldReferenceByName( if (generatedField != relationData->generatedFieldReferences.rend()) { auto fieldPlacement = generatedField - relationData->generatedFieldReferences.rbegin(); - return static_cast( - (fieldReferencesSize + relationData->generatedFieldReferences.size() - - fieldPlacement - 1) & - std::numeric_limits::max()); + return { + 0, + static_cast( + (fieldReferencesSize + + relationData->generatedFieldReferences.size() - fieldPlacement - + 1) & + std::numeric_limits::max())}; } auto field = std::find_if( @@ -1967,13 +2053,15 @@ int SubstraitPlanRelationVisitor::findFieldReferenceByName( if (field != relationData->fieldReferences.rend()) { auto fieldPlacement = field - relationData->fieldReferences.rbegin(); - return static_cast( - (fieldReferencesSize - fieldPlacement - 1) & - std::numeric_limits::max()); + return { + 0, + static_cast( + (fieldReferencesSize - fieldPlacement - 1) & + std::numeric_limits::max())}; } - errorListener_->addError(token, "Reference " + name + " does not exist."); - return -1; + // We didn't find the symbol, but let the next visitor worry about it. 
+ return {0, -1}; } void SubstraitPlanRelationVisitor::applyOutputMappingToSchema( @@ -2017,4 +2105,63 @@ void SubstraitPlanRelationVisitor::applyOutputMappingToSchema( } } +bool SubstraitPlanRelationVisitor::isWithinSubquery( + SubstraitPlanParser::RelationContext* ctx) { + auto symbol = symbolTable_->lookupSymbolByLocationAndType( + PARSER_LOCATION(ctx), SymbolType::kRelation); + if (symbol->parentQueryLocation != Location::kUnknownLocation) { + return true; + } + + // Also check our scope. + return currentRelationScope_->parentQueryLocation != + Location::kUnknownLocation; +} + +bool SubstraitPlanRelationVisitor::hasSubquery( + SubstraitPlanParser::ExpressionContext* ctx) { + if (auto* funcUseCtx = + dynamic_cast( + ctx)) { + for (auto* expr : funcUseCtx->expression()) { + if (hasSubquery(expr)) { + return true; + } + } + return false; + } else if ( + auto* constantCtx = + dynamic_cast(ctx)) { + return false; + } else if ( + auto* columnCtx = + dynamic_cast(ctx)) { + return false; + } else if ( + auto* castCtx = + dynamic_cast(ctx)) { + return hasSubquery(castCtx->expression()); + } else if ( + auto* scalarSubqueryCtx = + dynamic_cast( + ctx)) { + return true; + } else if ( + auto* inSubqueryCtx = dynamic_cast< + SubstraitPlanParser::ExpressionInPredicateSubqueryContext*>(ctx)) { + return true; + } else if ( + auto* setSubqueryCtx = dynamic_cast< + SubstraitPlanParser::ExpressionSetPredicateSubqueryContext*>(ctx)) { + return true; + } else if ( + auto* setComparisonCtx = dynamic_cast< + SubstraitPlanParser::ExpressionSetComparisonSubqueryContext*>(ctx)) { + return true; + } + errorListener_->addError( + ctx->getStart(), "Internal error: unsupported expression type."); + return false; +} + } // namespace io::substrait::textplan diff --git a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.h b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.h index 8844819f..8a6c0407 100644 --- a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.h +++ 
b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.h @@ -103,6 +103,9 @@ class SubstraitPlanRelationVisitor : public SubstraitPlanTypeVisitor { std::any visitExpressionColumn( SubstraitPlanParser::ExpressionColumnContext* ctx) override; + std::any visitExpression_list( + SubstraitPlanParser::Expression_listContext* ctx) override; + std::any visitConstant(SubstraitPlanParser::ConstantContext* ctx) override; std::any visitMap_literal( @@ -191,11 +194,15 @@ class SubstraitPlanRelationVisitor : public SubstraitPlanTypeVisitor { std::string fullyQualifiedReference(const SymbolInfo* fieldReference); - int findFieldReferenceByName( + std::pair findFieldReferenceByName( antlr4::Token* token, + const SymbolInfo* symbol, std::shared_ptr& relationData, const std::string& name); + bool isWithinSubquery(SubstraitPlanParser::RelationContext* ctx); + bool hasSubquery(SubstraitPlanParser::ExpressionContext* ctx); + const SymbolInfo* currentRelationScope_{nullptr}; // Not owned. }; diff --git a/src/substrait/textplan/parser/SubstraitPlanSubqueryRelationVisitor.cpp b/src/substrait/textplan/parser/SubstraitPlanSubqueryRelationVisitor.cpp new file mode 100644 index 00000000..717cc2b7 --- /dev/null +++ b/src/substrait/textplan/parser/SubstraitPlanSubqueryRelationVisitor.cpp @@ -0,0 +1,2271 @@ +/* SPDX-License-Identifier: Apache-2.0 */ + +#include "substrait/textplan/parser/SubstraitPlanSubqueryRelationVisitor.h" + +#include +#include +#include +#include +#include + +#include "SubstraitPlanParser/SubstraitPlanParser.h" +#include "SubstraitPlanTypeVisitor.h" +#include "absl/strings/ascii.h" +#include "absl/strings/numbers.h" +#include "absl/strings/strip.h" +#include "date/tz.h" +#include "substrait/expression/DecimalLiteral.h" +#include "substrait/proto/algebra.pb.h" +#include "substrait/proto/type.pb.h" +#include "substrait/textplan/Any.h" +#include "substrait/textplan/Finally.h" +#include "substrait/textplan/Location.h" +#include "substrait/textplan/StringManipulation.h" 
+#include "substrait/textplan/StructuredSymbolData.h" +#include "substrait/textplan/SymbolTable.h" + +namespace io::substrait::textplan { + +namespace { + +const std::string kAggregationPhasePrefix = "aggregationphase"; +const std::string kAggregationInvocationPrefix = "aggregationinvocation"; +const std::string kJoinTypePrefix = "jointype"; +const std::string kSortDirectionPrefix = "sortdirection"; + +const std::string kIntermediateNodeName = "intermediate"; + +enum RelationFilterBehavior { + kDefault = 0, + kBestEffort = 1, + kPostJoin = 2, +}; + +std::string toLower(const std::string& str) { + std::string s = str; + std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) { + return std::tolower(c); + }); + return s; +} + +void setNullable(::substrait::proto::Type* type) { + switch (type->kind_case()) { + case ::substrait::proto::Type::kBool: + type->mutable_bool_()->set_nullability( + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE); + break; + case ::substrait::proto::Type::kI8: + type->mutable_i8()->set_nullability( + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE); + break; + case ::substrait::proto::Type::kI16: + type->mutable_i16()->set_nullability( + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE); + break; + case ::substrait::proto::Type::kI32: + type->mutable_i32()->set_nullability( + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE); + break; + case ::substrait::proto::Type::kI64: + type->mutable_i64()->set_nullability( + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE); + break; + case ::substrait::proto::Type::kFp32: + type->mutable_fp32()->set_nullability( + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE); + break; + case ::substrait::proto::Type::kFp64: + type->mutable_fp64()->set_nullability( + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE); + break; + case ::substrait::proto::Type::kString: + type->mutable_string()->set_nullability( + 
::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE); + break; + case ::substrait::proto::Type::kBinary: + type->mutable_binary()->set_nullability( + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE); + break; + case ::substrait::proto::Type::kTimestamp: + type->mutable_timestamp()->set_nullability( + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE); + break; + case ::substrait::proto::Type::kDate: + type->mutable_date()->set_nullability( + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE); + break; + case ::substrait::proto::Type::kTime: + type->mutable_time()->set_nullability( + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE); + break; + case ::substrait::proto::Type::kIntervalYear: + type->mutable_interval_year()->set_nullability( + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE); + break; + case ::substrait::proto::Type::kIntervalDay: + type->mutable_interval_day()->set_nullability( + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE); + break; + case ::substrait::proto::Type::kTimestampTz: + type->mutable_timestamp_tz()->set_nullability( + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE); + break; + case ::substrait::proto::Type::kUuid: + type->mutable_uuid()->set_nullability( + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE); + break; + case ::substrait::proto::Type::kFixedChar: + type->mutable_fixed_char()->set_nullability( + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE); + break; + case ::substrait::proto::Type::kVarchar: + type->mutable_varchar()->set_nullability( + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE); + break; + case ::substrait::proto::Type::kFixedBinary: + type->mutable_fixed_binary()->set_nullability( + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE); + break; + case ::substrait::proto::Type::kDecimal: + type->mutable_decimal()->set_nullability( + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE); + break; + case 
::substrait::proto::Type::kStruct: + type->mutable_struct_()->set_nullability( + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE); + break; + case ::substrait::proto::Type::kList: + type->mutable_list()->set_nullability( + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE); + break; + case ::substrait::proto::Type::kMap: + type->mutable_map()->set_nullability( + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE); + break; + case ::substrait::proto::Type::kUserDefined: + type->mutable_user_defined()->set_nullability( + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE); + break; + case ::substrait::proto::Type::kUserDefinedTypeReference: + case ::substrait::proto::Type::KIND_NOT_SET: + // We are guaranteed to have a valid type so no need to emit an error. + break; + } +} + +void setRelationType( + RelationType relationType, + ::substrait::proto::Rel* relation) { + switch (relationType) { + case RelationType::kRead: + relation->mutable_read()->clear_common(); + break; + case RelationType::kProject: + relation->mutable_project()->clear_common(); + break; + case RelationType::kJoin: + relation->mutable_join()->clear_common(); + break; + case RelationType::kCross: + relation->mutable_cross()->clear_common(); + break; + case RelationType::kFetch: + relation->mutable_fetch()->clear_common(); + break; + case RelationType::kAggregate: + relation->mutable_aggregate()->clear_common(); + break; + case RelationType::kSort: + relation->mutable_sort()->clear_common(); + break; + case RelationType::kFilter: + relation->mutable_filter()->clear_common(); + break; + case RelationType::kSet: + relation->mutable_set()->clear_common(); + break; + case RelationType::kExchange: + case RelationType::kDdl: + case RelationType::kWrite: + break; + case RelationType::kHashJoin: + relation->mutable_hash_join()->clear_common(); + break; + case RelationType::kMergeJoin: + relation->mutable_merge_join()->clear_common(); + break; + case 
RelationType::kExtensionLeaf: + relation->mutable_extension_leaf()->clear_common(); + break; + case RelationType::kExtensionSingle: + relation->mutable_extension_single()->clear_common(); + break; + case RelationType::kExtensionMulti: + relation->mutable_extension_multi()->clear_common(); + break; + case RelationType::kUnknown: + break; + } +} + +::substrait::proto::RelCommon* findCommonRelation( + RelationType relationType, + ::substrait::proto::Rel* relation) { + switch (relationType) { + case RelationType::kRead: + return relation->mutable_read()->mutable_common(); + case RelationType::kProject: + return relation->mutable_project()->mutable_common(); + case RelationType::kJoin: + return relation->mutable_join()->mutable_common(); + case RelationType::kCross: + return relation->mutable_cross()->mutable_common(); + case RelationType::kFetch: + return relation->mutable_fetch()->mutable_common(); + case RelationType::kAggregate: + return relation->mutable_aggregate()->mutable_common(); + case RelationType::kSort: + return relation->mutable_sort()->mutable_common(); + case RelationType::kFilter: + return relation->mutable_filter()->mutable_common(); + case RelationType::kSet: + return relation->mutable_set()->mutable_common(); + case RelationType::kExtensionLeaf: + return relation->mutable_extension_leaf()->mutable_common(); + case RelationType::kExtensionMulti: + return relation->mutable_extension_multi()->mutable_common(); + case RelationType::kExtensionSingle: + return relation->mutable_extension_single()->mutable_common(); + case RelationType::kHashJoin: + return relation->mutable_hash_join()->mutable_common(); + case RelationType::kMergeJoin: + return relation->mutable_merge_join()->mutable_common(); + case RelationType::kExchange: + case RelationType::kDdl: + case RelationType::kWrite: + case RelationType::kUnknown: + break; + } + return nullptr; +} + +std::string normalizeProtoEnum(std::string_view text, std::string_view prefix) { + std::string result{text}; + 
// Remove non-alphabetic characters. + result.erase( + std::remove_if( + result.begin(), + result.end(), + [](auto const& c) -> bool { return !std::isalpha(c); }), + result.end()); + // Lowercase. + std::transform( + result.begin(), result.end(), result.begin(), [](unsigned char c) { + return std::tolower(c); + }); + // Remove the prefix if it exists. + if (startsWith(result, prefix)) { + result = result.substr(prefix.length()); + } + return result; +} + +void addInputFieldsToSchema( + RelationType relationType, + std::shared_ptr& relationData) { + if (relationData->continuingPipeline != nullptr) { + auto continuingRelationData = ANY_CAST( + std::shared_ptr, relationData->continuingPipeline->blob); + if (!continuingRelationData->outputFieldReferences.empty()) { + // There is an emit sequence so use that. + for (auto field : continuingRelationData->outputFieldReferences) { + relationData->fieldReferences.push_back(field); + } + } else { + // There was no emit so just access all the field references. 
+ for (auto field : continuingRelationData->fieldReferences) { + relationData->fieldReferences.push_back(field); + } + for (auto field : continuingRelationData->generatedFieldReferences) { + relationData->fieldReferences.push_back(field); + } + } + } + + for (auto pipeline : relationData->newPipelines) { + auto pipelineRelationData = + ANY_CAST(std::shared_ptr, pipeline->blob); + if (!pipelineRelationData->outputFieldReferences.empty()) { + for (auto field : pipelineRelationData->outputFieldReferences) { + relationData->fieldReferences.push_back(field); + } + } else { + for (auto field : pipelineRelationData->fieldReferences) { + relationData->fieldReferences.push_back(field); + } + for (auto field : pipelineRelationData->generatedFieldReferences) { + relationData->fieldReferences.push_back(field); + } + } + } +} + +void resetSchema(std::shared_ptr& relationData) { + relationData->fieldReferences.clear(); + relationData->generatedFieldReferences.clear(); + relationData->outputFieldReferences.clear(); +} + +bool isRelationEmitDetail(SubstraitPlanParser::Relation_detailContext* ctx) { + return dynamic_cast(ctx) != + nullptr; +} + +Location getParentQueryLocation( + const SymbolInfo* symbol, + std::shared_ptr& relationData) { + auto actualParentQueryLocation = symbol->parentQueryLocation; + if (actualParentQueryLocation != Location::kUnknownLocation) { + return actualParentQueryLocation; + } + auto currRelationData = relationData; + while (currRelationData->pipelineStart != nullptr) { + if (currRelationData->pipelineStart->parentQueryLocation != + Location::kUnknownLocation) { + return currRelationData->pipelineStart->parentQueryLocation; + } + currRelationData = ANY_CAST( + std::shared_ptr, currRelationData->pipelineStart->blob); + } + return Location::kUnknownLocation; +} + +::substrait::proto::Expression_Subquery_SetComparison_ComparisonOp +comparisonToProto(const std::string& text) { + std::unordered_map< + std::string, + 
::substrait::proto::Expression_Subquery_SetComparison_ComparisonOp> + comparisonOpMap = { + {"le", + ::substrait::proto:: + Expression_Subquery_SetComparison_ComparisonOp_COMPARISON_OP_LE}, + {"ge", + ::substrait::proto:: + Expression_Subquery_SetComparison_ComparisonOp_COMPARISON_OP_GE}, + {"eq", + ::substrait::proto:: + Expression_Subquery_SetComparison_ComparisonOp_COMPARISON_OP_EQ}, + {"ne", + ::substrait::proto:: + Expression_Subquery_SetComparison_ComparisonOp_COMPARISON_OP_NE}, + {"lt", + ::substrait::proto:: + Expression_Subquery_SetComparison_ComparisonOp_COMPARISON_OP_LT}, + {"gt", + ::substrait::proto:: + Expression_Subquery_SetComparison_ComparisonOp_COMPARISON_OP_GT}, + }; + auto it = comparisonOpMap.find(toLower(text)); + if (it != comparisonOpMap.end()) { + return it->second; + } + return ::substrait::proto:: + Expression_Subquery_SetComparison_ComparisonOp_COMPARISON_OP_UNSPECIFIED; +} + +} // namespace + +std::any SubstraitPlanSubqueryRelationVisitor::aggregateResult( + std::any aggregate, + std::any nextResult) { + if (!nextResult.has_value()) { + // No point returning an unspecified result over whatever we already have. + return aggregate; + } + return nextResult; +} + +std::any SubstraitPlanSubqueryRelationVisitor::visitRelation( + SubstraitPlanParser::RelationContext* ctx) { + // First find the relation created in a previous step. + auto* symbol = symbolTable_->lookupSymbolByLocationAndType( + Location(ctx), SymbolType::kRelation); + if (symbol == nullptr) { + // This error has been previously dealt with thus we can safely skip it. + return defaultResult(); + } + // Create the relation data before visiting children, so they can update it. 
+ auto relationData = ANY_CAST(std::shared_ptr, symbol->blob); + ::substrait::proto::Rel relation; + + auto relationType = ANY_CAST(RelationType, symbol->subtype); + setRelationType(relationType, &relation); + + relationData->relation = relation; + symbolTable_->addPermanentLocation( + *symbol, PROTO_LOCATION(relationData->relation)); + + // Mark the current scope for any operations within this relation. + auto previousScope = currentRelationScope_; + auto resetCurrentScope = + finally([&]() { currentRelationScope_ = previousScope; }); + currentRelationScope_ = symbol; + + resetSchema(relationData); + + // if (!isWithinSubquery(ctx)) { + addInputFieldsToSchema(relationType, relationData); + //} + + // Visit everything but the emit details to gather necessary information. + for (auto detail : ctx->relation_detail()) { + if (!isRelationEmitDetail(detail)) { + visitRelationDetail(detail); + } + } + + // if (isWithinSubquery(ctx)) { + addExpressionsToSchema(relationData); + //} + + // Now visit the emit details. + for (auto detail : ctx->relation_detail()) { + if (isRelationEmitDetail(detail)) { + visitRelationDetail(detail); + } + } + + // Aggregate relations are different in that they alter the emitted fields + // by default. + if (relationType == RelationType::kAggregate) { + relationData->outputFieldReferences.insert( + relationData->outputFieldReferences.end(), + relationData->generatedFieldReferences.begin(), + relationData->generatedFieldReferences.end()); + } + + applyOutputMappingToSchema(ctx->getStart(), relationType, relationData); + + // Emit one empty grouping for an aggregation relation not specifying any. 
+ if (relationType == RelationType::kAggregate && + relationData->relation.aggregate().groupings_size() == 0) { + relationData->relation.mutable_aggregate()->add_groupings(); + } + + return defaultResult(); +} + +std::any SubstraitPlanSubqueryRelationVisitor::visitRelationDetail( + SubstraitPlanParser::Relation_detailContext* ctx) { + if (auto* commonCtx = + dynamic_cast(ctx)) { + return visitRelationCommon(commonCtx); + } else if ( + auto* usesSchemaCtx = + dynamic_cast(ctx)) { + return visitRelationUsesSchema(usesSchemaCtx); + } else if ( + auto* filterCtx = + dynamic_cast(ctx)) { + return visitRelationFilter(filterCtx); + } else if ( + auto* exprCtx = + dynamic_cast(ctx)) { + return visitRelationExpression(exprCtx); + } else if ( + auto* advExtensionCtx = + dynamic_cast( + ctx)) { + return visitRelationAdvancedExtension(advExtensionCtx); + } else if ( + auto* sourceRefCtx = + dynamic_cast( + ctx)) { + return visitRelationSourceReference(sourceRefCtx); + } else if ( + auto* groupingCtx = + dynamic_cast(ctx)) { + return visitRelationGrouping(groupingCtx); + } else if ( + auto* measureCtx = + dynamic_cast(ctx)) { + return visitRelationMeasure(measureCtx); + } else if ( + auto* sortCtx = + dynamic_cast(ctx)) { + return visitRelationSort(sortCtx); + } else if ( + auto* countCtx = + dynamic_cast(ctx)) { + return visitRelationCount(countCtx); + } else if ( + auto* joinTypeCtx = + dynamic_cast(ctx)) { + return visitRelationJoinType(joinTypeCtx); + } else if ( + auto* emitCtx = + dynamic_cast(ctx)) { + return visitRelationEmit(emitCtx); + } + return defaultResult(); +} + +std::any SubstraitPlanSubqueryRelationVisitor::visitRelation_filter_behavior( + SubstraitPlanParser::Relation_filter_behaviorContext* ctx) { + std::string text = toLower(ctx->getText()); + // Only look at alphabetic characters for this comparison. 
+ text.erase( + std::remove_if( + text.begin(), + text.end(), + [](auto const& c) -> bool { return !std::isalnum(c); }), + text.end()); + if (text == "besteffort") { + return kBestEffort; + } else if (text == "postjoin") { + return kPostJoin; + } + errorListener_->addError( + ctx->getStart(), + "Best effort and post join are the only two legal filter behavior " + "choices. You may also not provide one which will result to the default " + "filter behavior."); + return kDefault; +} + +std::any SubstraitPlanSubqueryRelationVisitor::visitRelationFilter( + SubstraitPlanParser::RelationFilterContext* ctx) { + RelationFilterBehavior behavior = kDefault; + if (ctx->relation_filter_behavior() != nullptr) { + behavior = ANY_CAST( + RelationFilterBehavior, + visitRelation_filter_behavior(ctx->relation_filter_behavior())); + } + + auto* parentSymbol = symbolTable_->lookupSymbolByLocationAndType( + PARSER_LOCATION(ctx->parent), SymbolType::kRelation); + auto parentRelationData = + ANY_CAST(std::shared_ptr, parentSymbol->blob); + auto result = visitChildren(ctx); + auto parentRelationType = ANY_CAST(RelationType, parentSymbol->subtype); + switch (parentRelationType) { + case RelationType::kRead: + switch (behavior) { + case kDefault: + if (parentRelationData->relation.read().has_filter()) { + errorListener_->addError( + ctx->getStart(), + "A filter has already been specified for this read relation. " + "Only one filter and one best effort filter may be specified " + "for a read relation."); + break; + } + *parentRelationData->relation.mutable_read()->mutable_filter() = + ANY_CAST(::substrait::proto::Expression, result); + break; + case kBestEffort: + if (parentRelationData->relation.read().has_best_effort_filter()) { + errorListener_->addError( + ctx->getStart(), + "A best effort filter has already been specified for this read " + "relation. 
Only one filter and one best effort filter may be " + "specified for a read relation."); + break; + } + *parentRelationData->relation.mutable_read() + ->mutable_best_effort_filter() = + ANY_CAST(::substrait::proto::Expression, result); + break; + case kPostJoin: + default: + errorListener_->addError( + ctx->getStart(), + "Default and best effort filters are the only legal filter for " + "read relations."); + break; + } + break; + case RelationType::kJoin: + if (behavior == kPostJoin) { + if (parentRelationData->relation.join().has_post_join_filter()) { + errorListener_->addError( + ctx->getStart(), + "A post join filter has already been specified for this join " + "relation. Only one may be specified."); + break; + } + *parentRelationData->relation.mutable_join() + ->mutable_post_join_filter() = + ANY_CAST(::substrait::proto::Expression, result); + } else { + errorListener_->addError( + ctx->getStart(), + "A post join filter is the only legal option for a join relation."); + } + break; + case RelationType::kFilter: + if (behavior == kDefault) { + if (parentRelationData->relation.filter().has_condition()) { + errorListener_->addError( + ctx->getStart(), + "A filter condition has already been specified. 
Only one may be " + "specified."); + break; + } + if (result.type() != typeid(::substrait::proto::Expression)) { + errorListener_->addError( + ctx->getStart(), + "Could not parse as an expression (pass 2, spot 1)."); + return defaultResult(); + } + *parentRelationData->relation.mutable_filter()->mutable_condition() = + ANY_CAST(::substrait::proto::Expression, result); + } else { + errorListener_->addError( + ctx->getStart(), + "No filter behavior options are permissible for a filter join."); + } + break; + default: + errorListener_->addError( + ctx->getStart(), + "Filters are not permitted for this kind of relation."); + break; + } + return defaultResult(); +} + +std::any SubstraitPlanSubqueryRelationVisitor::visitRelationUsesSchema( + SubstraitPlanParser::RelationUsesSchemaContext* ctx) { + auto* parentSymbol = symbolTable_->lookupSymbolByLocationAndType( + Location(dynamic_cast(ctx->parent)), + SymbolType::kRelation); + auto parentRelationData = + ANY_CAST(std::shared_ptr, parentSymbol->blob); + auto parentRelationType = ANY_CAST(RelationType, parentSymbol->subtype); + + if (parentRelationType == RelationType::kRead) { + auto schemaName = ctx->id()->getText(); + auto* symbol = symbolTable_->lookupSymbolByName(schemaName); + if (symbol != nullptr) { + auto* schema = + parentRelationData->relation.mutable_read()->mutable_base_schema(); + for (const auto& sym : *symbolTable_) { + if (sym.type != SymbolType::kSchemaColumn) { + continue; + } + if (sym.sourceLocation != symbol->sourceLocation) { + continue; + } + parentRelationData->outputFieldReferences.push_back(&sym); + schema->add_names(sym.name); + auto typeProto = ANY_CAST(::substrait::proto::Type, sym.blob); + if (typeProto.kind_case() != ::substrait::proto::Type::KIND_NOT_SET) { + *schema->mutable_struct_()->add_types() = typeProto; + // If the schema contains any types, the struct is required. 
+ schema->mutable_struct_()->set_nullability( + ::substrait::proto::Type_Nullability_NULLABILITY_REQUIRED); + } + } + } + } else { + errorListener_->addError( + ctx->getStart(), + "Schema references are not defined for this kind of relation."); + } + return defaultResult(); +} + +std::any SubstraitPlanSubqueryRelationVisitor::visitRelationExpression( + SubstraitPlanParser::RelationExpressionContext* ctx) { + auto* parentSymbol = symbolTable_->lookupSymbolByLocationAndType( + Location(dynamic_cast(ctx->parent)), + SymbolType::kRelation); + auto parentRelationData = + ANY_CAST(std::shared_ptr, parentSymbol->blob); + auto result = visitChildren(ctx); + auto parentRelationType = ANY_CAST(RelationType, parentSymbol->subtype); + switch (parentRelationType) { + case RelationType::kJoin: + if (parentRelationData->relation.join().has_expression()) { + errorListener_->addError( + ctx->getStart(), + "A join expression has already been specified. Only one may be " + "specified."); + break; + } + *parentRelationData->relation.mutable_join()->mutable_expression() = + ANY_CAST(::substrait::proto::Expression, result); + break; + case RelationType::kProject: + *parentRelationData->relation.mutable_project()->add_expressions() = + ANY_CAST(::substrait::proto::Expression, result); + break; + default: + errorListener_->addError( + ctx->getStart(), + "Expressions are not permitted for this kind of relation."); + break; + } + return defaultResult(); +} + +std::any SubstraitPlanSubqueryRelationVisitor::visitRelationGrouping( + SubstraitPlanParser::RelationGroupingContext* ctx) { + auto* parentSymbol = symbolTable_->lookupSymbolByLocationAndType( + Location(dynamic_cast(ctx->parent)), + SymbolType::kRelation); + auto parentRelationData = + ANY_CAST(std::shared_ptr, parentSymbol->blob); + auto result = visitChildren(ctx); + auto parentRelationType = ANY_CAST(RelationType, parentSymbol->subtype); + switch (parentRelationType) { + case RelationType::kAggregate: { + if 
(parentRelationData->relation.aggregate().groupings_size() == 0) { + parentRelationData->relation.mutable_aggregate()->add_groupings(); + } + // Always add new expressions to the first groupings group. + auto newExpr = parentRelationData->relation.mutable_aggregate() + ->mutable_groupings(0) + ->add_grouping_expressions(); + *newExpr = ANY_CAST(::substrait::proto::Expression, result); + if (newExpr->has_selection()) { + newExpr->mutable_selection()->mutable_root_reference(); + if (newExpr->selection().direct_reference().has_struct_field()) { + parentRelationData->generatedFieldReferences.push_back( + parentRelationData->fieldReferences[newExpr->selection() + .direct_reference() + .struct_field() + .field()]); + } + } + break; + } + default: + errorListener_->addError( + ctx->getStart(), + "Groupings are not permitted for this kind of relation."); + break; + } + return defaultResult(); +} + +std::any SubstraitPlanSubqueryRelationVisitor::visitRelationMeasure( + SubstraitPlanParser::RelationMeasureContext* ctx) { + // Construct the measure. 
+ ::substrait::proto::AggregateRel_Measure measure; + auto invocation = ::substrait::proto:: + AggregateFunction_AggregationInvocation_AGGREGATION_INVOCATION_UNSPECIFIED; + std::vector<::substrait::proto::SortField> sorts; + for (auto detail : ctx->measure_detail()) { + auto detailItem = ANY_CAST( + ::substrait::proto::AggregateRel_Measure, visitMeasure_detail(detail)); + if (detail->getStart()->getType() == SubstraitPlanParser::MEASURE) { + if (measure.has_measure()) { + errorListener_->addError( + detail->getStart(), + "A measure expression has already been provided for this measure."); + break; + } + *measure.mutable_measure() = detailItem.measure(); + } else if (detail->getStart()->getType() == SubstraitPlanParser::FILTER) { + if (measure.has_filter()) { + errorListener_->addError( + detail->getStart(), + "A filter has already been provided for this measure."); + break; + } + *measure.mutable_filter() = detailItem.filter(); + } else if ( + detail->getStart()->getType() == SubstraitPlanParser::INVOCATION) { + invocation = detailItem.measure().invocation(); + } else if (detail->getStart()->getType() == SubstraitPlanParser::SORT) { + auto newSorts = detailItem.measure().sorts(); + sorts.insert(sorts.end(), newSorts.begin(), newSorts.end()); + } + } + if (invocation != + ::substrait::proto:: + AggregateFunction_AggregationInvocation_AGGREGATION_INVOCATION_UNSPECIFIED) { + measure.mutable_measure()->set_invocation(invocation); + } + for (const auto& sort : sorts) { + *measure.mutable_measure()->add_sorts() = sort; + } + + // Add it to our relation. 
+ auto* parentSymbol = symbolTable_->lookupSymbolByLocationAndType( + Location(dynamic_cast(ctx->parent)), + SymbolType::kRelation); + auto parentRelationData = + ANY_CAST(std::shared_ptr, parentSymbol->blob); + auto parentRelationType = ANY_CAST(RelationType, parentSymbol->subtype); + switch (parentRelationType) { + case RelationType::kAggregate: + *parentRelationData->relation.mutable_aggregate()->add_measures() = + measure; + break; + default: + errorListener_->addError( + ctx->getStart(), + "Measures are not permitted for this kind of relation."); + break; + } + return defaultResult(); +} + +std::any SubstraitPlanSubqueryRelationVisitor::visitRelationJoinType( + SubstraitPlanParser::RelationJoinTypeContext* ctx) { + auto* parentSymbol = symbolTable_->lookupSymbolByLocationAndType( + Location(dynamic_cast(ctx->parent)), + SymbolType::kRelation); + auto parentRelationData = + ANY_CAST(std::shared_ptr, parentSymbol->blob); + auto parentRelationType = ANY_CAST(RelationType, parentSymbol->subtype); + if (parentRelationType == RelationType::kJoin) { + std::string text = + normalizeProtoEnum(ctx->id()->getText(), kJoinTypePrefix); + ::substrait::proto::JoinRel_JoinType joinType; + if (text == "unspecified") { + joinType = ::substrait::proto::JoinRel_JoinType_JOIN_TYPE_UNSPECIFIED; + } else if (text == "inner") { + joinType = ::substrait::proto::JoinRel_JoinType_JOIN_TYPE_INNER; + } else if (text == "outer") { + joinType = ::substrait::proto::JoinRel_JoinType_JOIN_TYPE_OUTER; + } else if (text == "left") { + joinType = ::substrait::proto::JoinRel_JoinType_JOIN_TYPE_LEFT; + } else if (text == "right") { + joinType = ::substrait::proto::JoinRel_JoinType_JOIN_TYPE_RIGHT; + } else if (text == "semi") { + joinType = ::substrait::proto::JoinRel_JoinType_JOIN_TYPE_SEMI; + } else if (text == "anti") { + joinType = ::substrait::proto::JoinRel_JoinType_JOIN_TYPE_ANTI; + } else if (text == "single") { + joinType = ::substrait::proto::JoinRel_JoinType_JOIN_TYPE_SINGLE; + } else { 
+ joinType = ::substrait::proto::JoinRel_JoinType_JOIN_TYPE_UNSPECIFIED; + } + if (joinType == + ::substrait::proto::JoinRel_JoinType_JOIN_TYPE_UNSPECIFIED) { + this->errorListener_->addError( + ctx->getStart(), + "Unsupported join type direction: " + ctx->id()->getText()); + } + parentRelationData->relation.mutable_join()->set_type(joinType); + + // TODO -- Add support for HashJoin/MergeJoin which have different enums. + } else { + errorListener_->addError( + ctx->getStart(), + "Join types are not supported for this relation type."); + return defaultResult(); + } + return defaultResult(); +} + +std::any SubstraitPlanSubqueryRelationVisitor::visitRelationEmit( + SubstraitPlanParser::RelationEmitContext* ctx) { + auto* parentSymbol = symbolTable_->lookupSymbolByLocationAndType( + Location(dynamic_cast(ctx->parent)), + SymbolType::kRelation); + auto parentRelationData = + ANY_CAST(std::shared_ptr, parentSymbol->blob); + auto result = visitChildren(ctx); + auto parentRelationType = ANY_CAST(RelationType, parentSymbol->subtype); + auto common = + findCommonRelation(parentRelationType, &parentRelationData->relation); + if (common == nullptr) { + errorListener_->addError( + ctx->getStart(), "Emits do not make sense for this kind of relation."); + return defaultResult(); + } + typedef std::pair intPair; + auto [stepsOut, fieldReference] = ANY_CAST(intPair, result); + if (stepsOut > 0) { + errorListener_->addError( + ctx->getStart(), "Emitting outer references is not supported."); + return defaultResult(); + } + common->mutable_emit()->add_output_mapping(fieldReference); + return defaultResult(); +} + +int32_t SubstraitPlanSubqueryRelationVisitor::visitAggregationInvocation( + SubstraitPlanParser::IdContext* ctx) { + std::string text = + normalizeProtoEnum(ctx->getText(), kAggregationInvocationPrefix); + if (text == "unspecified") { + return ::substrait::proto::AggregateFunction:: + AGGREGATION_INVOCATION_UNSPECIFIED; + } else if (text == "all") { + return 
::substrait::proto::AggregateFunction::AGGREGATION_INVOCATION_ALL; + } else if (text == "distinct") { + return ::substrait::proto::AggregateFunction:: + AGGREGATION_INVOCATION_DISTINCT; + } + this->errorListener_->addError( + ctx->getStart(), + "Unrecognized aggregation invocation: " + ctx->getText()); + return ::substrait::proto::AggregateFunction:: + AGGREGATION_INVOCATION_UNSPECIFIED; +} + +int32_t SubstraitPlanSubqueryRelationVisitor::visitAggregationPhase( + SubstraitPlanParser::IdContext* ctx) { + std::string text = + normalizeProtoEnum(ctx->getText(), kAggregationPhasePrefix); + if (text == "unspecified") { + return ::substrait::proto::AGGREGATION_PHASE_UNSPECIFIED; + } else if (text == "initialtointermediate") { + return ::substrait::proto::AGGREGATION_PHASE_INITIAL_TO_INTERMEDIATE; + } else if (text == "intermediatetointermediate") { + return ::substrait::proto::AGGREGATION_PHASE_INTERMEDIATE_TO_INTERMEDIATE; + } else if (text == "initialtoresult") { + return ::substrait::proto::AGGREGATION_PHASE_INITIAL_TO_RESULT; + } else if (text == "intermediatetoresult") { + return ::substrait::proto::AGGREGATION_PHASE_INTERMEDIATE_TO_RESULT; + } + this->errorListener_->addError( + ctx->getStart(), "Unrecognized aggregation phase: " + ctx->getText()); + return ::substrait::proto::AGGREGATION_PHASE_UNSPECIFIED; +} + +std::any SubstraitPlanSubqueryRelationVisitor::visitMeasure_detail( + SubstraitPlanParser::Measure_detailContext* ctx) { + ::substrait::proto::AggregateRel_Measure measure; + switch (ctx->getStart()->getType()) { + case SubstraitPlanParser::MEASURE: { + auto function = measure.mutable_measure(); + auto result = visitExpression(ctx->expression()); + auto expr = ANY_CAST(::substrait::proto::Expression, result); + if (expr.has_scalar_function()) { + const auto& scalarFunc = expr.scalar_function(); + function->set_function_reference(scalarFunc.function_reference()); + for (const auto& arg : scalarFunc.arguments()) { + *function->add_arguments() = arg; + } + 
for (const auto& option : scalarFunc.options()) { + *function->add_options() = option; + } + if (scalarFunc.has_output_type()) { + *function->mutable_output_type() = scalarFunc.output_type(); + } + if (ctx->literal_complex_type() != nullptr) { + // The version here overrides any that might be in the function. + *function->mutable_output_type() = ANY_CAST( + ::substrait::proto::Type, + visitLiteral_complex_type(ctx->literal_complex_type())); + } + if (ctx->id(0) != nullptr) { + measure.mutable_measure()->set_phase( + static_cast<::substrait::proto::AggregationPhase>( + visitAggregationPhase(ctx->id(0)))); + } + } else { + errorListener_->addError( + ctx->id(0)->getStart(), + "Expected an expression utilizing a function here."); + } + // If we have a NAMED clause, add a symbol reference. + if (ctx->id().size() > 1) { + auto symbol = symbolTable_->defineSymbol( + ctx->id(1)->getText(), + PROTO_LOCATION(measure), + SymbolType::kMeasure, + std::nullopt, + std::nullopt); + + // Add it to our generated field mapping. + auto* parentSymbol = symbolTable_->lookupSymbolByLocationAndType( + Location( + dynamic_cast(ctx->parent->parent)), + SymbolType::kRelation); + auto parentRelationData = + ANY_CAST(std::shared_ptr, parentSymbol->blob); + parentRelationData->generatedFieldReferences.push_back(symbol); + } + return measure; + } + case SubstraitPlanParser::FILTER: + *measure.mutable_filter() = ANY_CAST( + ::substrait::proto::Expression, visitExpression(ctx->expression())); + return measure; + case SubstraitPlanParser::INVOCATION: + measure.mutable_measure()->set_invocation( + static_cast< + ::substrait::proto::AggregateFunction_AggregationInvocation>( + visitAggregationInvocation(ctx->id(0)))); + return measure; + case SubstraitPlanParser::SORT: + *measure.mutable_measure()->add_sorts() = ANY_CAST( + ::substrait::proto::SortField, visitSort_field(ctx->sort_field())); + return measure; + default: + // Alert that this kind of measure detail is not in the grammar. 
+ return measure; + } +} + +std::any SubstraitPlanSubqueryRelationVisitor::visitRelationSourceReference( + SubstraitPlanParser::RelationSourceReferenceContext* ctx) { + auto* parentSymbol = symbolTable_->lookupSymbolByLocationAndType( + Location(dynamic_cast(ctx->parent)), + SymbolType::kRelation); + auto parentRelationData = + ANY_CAST(std::shared_ptr, parentSymbol->blob); + auto parentRelationType = ANY_CAST(RelationType, parentSymbol->subtype); + + if (parentRelationType != RelationType::kRead) { + errorListener_->addError( + ctx->getStart(), + "Source references are not defined for this kind of relation."); + return defaultResult(); + } + + auto sourceName = ctx->source_reference()->id()->getText(); + auto* symbol = symbolTable_->lookupSymbolByName(sourceName); + if (symbol == nullptr) { + return defaultResult(); + } + switch (ANY_CAST(SourceType, symbol->subtype)) { + case SourceType::kLocalFiles: { + auto* source = + parentRelationData->relation.mutable_read()->mutable_local_files(); + for (const auto& sym : *symbolTable_) { + if (sym.type != SymbolType::kSourceDetail) { + continue; + } + if (sym.sourceLocation != symbol->sourceLocation) { + continue; + } + *source->add_items() = *ANY_CAST( + std::shared_ptr<::substrait::proto::ReadRel_LocalFiles_FileOrFiles>, + sym.blob); + } + break; + } + case SourceType::kNamedTable: { + auto* source = + parentRelationData->relation.mutable_read()->mutable_named_table(); + for (const auto& sym : + symbolTable_->lookupSymbolsByLocation(symbol->sourceLocation)) { + if (sym->type != SymbolType::kSourceDetail) { + continue; + } + source->add_names(sym->name); + } + break; + } + case SourceType::kVirtualTable: + // TODO -- Implement. + break; + case SourceType::kExtensionTable: + // TODO -- Implement. 
+ break; + case SourceType::kUnknown: + break; + } + + return defaultResult(); +} + +std::any SubstraitPlanSubqueryRelationVisitor::visitRelationSort( + SubstraitPlanParser::RelationSortContext* ctx) { + auto* parentSymbol = symbolTable_->lookupSymbolByLocationAndType( + Location(dynamic_cast(ctx->parent)), + SymbolType::kRelation); + auto parentRelationData = + ANY_CAST(std::shared_ptr, parentSymbol->blob); + auto parentRelationType = ANY_CAST(RelationType, parentSymbol->subtype); + switch (parentRelationType) { + case RelationType::kSort: + *parentRelationData->relation.mutable_sort()->add_sorts() = ANY_CAST( + ::substrait::proto::SortField, visitSort_field(ctx->sort_field())); + break; + default: + errorListener_->addError( + ctx->getStart(), + "Sorts are not permitted for this kind of relation."); + break; + } + return defaultResult(); +} + +std::any SubstraitPlanSubqueryRelationVisitor::visitRelationCount( + SubstraitPlanParser::RelationCountContext* ctx) { + auto* parentSymbol = symbolTable_->lookupSymbolByLocationAndType( + Location(dynamic_cast(ctx->parent)), + SymbolType::kRelation); + auto parentRelationData = + ANY_CAST(std::shared_ptr, parentSymbol->blob); + auto parentRelationType = ANY_CAST(RelationType, parentSymbol->subtype); + switch (parentRelationType) { + case RelationType::kFetch: { + ::substrait::proto::Type type; + type.mutable_i64()->set_nullability( + ::substrait::proto::Type_Nullability_NULLABILITY_REQUIRED); + auto number = visitNumber(ctx->NUMBER(), type); + parentRelationData->relation.mutable_fetch()->set_count(number.i64()); + break; + } + default: + errorListener_->addError( + ctx->getStart(), "Count only applies to fetch relations."); + break; + } + return defaultResult(); +} + +std::any SubstraitPlanSubqueryRelationVisitor::visitExpression( + SubstraitPlanParser::ExpressionContext* ctx) { + if (auto* funcUseCtx = + dynamic_cast( + ctx)) { + return visitExpressionFunctionUse(funcUseCtx); + } else if ( + auto* constantCtx = + 
dynamic_cast(ctx)) { + return visitExpressionConstant(constantCtx); + } else if ( + auto* columnCtx = + dynamic_cast(ctx)) { + return visitExpressionColumn(columnCtx); + } else if ( + auto* castCtx = + dynamic_cast(ctx)) { + return visitExpressionCast(castCtx); + } else if ( + auto* scalarSubqueryCtx = + dynamic_cast( + ctx)) { + return visitExpressionScalarSubquery(scalarSubqueryCtx); + } else if ( + auto* inSubqueryCtx = dynamic_cast< + SubstraitPlanParser::ExpressionInPredicateSubqueryContext*>(ctx)) { + return visitExpressionInPredicateSubquery(inSubqueryCtx); + } else if ( + auto* setSubqueryCtx = dynamic_cast< + SubstraitPlanParser::ExpressionSetPredicateSubqueryContext*>(ctx)) { + return visitExpressionSetPredicateSubquery(setSubqueryCtx); + } else if ( + auto* setComparisonCtx = dynamic_cast< + SubstraitPlanParser::ExpressionSetComparisonSubqueryContext*>(ctx)) { + return visitExpressionSetComparisonSubquery(setComparisonCtx); + } + errorListener_->addError( + ctx->getStart(), "Internal error: unsupported expression type."); + return defaultResult(); +} + +::substrait::proto::Expression +SubstraitPlanSubqueryRelationVisitor::visitExpressionIfThenUse( + SubstraitPlanParser::ExpressionFunctionUseContext* ctx) { + ::substrait::proto::Expression expr; + size_t currExprNum = 0; + size_t totalExprCount = ctx->expression().size(); + while (currExprNum + 2 <= totalExprCount) { + // Peel off an if/then pair. + auto ifThen = expr.mutable_if_then()->add_ifs(); + *ifThen->mutable_if_() = ANY_CAST( + ::substrait::proto::Expression, + visitExpression(ctx->expression(currExprNum))); + *ifThen->mutable_then() = ANY_CAST( + ::substrait::proto::Expression, + visitExpression(ctx->expression(currExprNum + 1))); + currExprNum += 2; + } + if (currExprNum + 1 <= totalExprCount) { + // Use the last expression as the else clause. 
+ *expr.mutable_if_then()->mutable_else_() = ANY_CAST( + ::substrait::proto::Expression, + visitExpression(ctx->expression(currExprNum))); + } + return expr; +} + +std::any SubstraitPlanSubqueryRelationVisitor::visitExpressionFunctionUse( + SubstraitPlanParser::ExpressionFunctionUseContext* ctx) { + ::substrait::proto::Expression expr; + std::string funcName = ctx->id()->getText(); + if (absl::AsciiStrToLower(funcName) == "ifthen") { + return visitExpressionIfThenUse(ctx); + } + uint32_t funcReference = 0; + auto symbol = symbolTable_->lookupSymbolByName(funcName); + if (symbol == nullptr || symbol->type != SymbolType::kFunction) { + errorListener_->addError( + ctx->id()->getStart(), + ctx->id()->getText() + " is not a function reference."); + } else { + auto functionData = ANY_CAST(std::shared_ptr, symbol->blob); + funcReference = functionData->anchor; + } + + expr.mutable_scalar_function()->set_function_reference(funcReference); + for (const auto& exp : ctx->expression()) { + if (endsWith(exp->getText(), "_enum")) { + std::string str{absl::StripSuffix(exp->getText(), "_enum")}; + expr.mutable_scalar_function()->add_arguments()->set_enum_(str); + continue; + } + + auto result = visitExpression(exp); + if (result.type() != typeid(::substrait::proto::Expression)) { + errorListener_->addError( + ctx->id()->getStart(), + "Could not parse as an expression (pass 2, spot 2)."); + return expr; + } + auto newExpr = ANY_CAST(::substrait::proto::Expression, result); + *expr.mutable_scalar_function()->add_arguments()->mutable_value() = newExpr; + } + if (ctx->literal_complex_type() != nullptr) { + auto literalType = ANY_CAST( + ::substrait::proto::Type, + visitLiteral_complex_type(ctx->literal_complex_type())); + *expr.mutable_scalar_function()->mutable_output_type() = literalType; + } + return expr; +} + +std::any SubstraitPlanSubqueryRelationVisitor::visitExpressionConstant( + SubstraitPlanParser::ExpressionConstantContext* ctx) { + ::substrait::proto::Expression expr; + 
*expr.mutable_literal() = + ANY_CAST(::substrait::proto::Expression_Literal, visitChildren(ctx)); + return expr; +} + +std::any SubstraitPlanSubqueryRelationVisitor::visitExpressionCast( + SubstraitPlanParser::ExpressionCastContext* ctx) { + ::substrait::proto::Expression expr; + auto origExpression = ANY_CAST( + ::substrait::proto::Expression, visitExpression(ctx->expression())); + auto literalType = ANY_CAST( + ::substrait::proto::Type, + visitLiteral_complex_type(ctx->literal_complex_type())); + *expr.mutable_cast()->mutable_type() = literalType; + *expr.mutable_cast()->mutable_input() = origExpression; + return expr; +} + +std::any SubstraitPlanSubqueryRelationVisitor::visitExpressionColumn( + SubstraitPlanParser::ExpressionColumnContext* ctx) { + auto relationData = + ANY_CAST(std::shared_ptr, currentRelationScope_->blob); + + std::string symbolName = ctx->getText(); + auto [stepsOut, fieldReference] = findFieldReferenceByName( + ctx->getStart(), currentRelationScope_, relationData, symbolName); + + ::substrait::proto::Expression expr; + if (fieldReference != -1) { + expr.mutable_selection() + ->mutable_direct_reference() + ->mutable_struct_field() + ->set_field(fieldReference); + // TODO -- Update this with other kinds of references. + if (stepsOut > 0) { + expr.mutable_selection()->mutable_outer_reference()->set_steps_out( + stepsOut); + } else { + expr.mutable_selection()->mutable_root_reference(); + } + } + return expr; +} + +std::any SubstraitPlanSubqueryRelationVisitor::visitExpressionScalarSubquery( + SubstraitPlanParser::ExpressionScalarSubqueryContext* ctx) { + ::substrait::proto::Expression expr; + // First find the relation created in a previous step. 
+ auto symbol = + symbolTable_->lookupSymbolByName(ctx->relation_ref()->getText()); + if (symbol == nullptr) { + errorListener_->addError( + ctx->getStart(), "Internal error -- Failed to find a known symbol."); + return expr; + } + + auto relationData = ANY_CAST(std::shared_ptr, symbol->blob); + *expr.mutable_subquery()->mutable_scalar()->mutable_input() = + relationData->relation; + return expr; +} + +std::any +SubstraitPlanSubqueryRelationVisitor::visitExpressionInPredicateSubquery( + SubstraitPlanParser::ExpressionInPredicateSubqueryContext* ctx) { + ::substrait::proto::Expression expr; + // First find the relation created in a previous step. + auto symbol = + symbolTable_->lookupSymbolByName(ctx->relation_ref()->getText()); + if (symbol == nullptr) { + errorListener_->addError( + ctx->getStart(), "Internal error -- Failed to find a known symbol."); + return expr; + } + + // Now look at expression_list. + auto exprs = ANY_CAST( + std::vector<::substrait::proto::Expression>, + visitExpression_list(ctx->expression_list())); + + // Now construct the subquery expression. + auto relationData = ANY_CAST(std::shared_ptr, symbol->blob); + expr.mutable_subquery()->mutable_in_predicate()->mutable_haystack(); + symbolTable_->addPermanentLocation( + *symbol, PROTO_LOCATION(expr.subquery().in_predicate().haystack())); + + for (const auto& e : exprs) { + *expr.mutable_subquery()->mutable_in_predicate()->add_needles() = e; + } + + return expr; +} + +std::any +SubstraitPlanSubqueryRelationVisitor::visitExpressionSetPredicateSubquery( + SubstraitPlanParser::ExpressionSetPredicateSubqueryContext* ctx) { + ::substrait::proto::Expression expr; + // First find the relation created in a previous step. 
+ auto symbol = + symbolTable_->lookupSymbolByName(ctx->relation_ref(0)->getText()); + if (symbol == nullptr) { + errorListener_->addError( + ctx->getStart(), "Internal error -- Failed to find a known symbol."); + return expr; + } + auto relationData = ANY_CAST(std::shared_ptr, symbol->blob); + expr.mutable_subquery()->mutable_set_predicate()->mutable_tuples(); + if (ctx->EXISTS() != nullptr) { + expr.mutable_subquery()->mutable_set_predicate()->set_predicate_op( + ::substrait::proto:: + Expression_Subquery_SetPredicate_PredicateOp_PREDICATE_OP_EXISTS); + } else if (ctx->UNIQUE() != nullptr) { + expr.mutable_subquery()->mutable_set_predicate()->set_predicate_op( + ::substrait::proto:: + Expression_Subquery_SetPredicate_PredicateOp_PREDICATE_OP_UNIQUE); + } else { + errorListener_->addError( + ctx->getStart(), "Internal error -- Unrecognized predicate operation."); + } + return expr; +} + +std::any +SubstraitPlanSubqueryRelationVisitor::visitExpressionSetComparisonSubquery( + SubstraitPlanParser::ExpressionSetComparisonSubqueryContext* ctx) { + ::substrait::proto::Expression expr; + *expr.mutable_subquery()->mutable_set_comparison()->mutable_left() = ANY_CAST( + ::substrait::proto::Expression, visitExpression(ctx->expression())); + expr.mutable_subquery()->mutable_set_comparison()->set_comparison_op( + comparisonToProto(ctx->COMPARISON()->getText())); + // Next find the relation created in a previous step. 
+ auto symbol = + symbolTable_->lookupSymbolByName(ctx->relation_ref()->getText()); + if (symbol == nullptr) { + errorListener_->addError( + ctx->getStart(), "Internal error -- Failed to find a known symbol."); + return expr; + } + auto relationData = ANY_CAST(std::shared_ptr, symbol->blob); + expr.mutable_subquery()->mutable_set_comparison()->mutable_right(); + return expr; +} + +std::any SubstraitPlanSubqueryRelationVisitor::visitExpression_list( + SubstraitPlanParser::Expression_listContext* ctx) { + std::vector<::substrait::proto::Expression> exprs; + for (auto exprCtx : ctx->expression()) { + auto expr = + ANY_CAST(::substrait::proto::Expression, visitExpression(exprCtx)); + exprs.push_back(expr); + } + return exprs; +} + +std::any SubstraitPlanSubqueryRelationVisitor::visitConstant( + SubstraitPlanParser::ConstantContext* ctx) { + ::substrait::proto::Expression_Literal literal; + if (ctx->literal_basic_type() != nullptr) { + auto literalType = ANY_CAST( + ::substrait::proto::Type, + visitLiteral_basic_type(ctx->literal_basic_type())); + return visitConstantWithType(ctx, literalType); + } else if (ctx->literal_complex_type() != nullptr) { + auto literalType = ANY_CAST( + ::substrait::proto::Type, + visitLiteral_complex_type(ctx->literal_complex_type())); + return visitConstantWithType(ctx, literalType); + } else { + // Try to figure out this literal without a type. 
+ if (ctx->NULLVAL() != nullptr) { + errorListener_->addError( + ctx->NULLVAL()->getSymbol(), "Null literals require type."); + } else if (ctx->TRUEVAL() != nullptr) { + literal.set_boolean(true); + } else if (ctx->FALSEVAL() != nullptr) { + literal.set_boolean(false); + } else if (ctx->STRING() != nullptr) { + auto literalType = ::substrait::proto::Type(); + literalType.mutable_string(); + literal = visitString(ctx->STRING(), literalType); + } else { + errorListener_->addError( + ctx->getStart(), "Literals should include a type."); + } + } + return literal; +} + +std::any SubstraitPlanSubqueryRelationVisitor::visitMap_literal( + SubstraitPlanParser::Map_literalContext* ctx) { + ::substrait::proto::Expression_Literal literal; + literal.mutable_map()->clear_key_values(); + for (auto pair : ctx->map_literal_value()) { + auto item = ANY_CAST( + ::substrait::proto::Expression_Literal_Map_KeyValue, + visitMap_literal_value(pair)); + *literal.mutable_map()->add_key_values() = item; + } + return literal; +} + +std::any SubstraitPlanSubqueryRelationVisitor::visitMap_literal_value( + SubstraitPlanParser::Map_literal_valueContext* ctx) { + ::substrait::proto::Expression_Literal_Map_KeyValue keyValue; + auto key = ANY_CAST( + ::substrait::proto::Expression_Literal, visitConstant(ctx->constant(0))); + auto value = ANY_CAST( + ::substrait::proto::Expression_Literal, visitConstant(ctx->constant(1))); + *keyValue.mutable_key() = key; + *keyValue.mutable_value() = value; + return keyValue; +} + +std::any SubstraitPlanSubqueryRelationVisitor::visitStruct_literal( + SubstraitPlanParser::Struct_literalContext* ctx) { + ::substrait::proto::Expression_Literal literal; + for (auto constant : ctx->constant()) { + auto item = ANY_CAST( + ::substrait::proto::Expression_Literal, visitConstant(constant)); + *literal.mutable_struct_()->add_fields() = item; + } + return literal; +} + +std::any SubstraitPlanSubqueryRelationVisitor::visitColumn_name( + SubstraitPlanParser::Column_nameContext* 
ctx) { + auto relationData = + ANY_CAST(std::shared_ptr, currentRelationScope_->blob); + return findFieldReferenceByName( + ctx->getStart(), currentRelationScope_, relationData, ctx->getText()); +} + +::substrait::proto::Expression_Literal +SubstraitPlanSubqueryRelationVisitor::visitConstantWithType( + SubstraitPlanParser::ConstantContext* ctx, + const ::substrait::proto::Type& literalType) { + ::substrait::proto::Expression_Literal literal; + + if (ctx->NUMBER() != nullptr) { + literal = visitNumber(ctx->NUMBER(), literalType); + } else if (ctx->STRING() != nullptr) { + switch (literalType.kind_case()) { + case ::substrait::proto::Type::kTimestamp: + literal = visitTimestamp(ctx); + break; + case ::substrait::proto::Type::kTimestampTz: + literal = visitTimestampTz(ctx); + break; + case ::substrait::proto::Type::kDate: + literal = visitDate(ctx); + break; + case ::substrait::proto::Type::kTime: + literal = visitTime(ctx); + break; + default: + literal = visitString(ctx->STRING(), literalType); + break; + } + } else if (ctx->TRUEVAL() != nullptr) { + if (literalType.has_bool_()) { + literal.set_boolean(true); + } else { + errorListener_->addError( + ctx->TRUEVAL()->getSymbol(), + "Provided value does not match the provided type."); + } + } else if (ctx->FALSEVAL() != nullptr) { + if (literalType.has_bool_()) { + literal.set_boolean(false); + } else { + errorListener_->addError( + ctx->FALSEVAL()->getSymbol(), + "Provided value does not match the provided type."); + } + } else if (ctx->NULLVAL()) { + *literal.mutable_null() = literalType; + setNullable(literal.mutable_null()); + } else { + switch (literalType.kind_case()) { + case ::substrait::proto::Type::kStruct: + literal = visitStruct(ctx, literalType); + break; + case ::substrait::proto::Type::kList: + literal = visitList(ctx, literalType); + break; + case ::substrait::proto::Type::kMap: + literal = visitMap(ctx, literalType); + break; + case ::substrait::proto::Type::kIntervalYear: + literal = 
visitIntervalYear(ctx); + break; + case ::substrait::proto::Type::kIntervalDay: + literal = visitIntervalDay(ctx); + break; + default: + errorListener_->addError( + ctx->getStart(), + "Unsupported type " + std::to_string(literalType.kind_case()) + + "."); + break; + } + } + + return literal; +} + +::substrait::proto::Expression_Literal_Map_KeyValue +SubstraitPlanSubqueryRelationVisitor::visitMapLiteralValueWithType( + SubstraitPlanParser::Map_literal_valueContext* ctx, + const ::substrait::proto::Type& literalType) { + ::substrait::proto::Expression_Literal_Map_KeyValue keyValue; + *keyValue.mutable_key() = + visitConstantWithType(ctx->constant(0), literalType.map().key()); + *keyValue.mutable_value() = + visitConstantWithType(ctx->constant(1), literalType.map().value()); + return keyValue; +} + +::substrait::proto::Expression_Literal +SubstraitPlanSubqueryRelationVisitor::visitMapLiteralWithType( + SubstraitPlanParser::Map_literalContext* ctx, + const ::substrait::proto::Type& literalType) { + ::substrait::proto::Expression_Literal literal; + if (ctx->map_literal_value().empty()) { + *literal.mutable_empty_map() = literalType.map(); + return literal; + } + for (const auto& keyValue : ctx->map_literal_value()) { + *literal.mutable_map()->add_key_values() = + visitMapLiteralValueWithType(keyValue, literalType); + } + return literal; +} + +::substrait::proto::Expression_Literal +SubstraitPlanSubqueryRelationVisitor::visitStructLiteralWithType( + SubstraitPlanParser::Struct_literalContext* ctx, + const ::substrait::proto::Type_Struct& literalType) { + ::substrait::proto::Expression_Literal literal; + if (ctx->constant().empty()) { + return literal; + } + for (int i = 0; i < ctx->constant().size(); ++i) { + if (i >= literalType.types().size()) { + break; + } + *literal.mutable_struct_()->add_fields() = + visitConstantWithType(ctx->constant(i), literalType.types(i)); + } + if (ctx->constant().size() != literalType.types().size()) { + errorListener_->addError( + 
ctx->getStart(), + "The number of fields does not match the number of types in this " + "struct."); + } + return literal; +} + +::substrait::proto::Expression_Literal +SubstraitPlanSubqueryRelationVisitor::visitString( + antlr4::tree::TerminalNode* node, + const ::substrait::proto::Type& literalType) { + std::string input = node->getText(); + + // First remove the surrounding quote marks. + std::string str; + if (startsWith(input, "```")) { + str = input.substr(3, input.length() - 6); + } else if (startsWith(input, "``")) { + str = input.substr(2, input.length() - 4); + } else if (input[0] == '"' || input[0] == '`') { + str = input.substr(1, input.length() - 2); + } else { + str = input; + } + + // Perform escapes if necessary. + std::string resultStr; + if (startsWith(input, "`")) { + // Don't perform escapes on raw strings. + resultStr = str; + } else { + resultStr = escapeText(node, str); + } + + ::substrait::proto::Expression_Literal literal; + switch (literalType.kind_case()) { + case ::substrait::proto::Type::kString: { + literal.set_string(resultStr); + break; + } + case ::substrait::proto::Type::kFixedChar: { + literal.set_fixed_char(resultStr); + break; + } + case ::substrait::proto::Type::kVarchar: { + literal.mutable_var_char()->set_value(resultStr); + literal.mutable_var_char()->set_length(literalType.varchar().length()); + break; + } + case ::substrait::proto::Type::kBinary: { + literal.set_binary(resultStr); + break; + } + case ::substrait::proto::Type::kFixedBinary: { + literal.set_fixed_binary(resultStr); + break; + } + case ::substrait::proto::Type::kUuid: { + // Remove all dashes for consistency. 
+ std::string modifiedStr = resultStr; + modifiedStr.erase( + std::remove_if( + modifiedStr.begin(), + modifiedStr.end(), + [](unsigned char c) -> bool { return c == '-'; }), + modifiedStr.end()); + if (std::find_if( + modifiedStr.begin(), modifiedStr.end(), [](unsigned char c) { + return !std::isxdigit(c); + }) != modifiedStr.end()) { + errorListener_->addError( + node->getSymbol(), + "UUIDs should be be specified with hexadecimal characters with " + "optional dashes only."); + modifiedStr = "0"; + } else if (modifiedStr.length() != 32) { + errorListener_->addError( + node->getSymbol(), + "UUIDs are 128 bits long and thus should be specified with exactly " + "32 hexadecimal digits."); + modifiedStr = modifiedStr.substr(0, 32); + } + literal.set_uuid(modifiedStr); + break; + } + case ::substrait::proto::Type::kTimestampTz: + case ::substrait::proto::Type::kTimestamp: + case ::substrait::proto::Type::kDate: + case ::substrait::proto::Type::kTime: + literal.set_string(resultStr); + break; + case ::substrait::proto::Type::KIND_NOT_SET: + // Use of an unrecognized symbol is handled more generally elsewhere. 
+ break; + default: + errorListener_->addError( + node->getSymbol(), + "Unexpected string type: " + std::to_string(literalType.kind_case())); + break; + } + + return literal; +} + +std::string SubstraitPlanSubqueryRelationVisitor::escapeText( + const antlr4::tree::TerminalNode* node, + const std::string& str) { + std::stringstream result; + + for (std::size_t i = 0; i < str.length(); ++i) { + if (str[i] == '\\' && i < str.length() - 1) { + switch (str[i + 1]) { + case '\\': + result << '\\'; + break; + case '\'': + result << '\''; + break; + case '"': + result << '"'; + break; + case 'b': + result << '\b'; + break; + case 'f': + result << '\f'; + break; + case 'n': + result << '\n'; + break; + case 'r': + result << '\r'; + break; + case 't': + result << '\t'; + break; + case 'x': + if (i < str.length() - 3) { + int32_t hex{0}; + if (absl::SimpleHexAtoi(str.substr(i + 2, 2), &hex)) { + result << static_cast(hex & 0xff); + i += 3; + continue; + } + // This is an invalid sequence - leave escaped. + } + // This is an unrecognized sequence - leave escaped. + result << str[i]; + result << str[i + 1]; + break; + case 'u': + if (i < str.length() - 3) { + if (str[i + 2] != '{') { + break; + } + auto substr = str.substr(i + 3); + size_t x = substr.find('}'); + int64_t hex; + if (x > 0 && absl::SimpleHexAtoi(substr.substr(0, x), &hex)) { + if (x <= 2) { + result << static_cast(hex & 0xff); + } else if (x <= 4) { + result << static_cast(hex >> 8 & 0xff); + result << static_cast(hex & 0xff); + } else if (x <= 6) { + result << static_cast(hex >> 16 & 0xff); + result << static_cast(hex >> 8 & 0xff); + result << static_cast(hex & 0xff); + } + i += x + 3; + continue; + } + // Drop through to handle the invalid sequence. 
+ } + default: + this->errorListener_->SubstraitErrorListener::addError( + node->getSymbol()->getLine(), + node->getSymbol()->getCharPositionInLine() + i + 1, + "Unknown slash escape sequence."); + result << str[i]; + result << str[i + 1]; + break; + } + ++i; // Increments for the character after the slash. + } else { + result << str[i]; + } + } + + return result.str(); +} + +::substrait::proto::Expression_Literal +SubstraitPlanSubqueryRelationVisitor::visitNumber( + antlr4::tree::TerminalNode* node, + const ::substrait::proto::Type& literalType) { + ::substrait::proto::Expression_Literal literal; + try { + switch (literalType.kind_case()) { + case ::substrait::proto::Type::kI8: { + int32_t val = std::stoi(node->getText()); + literal.set_i8(val); + if (literalType.i8().nullability() == + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE) { + literal.set_nullable(true); + } + break; + } + case ::substrait::proto::Type::kI16: { + int32_t val = std::stoi(node->getText()); + literal.set_i16(val); + if (literalType.i16().nullability() == + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE) { + literal.set_nullable(true); + } + break; + } + case ::substrait::proto::Type::kI32: { + int32_t val = std::stoi(node->getText()); + literal.set_i32(val); + if (literalType.i32().nullability() == + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE) { + literal.set_nullable(true); + } + break; + } + case ::substrait::proto::Type::kI64: { + int64_t val = std::stol(node->getText()); + literal.set_i64(val); + if (literalType.i64().nullability() == + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE) { + literal.set_nullable(true); + } + break; + } + case ::substrait::proto::Type::kFp32: { + float val = std::stof(node->getText()); + literal.set_fp32(val); + if (literalType.fp32().nullability() == + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE) { + literal.set_nullable(true); + } + break; + } + case ::substrait::proto::Type::kFp64: { + double 
val = std::stod(node->getText()); + literal.set_fp64(val); + if (literalType.fp64().nullability() == + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE) { + literal.set_nullable(true); + } + break; + } + case ::substrait::proto::Type::kDecimal: { + auto decimal = ::io::substrait::expression::DecimalLiteral::fromString( + node->getText(), + literalType.decimal().precision(), + literalType.decimal().scale()); + if (!decimal.isValid()) { + errorListener_->addError( + node->getSymbol(), "Could not parse literal as decimal."); + break; + } + *literal.mutable_decimal() = decimal.toProto(); + if (literalType.decimal().nullability() == + ::substrait::proto::Type_Nullability_NULLABILITY_NULLABLE) { + literal.set_nullable(true); + } + break; + } + default: + break; + } + } catch (...) { + errorListener_->addError( + node->getSymbol(), "Could not parse as a numeric literal."); + } + + return literal; +} + +::substrait::proto::Expression_Literal +SubstraitPlanSubqueryRelationVisitor::visitList( + SubstraitPlanParser::ConstantContext* ctx, + const ::substrait::proto::Type& literalType) { + ::substrait::proto::Expression_Literal literal; + + if (ctx->map_literal() != nullptr) { + auto mapLiteral = ANY_CAST( + ::substrait::proto::Expression_Literal, + visitMap_literal(ctx->map_literal())); + if (!mapLiteral.map().key_values().empty()) { + errorListener_->addError( + ctx->getStart(), "Lists only accept literals separated by commas."); + } + *literal.mutable_empty_list()->mutable_type() = literalType.list().type(); + literal.mutable_empty_list()->set_nullability( + literalType.list().nullability()); + } else if (ctx->struct_literal() != nullptr) { + for (const auto& constant : ctx->struct_literal()->constant()) { + *literal.mutable_list()->add_values() = + visitConstantWithType(constant, literalType.list().type()); + } + } + + return literal; +} + +::substrait::proto::Expression_Literal +SubstraitPlanSubqueryRelationVisitor::visitMap( + 
SubstraitPlanParser::ConstantContext* ctx, + const ::substrait::proto::Type& literalType) { + ::substrait::proto::Expression_Literal literal; + + if (ctx->map_literal() != nullptr) { + literal = ANY_CAST( + ::substrait::proto::Expression_Literal, + visitMapLiteralWithType(ctx->map_literal(), literalType)); + } else if (ctx->struct_literal() != nullptr) { + errorListener_->addError( + ctx->getStart(), + "Map literals require pairs of values separated by colons."); + + *literal.mutable_empty_map() = literalType.map(); + } else { + errorListener_->addError(ctx->getStart(), "Unrecognized map construction."); + *literal.mutable_empty_map() = literalType.map(); + } + + return literal; +} + +::substrait::proto::Expression_Literal +SubstraitPlanSubqueryRelationVisitor::visitStruct( + SubstraitPlanParser::ConstantContext* ctx, + const ::substrait::proto::Type& literalType) { + ::substrait::proto::Expression_Literal literal; + + if (ctx->map_literal() != nullptr) { + // We know that there this is an empty literal, treat as such. + literal.mutable_struct_()->clear_fields(); + } else if (ctx->struct_literal() != nullptr) { + literal = visitStructLiteralWithType( + ctx->struct_literal(), literalType.struct_()); + // Validate that the literal matches the type. 
+ } + + return literal; +} + +::substrait::proto::Expression_Literal +SubstraitPlanSubqueryRelationVisitor::visitIntervalYear( + SubstraitPlanParser::ConstantContext* ctx) { + ::substrait::proto::Type literalType; + literalType.mutable_struct_()->add_types()->mutable_i32(); + literalType.mutable_struct_()->add_types()->mutable_i32(); + + auto s = visitStruct(ctx, literalType); + + ::substrait::proto::Expression_Literal literal; + literal.mutable_interval_year_to_month()->set_years( + s.struct_().fields(0).i32()); + literal.mutable_interval_year_to_month()->set_months( + s.struct_().fields(1).i32()); + return literal; +} + +::substrait::proto::Expression_Literal +SubstraitPlanSubqueryRelationVisitor::visitIntervalDay( + SubstraitPlanParser::ConstantContext* ctx) { + ::substrait::proto::Type literalType; + literalType.mutable_struct_()->add_types()->mutable_i32(); + literalType.mutable_struct_()->add_types()->mutable_i32(); + literalType.mutable_struct_()->add_types()->mutable_i32(); + + auto s = visitStruct(ctx, literalType); + + ::substrait::proto::Expression_Literal literal; + literal.mutable_interval_day_to_second()->set_days( + s.struct_().fields(0).i32()); + literal.mutable_interval_day_to_second()->set_seconds( + s.struct_().fields(1).i32()); + literal.mutable_interval_day_to_second()->set_microseconds( + s.struct_().fields(2).i32()); + return literal; +} + +::substrait::proto::Expression_Literal +SubstraitPlanSubqueryRelationVisitor::visitTimestamp( + SubstraitPlanParser::ConstantContext* ctx) { + ::substrait::proto::Type literalType; + literalType.mutable_timestamp(); + + auto s = visitString(ctx->STRING(), literalType); + + date::sys_seconds timePoint; + std::istringstream ss(s.string()); + ss >> date::parse("%Y-%m-%d %H:%M:%S", timePoint); + if (ss.fail()) { + errorListener_->addError(ctx->getStart(), "Could not parse timestamp."); + return {}; + } + + ::substrait::proto::Expression_Literal literal; + 
literal.set_timestamp(std::chrono::system_clock::to_time_t(timePoint)); + return literal; +} + +::substrait::proto::Expression_Literal +SubstraitPlanSubqueryRelationVisitor::visitTimestampTz( + SubstraitPlanParser::ConstantContext* ctx) { + ::substrait::proto::Type literalType; + literalType.mutable_timestamp_tz(); + + auto s = visitString(ctx->STRING(), literalType); + + date::local_time localTimePoint; + std::string tzName; + std::istringstream ss{s.string()}; + ss >> date::parse("%Y-%m-%d %H:%M:%S %Z", localTimePoint, tzName); + if (ss.fail()) { + errorListener_->addError( + ctx->getStart(), "Could not parse timestamp with timezone."); + return {}; + } + // TODO -- Support timezones like -0100 using %z. #60 + + ::substrait::proto::Expression_Literal literal; + try { + auto timePoint = date::locate_zone(tzName)->to_sys(localTimePoint); + literal.set_timestamp_tz(std::chrono::system_clock::to_time_t(timePoint)); + } catch (...) { + errorListener_->addError( + ctx->getStart(), "Failed to locate timezone in the TZ database."); + } + return literal; +} + +::substrait::proto::Expression_Literal +SubstraitPlanSubqueryRelationVisitor::visitDate( + SubstraitPlanParser::ConstantContext* ctx) { + ::substrait::proto::Type literalType; + literalType.mutable_date(); + + auto s = visitString(ctx->STRING(), literalType); + + date::sys_days timePoint; + std::istringstream ss(s.string()); + ss >> date::parse("%Y-%m-%d", timePoint); + if (ss.fail()) { + errorListener_->addError(ctx->getStart(), "Could not parse date."); + return {}; + } + + ::substrait::proto::Expression_Literal literal; + auto duration = timePoint - date::sys_days(); + literal.set_date(duration.count()); + return literal; +} + +::substrait::proto::Expression_Literal +SubstraitPlanSubqueryRelationVisitor::visitTime( + SubstraitPlanParser::ConstantContext* ctx) { + ::substrait::proto::Type literalType; + literalType.mutable_time(); + + auto s = visitString(ctx->STRING(), literalType); + + std::chrono::microseconds 
timeOfDay; + std::istringstream ss(s.string()); + ss >> date::parse("%H:%M:%S", timeOfDay); + if (ss.fail()) { + ss = std::istringstream(s.string()); + ss >> date::parse("%H:%M", timeOfDay); + if (ss.fail()) { + errorListener_->addError(ctx->getStart(), "Could not parse time."); + return {}; + } + } + + ::substrait::proto::Expression_Literal literal; + literal.set_time(timeOfDay.count()); + return literal; +} + +std::any SubstraitPlanSubqueryRelationVisitor::visitSort_field( + SubstraitPlanParser::Sort_fieldContext* ctx) { + ::substrait::proto::SortField sort; + *sort.mutable_expr() = ANY_CAST( + ::substrait::proto::Expression, visitExpression(ctx->expression())); + if (ctx->id() != nullptr) { + sort.set_direction(static_cast<::substrait::proto::SortField_SortDirection>( + visitSortDirection(ctx->id()))); + } + return sort; +} + +int32_t SubstraitPlanSubqueryRelationVisitor::visitSortDirection( + SubstraitPlanParser::IdContext* ctx) { + std::string text = normalizeProtoEnum(ctx->getText(), kSortDirectionPrefix); + if (text == "unspecified") { + return ::substrait::proto::SortField::SORT_DIRECTION_UNSPECIFIED; + } else if (text == "ascnullsfirst") { + return ::substrait::proto::SortField::SORT_DIRECTION_ASC_NULLS_FIRST; + } else if (text == "ascnullslast") { + return ::substrait::proto::SortField::SORT_DIRECTION_ASC_NULLS_LAST; + } else if (text == "descnullsfirst") { + return ::substrait::proto::SortField::SORT_DIRECTION_DESC_NULLS_FIRST; + } else if (text == "descnullslast") { + return ::substrait::proto::SortField::SORT_DIRECTION_DESC_NULLS_LAST; + } else if (text == "clustered") { + return ::substrait::proto::SortField::SORT_DIRECTION_CLUSTERED; + } + this->errorListener_->addError( + ctx->getStart(), "Unrecognized sort direction: " + ctx->getText()); + return ::substrait::proto::SortField::SORT_DIRECTION_UNSPECIFIED; +} + +void SubstraitPlanSubqueryRelationVisitor::addExpressionsToSchema( + std::shared_ptr& relationData) { + const auto& relation = 
relationData->relation; + switch (relation.rel_type_case()) { + case ::substrait::proto::Rel::kProject: + for (const auto& expr : relation.project().expressions()) { + if (expr.selection().direct_reference().has_struct_field()) { + if (expr.selection().direct_reference().struct_field().field() < + relationData->fieldReferences.size()) { + relationData->generatedFieldReferences.push_back( + relationData->fieldReferences[expr.selection() + .direct_reference() + .struct_field() + .field()]); + } + } else { + auto newSymbol = symbolTable_->defineSymbol( + relationData->generatedFieldReferenceAliases + [relationData->generatedFieldReferences.size()], + PROTO_LOCATION(expr), + SymbolType::kUnknown, + std::nullopt, + std::nullopt); + relationData->generatedFieldReferences.push_back(newSymbol); + } + } + break; + default: + // Only project and aggregate relations affect the output mapping. + break; + } +} + +std::string SubstraitPlanSubqueryRelationVisitor::fullyQualifiedReference( + const SymbolInfo* fieldReference) { + for (const auto& symbol : symbolTable_->getSymbols()) { + if (symbol->type == SymbolType::kSchema && + symbol->sourceLocation == fieldReference->sourceLocation) { + auto fqn = symbol->name + "." + fieldReference->name; + return fqn; + } + } + // Shouldn't happen, but return no schema if we can't find one. 
+ return fieldReference->name; +} + +std::pair +SubstraitPlanSubqueryRelationVisitor::findFieldReferenceByName( + antlr4::Token* token, + const SymbolInfo* symbol, + std::shared_ptr& relationData, + const std::string& name) { + auto fieldReferencesSize = relationData->fieldReferences.size(); + + auto generatedField = std::find_if( + relationData->generatedFieldReferences.rbegin(), + relationData->generatedFieldReferences.rend(), + [&](auto ref) { + return (!ref->alias.empty() && ref->alias == name || ref->name == name); + }); + if (generatedField != relationData->generatedFieldReferences.rend()) { + auto fieldPlacement = + generatedField - relationData->generatedFieldReferences.rbegin(); + return { + 0, + static_cast( + (fieldReferencesSize + + relationData->generatedFieldReferences.size() - fieldPlacement - + 1) & + std::numeric_limits::max())}; + } + + auto field = std::find_if( + relationData->fieldReferences.rbegin(), + relationData->fieldReferences.rend(), + [&](auto ref) { + return ( + !ref->alias.empty() && ref->alias == name || ref->name == name || + fullyQualifiedReference(ref) == name); + }); + + if (field != relationData->fieldReferences.rend()) { + auto fieldPlacement = field - relationData->fieldReferences.rbegin(); + return { + 0, + static_cast( + (fieldReferencesSize - fieldPlacement - 1) & + std::numeric_limits::max())}; + } + + auto actualParentQueryLocation = getParentQueryLocation(symbol, relationData); + if (actualParentQueryLocation != Location::kUnknownLocation) { + auto parentSymbol = symbolTable_->lookupSymbolByLocationAndType( + actualParentQueryLocation, SymbolType::kRelation); + if (parentSymbol != nullptr) { + // This symbol is not in the current scope, try an outer one. 
+ auto parentRelationData = + ANY_CAST(std::shared_ptr, parentSymbol->blob); + + auto [stepsOut, fieldReference] = findFieldReferenceByName( + token, parentSymbol, parentRelationData, name); + if (fieldReference != -1) { + return {stepsOut + 1, fieldReference}; + } + // Not found but already reported. + return {stepsOut + 1, -1}; + } + } + + errorListener_->addError(token, "Reference " + name + " does not exist."); + return {0, -1}; +} + +void SubstraitPlanSubqueryRelationVisitor::applyOutputMappingToSchema( + antlr4::Token* token, + RelationType relationType, + std::shared_ptr& relationData) { + auto common = findCommonRelation(relationType, &relationData->relation); + if (common == nullptr) { + return; + } + if (common->emit().output_mapping_size() == 0) { + common->mutable_direct(); + } else { + if (!relationData->outputFieldReferences.empty()) { + // TODO -- Add support for aggregate relations. + errorListener_->addError( + token, "Aggregate relations do not yet support emit sections."); + return; + } + for (auto mapping : common->emit().output_mapping()) { + auto fieldReferencesSize = relationData->fieldReferences.size(); + if (mapping < fieldReferencesSize) { + relationData->outputFieldReferences.push_back( + relationData->fieldReferences[mapping]); + } else if ( + mapping < + fieldReferencesSize + relationData->generatedFieldReferences.size()) { + relationData->outputFieldReferences.push_back( + relationData + ->generatedFieldReferences[mapping - fieldReferencesSize]); + } else { + errorListener_->addError( + token, + "Field #" + std::to_string(mapping) + " requested but only " + + std::to_string( + fieldReferencesSize + + relationData->generatedFieldReferences.size()) + + " are available."); + } + } + } +} + +bool SubstraitPlanSubqueryRelationVisitor::isWithinSubquery( + SubstraitPlanParser::RelationContext* ctx) { + auto symbol = symbolTable_->lookupSymbolByLocationAndType( + PARSER_LOCATION(ctx), SymbolType::kRelation); + if (symbol->parentQueryLocation 
!= Location::kUnknownLocation) { + return true; + } + + // Also check our scope. + return currentRelationScope_->parentQueryLocation != + Location::kUnknownLocation; +} + +} // namespace io::substrait::textplan diff --git a/src/substrait/textplan/parser/SubstraitPlanSubqueryRelationVisitor.h b/src/substrait/textplan/parser/SubstraitPlanSubqueryRelationVisitor.h new file mode 100644 index 00000000..7795f204 --- /dev/null +++ b/src/substrait/textplan/parser/SubstraitPlanSubqueryRelationVisitor.h @@ -0,0 +1,221 @@ +/* SPDX-License-Identifier: Apache-2.0 */ + +#pragma once + +#include "SubstraitPlanParser/SubstraitPlanParser.h" +#include "substrait/textplan/SymbolTable.h" +#include "substrait/textplan/parser/SubstraitParserErrorListener.h" +#include "substrait/textplan/parser/SubstraitPlanTypeVisitor.h" + +namespace substrait::proto { +class Expression; +class Expression_Literal; +class Expression_Literal_Map_KeyValue; +class NamedStruct; +class Type; +class Type_Struct; +} // namespace substrait::proto + +namespace io::substrait::textplan { + +class RelationData; + +class SubstraitPlanSubqueryRelationVisitor : public SubstraitPlanTypeVisitor { + public: + SubstraitPlanSubqueryRelationVisitor( + const SymbolTable& symbolTable, + std::shared_ptr errorListener) + : SubstraitPlanTypeVisitor(symbolTable, std::move(errorListener)) {} + + [[nodiscard]] std::shared_ptr getSymbolTable() const { + return symbolTable_; + }; + + [[nodiscard]] std::shared_ptr getErrorListener() + const { + return errorListener_; + }; + + std::any aggregateResult(std::any aggregate, std::any nextResult) override; + + std::any visitRelation(SubstraitPlanParser::RelationContext* ctx) override; + + // visitRelationDetail is a new method delegating to the methods below. 
+ std::any visitRelationDetail( + SubstraitPlanParser::Relation_detailContext* ctx); + + std::any visitRelation_filter_behavior( + SubstraitPlanParser::Relation_filter_behaviorContext* ctx) override; + + std::any visitRelationFilter( + SubstraitPlanParser::RelationFilterContext* ctx) override; + + std::any visitRelationUsesSchema( + SubstraitPlanParser::RelationUsesSchemaContext* ctx) override; + + std::any visitRelationExpression( + SubstraitPlanParser::RelationExpressionContext* ctx) override; + + std::any visitRelationGrouping( + SubstraitPlanParser::RelationGroupingContext* ctx) override; + + std::any visitRelationMeasure( + SubstraitPlanParser::RelationMeasureContext* ctx) override; + + std::any visitRelationJoinType( + SubstraitPlanParser::RelationJoinTypeContext* ctx) override; + + std::any visitRelationEmit( + SubstraitPlanParser::RelationEmitContext* ctx) override; + + int32_t visitAggregationInvocation(SubstraitPlanParser::IdContext* ctx); + + int32_t visitAggregationPhase(SubstraitPlanParser::IdContext* ctx); + + std::any visitMeasure_detail( + SubstraitPlanParser::Measure_detailContext* ctx) override; + + std::any visitRelationSourceReference( + SubstraitPlanParser::RelationSourceReferenceContext* ctx) override; + + std::any visitRelationSort( + SubstraitPlanParser::RelationSortContext* ctx) override; + + std::any visitRelationCount( + SubstraitPlanParser::RelationCountContext* ctx) override; + + // visitExpression is a new method delegating to the methods below. + std::any visitExpression(SubstraitPlanParser::ExpressionContext* ctx); + + // visitExpressionIfThenUse handles the built-in IFTHEN function-like syntax. 
+ ::substrait::proto::Expression visitExpressionIfThenUse( + SubstraitPlanParser::ExpressionFunctionUseContext* ctx); + + std::any visitExpressionFunctionUse( + SubstraitPlanParser::ExpressionFunctionUseContext* ctx) override; + + std::any visitExpressionConstant( + SubstraitPlanParser::ExpressionConstantContext* ctx) override; + + std::any visitExpressionCast( + SubstraitPlanParser::ExpressionCastContext* ctx) override; + + std::any visitExpressionColumn( + SubstraitPlanParser::ExpressionColumnContext* ctx) override; + + std::any visitExpressionScalarSubquery( + SubstraitPlanParser::ExpressionScalarSubqueryContext* ctx) override; + + std::any visitExpressionInPredicateSubquery( + SubstraitPlanParser::ExpressionInPredicateSubqueryContext* ctx) override; + + std::any visitExpressionSetPredicateSubquery( + SubstraitPlanParser::ExpressionSetPredicateSubqueryContext* ctx) override; + + std::any visitExpressionSetComparisonSubquery( + SubstraitPlanParser::ExpressionSetComparisonSubqueryContext* ctx) + override; + + std::any visitExpression_list( + SubstraitPlanParser::Expression_listContext* ctx) override; + + std::any visitConstant(SubstraitPlanParser::ConstantContext* ctx) override; + + std::any visitMap_literal( + SubstraitPlanParser::Map_literalContext* ctx) override; + + std::any visitMap_literal_value( + SubstraitPlanParser::Map_literal_valueContext* ctx) override; + + std::any visitStruct_literal( + SubstraitPlanParser::Struct_literalContext* ctx) override; + + std::any visitColumn_name( + SubstraitPlanParser::Column_nameContext* ctx) override; + + std::any visitSort_field( + SubstraitPlanParser::Sort_fieldContext* ctx) override; + + ::substrait::proto::Expression_Literal visitConstantWithType( + SubstraitPlanParser::ConstantContext* ctx, + const ::substrait::proto::Type& literalType); + + ::substrait::proto::Expression_Literal_Map_KeyValue + visitMapLiteralValueWithType( + SubstraitPlanParser::Map_literal_valueContext* ctx, + const ::substrait::proto::Type& 
literalType); + + ::substrait::proto::Expression_Literal visitMapLiteralWithType( + SubstraitPlanParser::Map_literalContext* ctx, + const ::substrait::proto::Type& literalType); + + ::substrait::proto::Expression_Literal visitStructLiteralWithType( + SubstraitPlanParser::Struct_literalContext* ctx, + const ::substrait::proto::Type_Struct& literalType); + + ::substrait::proto::Expression_Literal visitString( + antlr4::tree::TerminalNode* node, + const ::substrait::proto::Type& literalType); + + ::substrait::proto::Expression_Literal visitNumber( + antlr4::tree::TerminalNode* node, + const ::substrait::proto::Type& literalType); + + ::substrait::proto::Expression_Literal visitList( + SubstraitPlanParser::ConstantContext* ctx, + const ::substrait::proto::Type& literalType); + + ::substrait::proto::Expression_Literal visitMap( + SubstraitPlanParser::ConstantContext* ctx, + const ::substrait::proto::Type& literalType); + + ::substrait::proto::Expression_Literal visitStruct( + SubstraitPlanParser::ConstantContext* ctx, + const ::substrait::proto::Type& literalType); + + ::substrait::proto::Expression_Literal visitIntervalYear( + SubstraitPlanParser::ConstantContext* ctx); + + ::substrait::proto::Expression_Literal visitIntervalDay( + SubstraitPlanParser::ConstantContext* ctx); + + ::substrait::proto::Expression_Literal visitTimestamp( + SubstraitPlanParser::ConstantContext* ctx); + + ::substrait::proto::Expression_Literal visitTimestampTz( + SubstraitPlanParser::ConstantContext* ctx); + + ::substrait::proto::Expression_Literal visitDate( + SubstraitPlanParser::ConstantContext* ctx); + + ::substrait::proto::Expression_Literal visitTime( + SubstraitPlanParser::ConstantContext* ctx); + + int32_t visitSortDirection(SubstraitPlanParser::IdContext* ctx); + + private: + std::string escapeText( + const antlr4::tree::TerminalNode* node, + const std::string& str); + + void addExpressionsToSchema(std::shared_ptr& relationData); + + void applyOutputMappingToSchema( + antlr4::Token* 
token, + RelationType relationType, + std::shared_ptr& relationData); + + std::string fullyQualifiedReference(const SymbolInfo* fieldReference); + + std::pair findFieldReferenceByName( + antlr4::Token* token, + const SymbolInfo* symbol, + std::shared_ptr& relationData, + const std::string& name); + + bool isWithinSubquery(SubstraitPlanParser::RelationContext* ctx); + + const SymbolInfo* currentRelationScope_{nullptr}; // Not owned. +}; + +} // namespace io::substrait::textplan diff --git a/src/substrait/textplan/parser/SubstraitPlanVisitor.cpp b/src/substrait/textplan/parser/SubstraitPlanVisitor.cpp index e3d47c5a..67fcc49f 100644 --- a/src/substrait/textplan/parser/SubstraitPlanVisitor.cpp +++ b/src/substrait/textplan/parser/SubstraitPlanVisitor.cpp @@ -208,7 +208,7 @@ std::any SubstraitPlanVisitor::visitSchema_definition( continue; } symbol->schema = schemaSymbol; - symbol->location = Location(ctx); + symbol->sourceLocation = Location(ctx); } return nullptr; @@ -305,25 +305,20 @@ std::any SubstraitPlanVisitor::visitRelation_type( std::transform(id.begin(), id.end(), id.begin(), [](unsigned char c) { return std::tolower(c); }); - // TODO -- Replace this with a handcrafted function or a trie. 
-  if (id == "read") {
-    return RelationType::kRead;
-  } else if (id == "project") {
-    return RelationType::kProject;
-  } else if (id == "join") {
-    return RelationType::kJoin;
-  } else if (id == "cross") {
-    return RelationType::kCross;
-  } else if (id == "fetch") {
-    return RelationType::kFetch;
-  } else if (id == "aggregate") {
-    return RelationType::kAggregate;
-  } else if (id == "sort") {
-    return RelationType::kSort;
-  } else if (id == "filter") {
-    return RelationType::kFilter;
-  } else if (id == "set") {
-    return RelationType::kSet;
+  static const std::unordered_map<std::string, RelationType> relationTypeMap = {
+      {"read", RelationType::kRead},
+      {"project", RelationType::kProject},
+      {"join", RelationType::kJoin},
+      {"cross", RelationType::kCross},
+      {"fetch", RelationType::kFetch},
+      {"aggregate", RelationType::kAggregate},
+      {"sort", RelationType::kSort},
+      {"filter", RelationType::kFilter},
+      {"set", RelationType::kSet}
+  }; // Keys are lowercase: id is lowercased above. Built once, not per call.
+  const auto it = relationTypeMap.find(id);
+  if (it != relationTypeMap.end()) {
+    return it->second; // Known relation keyword.
   }
   this->errorListener_->addError(
       ctx->getStart(), "Unrecognized relation type: " + ctx->getText());
@@ -573,7 +568,7 @@ std::any SubstraitPlanVisitor::visitRelation_ref(
     SubstraitPlanParser::Relation_refContext* ctx) {
   auto rel = ANY_CAST(std::string, visitId(ctx->id(0)));
   if (ctx->id().size() > 1) {
-    visitId(ctx->id(1)); // TODO -- Make use of the reference.
+    visitId(ctx->id(1)); // TODO -- Make use of the schema reference.
} return rel; } diff --git a/src/substrait/textplan/parser/Tool.cpp b/src/substrait/textplan/parser/Tool.cpp index 84302d9c..3c31bca6 100644 --- a/src/substrait/textplan/parser/Tool.cpp +++ b/src/substrait/textplan/parser/Tool.cpp @@ -15,7 +15,7 @@ void readText(const char* filename) { std::cerr << "An error occurred while reading: " << filename << std::endl; return; } - auto parseResult = io::substrait::textplan::parseStream(*stream); + auto parseResult = io::substrait::textplan::parseStream(&*stream); if (!parseResult.successful()) { for (const std::string& msg : parseResult.getAllErrors()) { std::cout << msg << std::endl; @@ -23,7 +23,17 @@ void readText(const char* filename) { return; } - std::cout << SymbolTablePrinter::outputToText(parseResult.getSymbolTable()); + SubstraitErrorListener errorListener; + auto text = SymbolTablePrinter::outputToText( + parseResult.getSymbolTable(), &errorListener); + if (errorListener.hasErrors()) { + for (const std::string& msg : errorListener.getErrorMessages()) { + std::cout << msg << std::endl; + } + return; + } + + std::cout << text; } } // namespace diff --git a/src/substrait/textplan/parser/grammar/SubstraitPlanLexer.g4 b/src/substrait/textplan/parser/grammar/SubstraitPlanLexer.g4 index 355a38c7..adbfc9f7 100644 --- a/src/substrait/textplan/parser/grammar/SubstraitPlanLexer.g4 +++ b/src/substrait/textplan/parser/grammar/SubstraitPlanLexer.g4 @@ -46,6 +46,14 @@ COUNT: 'COUNT'; TYPE: 'TYPE'; EMIT: 'EMIT'; +SUBQUERY: 'SUBQUERY'; +EXISTS: 'EXISTS'; +UNIQUE: 'UNIQUE'; +IN: 'IN'; +OR: 'OR'; +ALL: 'ALL'; +COMPARISON: 'EQ'|'NE'|'LT'|'GT'|'LE'|'GE'; + VIRTUAL_TABLE: 'VIRTUAL_TABLE'; LOCAL_FILES: 'LOCAL_FILES'; NAMED_TABLE: 'NAMED_TABLE'; @@ -92,7 +100,7 @@ QUESTIONMARK: '?'; ATSIGN: '@'; IDENTIFIER - : [A-Z][A-Z0-9]* + : [A-Z][A-Z0-9$]* ; NUMBER diff --git a/src/substrait/textplan/parser/grammar/SubstraitPlanParser.g4 b/src/substrait/textplan/parser/grammar/SubstraitPlanParser.g4 index 3613ce8a..9bbc1e0a 100644 --- 
a/src/substrait/textplan/parser/grammar/SubstraitPlanParser.g4 +++ b/src/substrait/textplan/parser/grammar/SubstraitPlanParser.g4 @@ -87,9 +87,17 @@ relation_detail expression : id LEFTPAREN (expression COMMA?)* RIGHTPAREN (ARROW literal_complex_type)? # expressionFunctionUse - | constant # expressionConstant - | column_name # expressionColumn - | expression AS literal_complex_type # expressionCast + | constant # expressionConstant + | column_name # expressionColumn + | expression AS literal_complex_type # expressionCast + | SUBQUERY relation_ref # expressionScalarSubquery + | expression_list IN SUBQUERY relation_ref # expressionInPredicateSubquery + | (UNIQUE|EXISTS) IN SUBQUERY relation_ref (OR SUBQUERY relation_ref)* # expressionSetPredicateSubquery + | expression COMPARISON ALL SUBQUERY relation_ref # expressionSetComparisonSubquery + ; + +expression_list + : LEFTPAREN expression ( COMMA expression )* RIGHTPAREN ; constant @@ -226,4 +234,7 @@ simple_id | TYPE | EMIT | NAMED + | ALL + | COMPARISON + | OR ; diff --git a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp index 307835f5..fa7c9b78 100644 --- a/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp +++ b/src/substrait/textplan/parser/tests/TextPlanParserTest.cpp @@ -1061,9 +1061,12 @@ std::vector getTestCases() { "1:0 → extraneous input 'relation' expecting {, " "'EXTENSION_SPACE', 'NAMED', 'SCHEMA', 'PIPELINES', 'FILTER', " "'GROUPING', 'MEASURE', 'SORT', 'COUNT', 'TYPE', 'EMIT', " + "'OR', 'ALL', COMPARISON, " "'SOURCE', 'ROOT', 'NULL', IDENTIFIER}", "1:24 → mismatched input '{' expecting 'RELATION'", "1:9 → Unrecognized relation type: notyperelation", + "1:9 → Internal error: Previously encountered symbol " + "went missing.", }), }, { @@ -1182,7 +1185,7 @@ std::vector getTestCases() { TEST(TextPlanParser, LoadFromFile) { auto stream = loadTextFile("data/provided_sample1.splan"); ASSERT_TRUE(stream.has_value()) << "Test input 
file missing."; - auto result = parseStream(*stream); + auto result = parseStream(&*stream); ASSERT_THAT(result, ParsesOk()); } @@ -1190,7 +1193,7 @@ TEST_P(TextPlanParserTestFixture, Parse) { auto [name, input, matcher] = GetParam(); auto stream = loadTextString(input); - auto result = parseStream(stream); + auto result = parseStream(&stream); ASSERT_THAT(result, matcher); } diff --git a/src/substrait/textplan/tests/ParseResultMatchers.cpp b/src/substrait/textplan/tests/ParseResultMatchers.cpp index b83baca1..5801ae2b 100644 --- a/src/substrait/textplan/tests/ParseResultMatchers.cpp +++ b/src/substrait/textplan/tests/ParseResultMatchers.cpp @@ -218,8 +218,10 @@ class WhenSerializedMatcher { bool MatchAndExplain( const ParseResult& result, ::testing::MatchResultListener* listener) const { - std::string outputText = - SymbolTablePrinter::outputToText(result.getSymbolTable()); + SubstraitErrorListener errorListener; + std::string outputText = SymbolTablePrinter::outputToText( + result.getSymbolTable(), &errorListener); + // Ignores errors as other matchers check for that. 
return MatchPrintAndExplain(outputText, stringMatcher_, listener); } diff --git a/src/substrait/textplan/tests/RoundtripTest.cpp b/src/substrait/textplan/tests/RoundtripTest.cpp index 42f447c0..89068d28 100644 --- a/src/substrait/textplan/tests/RoundtripTest.cpp +++ b/src/substrait/textplan/tests/RoundtripTest.cpp @@ -1,5 +1,7 @@ /* SPDX-License-Identifier: Apache-2.0 */ +#include + #include #include #include @@ -79,17 +81,20 @@ TEST_P(RoundTripBinaryToTextFixture, RoundTrip) { auto textResult = parseBinaryPlan(plan); auto textSymbols = textResult.getSymbolTable().getSymbols(); + SubstraitErrorListener errorListener; std::string outputText = - SymbolTablePrinter::outputToText(textResult.getSymbolTable()); + SymbolTablePrinter::outputToText(textResult.getSymbolTable(), + &errorListener); + textResult.addErrors(errorListener.getErrorMessages()); - ASSERT_THAT(textResult, AllOf(ParsesOk(), HasErrors({}))) + ASSERT_THAT(textResult, ParsesOk()) << std::endl << "Initial result:" << std::endl << addLineNumbers(outputText) << std::endl << textResult.getSymbolTable().toDebugString() << std::endl; auto stream = loadTextString(outputText); - auto result = parseStream(stream); + auto result = parseStream(&stream); ASSERT_NO_THROW(auto outputBinary = SymbolTablePrinter::outputToBinaryPlan( result.getSymbolTable());); diff --git a/src/substrait/textplan/tests/SymbolTableTest.cpp b/src/substrait/textplan/tests/SymbolTableTest.cpp index 319592ef..042419ad 100644 --- a/src/substrait/textplan/tests/SymbolTableTest.cpp +++ b/src/substrait/textplan/tests/SymbolTableTest.cpp @@ -121,23 +121,27 @@ TEST_F(SymbolTableTest, LocationsUnchangedAfterCopy) { symbols[2]->blob), ::testing::Eq(ptr3)); - ASSERT_THAT(symbols[0]->location, ::testing::Eq(symbols[0]->location)); ASSERT_THAT( - symbols[0]->location, - ::testing::Not(::testing::Eq(symbols[1]->location))); + symbols[0]->sourceLocation, ::testing::Eq(symbols[0]->sourceLocation)); ASSERT_THAT( - symbols[0]->location, - 
::testing::Not(::testing::Eq(symbols[2]->location))); + symbols[0]->sourceLocation, + ::testing::Not(::testing::Eq(symbols[1]->sourceLocation))); ASSERT_THAT( - symbols[1]->location, - ::testing::Not(::testing::Eq(symbols[2]->location))); + symbols[0]->sourceLocation, + ::testing::Not(::testing::Eq(symbols[2]->sourceLocation))); + ASSERT_THAT( + symbols[1]->sourceLocation, + ::testing::Not(::testing::Eq(symbols[2]->sourceLocation))); ASSERT_THAT( - table.getSymbols()[0]->location, ::testing::Eq(symbols[0]->location)); + table.getSymbols()[0]->sourceLocation, + ::testing::Eq(symbols[0]->sourceLocation)); ASSERT_THAT( - table.getSymbols()[1]->location, ::testing::Eq(symbols[1]->location)); + table.getSymbols()[1]->sourceLocation, + ::testing::Eq(symbols[1]->sourceLocation)); ASSERT_THAT( - table.getSymbols()[2]->location, ::testing::Eq(symbols[2]->location)); + table.getSymbols()[2]->sourceLocation, + ::testing::Eq(symbols[2]->sourceLocation)); } } // namespace