diff --git a/src/substrait/textplan/StructuredSymbolData.h b/src/substrait/textplan/StructuredSymbolData.h index 97f94bbb..676fa211 100644 --- a/src/substrait/textplan/StructuredSymbolData.h +++ b/src/substrait/textplan/StructuredSymbolData.h @@ -41,15 +41,18 @@ struct RelationData { // Each field reference here was generated within the current relation. std::vector generatedFieldReferences; - // Local aliases for field references in this relation. + // Local aliases for field references in this relation. Used to replace the + // normal form symbols would take for this relation's use only. (Later + // references to the symbol would use the alias.) std::map generatedFieldReferenceAlternativeExpression; // If populated, supersedes the combination of fieldReferences and // generatedFieldReferences for the field symbols exposed by this relation. std::vector outputFieldReferences; - // Contains the field reference names seen so far along with the id of the - // first occurrence. + // Contains the field reference names seen so far while processing this + // relation along with the id of the first occurrence. Used to detect when + // fully qualified references are necessary. std::map seenFieldReferenceNames; }; diff --git a/src/substrait/textplan/converter/InitialPlanProtoVisitor.cpp b/src/substrait/textplan/converter/InitialPlanProtoVisitor.cpp index 17e6f1ce..dfbdf38f 100644 --- a/src/substrait/textplan/converter/InitialPlanProtoVisitor.cpp +++ b/src/substrait/textplan/converter/InitialPlanProtoVisitor.cpp @@ -19,7 +19,7 @@ namespace io::substrait::textplan { namespace { -const std::string kIntermediateNodeName = "intermediate"; +const std::string kIntermediateNodeName{"intermediate"}; const std::string kRootNames{"root.names"}; std::string shortName(std::string str) { @@ -90,7 +90,6 @@ ::google::protobuf::RepeatedField getOutputMapping( return relation.filter().common().emit().output_mapping(); case ::substrait::proto::Rel::kFetch: return relation.fetch().common().emit().output_mapping(); - ; case ::substrait::proto::Rel::kAggregate: return relation.aggregate().common().emit().output_mapping(); case ::substrait::proto::Rel::kSort: @@ -100,7 +99,7 @@ ::google::protobuf::RepeatedField getOutputMapping( case ::substrait::proto::Rel::kProject: return relation.project().common().emit().output_mapping(); case ::substrait::proto::Rel::kSet: - return relation.sort().common().emit().output_mapping(); + return relation.set().common().emit().output_mapping(); case ::substrait::proto::Rel::kExtensionSingle: return relation.extension_single().common().emit().output_mapping(); case ::substrait::proto::Rel::kExtensionMulti: @@ -330,6 +329,7 @@ std::any InitialPlanProtoVisitor::visitNamedStruct( SymbolType::kField, SourceType::kUnknown, type); + nameNum++; } return BasePlanProtoVisitor::visitNamedStruct(named); } diff --git a/src/substrait/textplan/converter/PipelineVisitor.cpp b/src/substrait/textplan/converter/PipelineVisitor.cpp index 218f798b..d63a6f56 100644 --- a/src/substrait/textplan/converter/PipelineVisitor.cpp +++ b/src/substrait/textplan/converter/PipelineVisitor.cpp @@ -118,6 +118,7 @@ std::any PipelineVisitor::visitPlanRelation( const ::substrait::proto::PlanRel& relation) { auto symbols = symbolTable_->lookupSymbolsByLocation(PROTO_LOCATION(relation)); + // A symbol is guaranteed as we previously visited the parse tree. auto relationData = ANY_CAST(std::shared_ptr, symbols[0]->blob); switch (relation.rel_type_case()) { case ::substrait::proto::PlanRel::kRel: { diff --git a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp index dc1a0b32..a8273f12 100644 --- a/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp +++ b/src/substrait/textplan/parser/SubstraitPlanRelationVisitor.cpp @@ -278,10 +278,12 @@ void addInputFieldsToSchema( auto continuingRelationData = ANY_CAST( std::shared_ptr, relationData->continuingPipeline->blob); if (!continuingRelationData->outputFieldReferences.empty()) { + // There is an emit sequence so use that. for (auto field : continuingRelationData->outputFieldReferences) { relationData->fieldReferences.push_back(field); } } else { + // There was no emit so just access all the field references. for (auto field : continuingRelationData->fieldReferences) { relationData->fieldReferences.push_back(field); } @@ -594,7 +596,7 @@ std::any SubstraitPlanRelationVisitor::visitRelationUsesSchema( auto typeProto = ANY_CAST(::substrait::proto::Type, sym.blob); if (typeProto.kind_case() != ::substrait::proto::Type::KIND_NOT_SET) { *schema->mutable_struct_()->add_types() = typeProto; - // If the schema contains one more types, the structure is required. + // If the schema contains any types, the struct is required. schema->mutable_struct_()->set_nullability( ::substrait::proto::Type_Nullability_NULLABILITY_REQUIRED); } @@ -1161,8 +1163,6 @@ std::any SubstraitPlanRelationVisitor::visitExpressionColumn( // TODO -- Update the following when non-direct references are implemented. expr.mutable_selection()->mutable_root_reference(); } - - // visitChildren(ctx); return expr; } @@ -1990,6 +1990,7 @@ void SubstraitPlanRelationVisitor::applyOutputMappingToSchema( common->mutable_direct(); } else { if (!relationData->outputFieldReferences.empty()) { + // TODO -- Add support for aggregate relations. errorListener_->addError( token, "Aggregate relations do not yet support emit sections."); return; diff --git a/src/substrait/textplan/parser/SubstraitPlanVisitor.cpp b/src/substrait/textplan/parser/SubstraitPlanVisitor.cpp index a5a756a6..e3d47c5a 100644 --- a/src/substrait/textplan/parser/SubstraitPlanVisitor.cpp +++ b/src/substrait/textplan/parser/SubstraitPlanVisitor.cpp @@ -511,7 +511,6 @@ std::any SubstraitPlanVisitor::visitFile_detail( } else { return visitChildren(ctx); } - // symbol->blob.swap(item); return defaultResult(); }