-
Notifications
You must be signed in to change notification settings - Fork 12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: add support for emit in aggregate relations #122
Changes from 15 commits
fb5519e
d3dea20
c56d59b
f399361
f2f4245
054b2ed
522e999
ed0731c
517c705
181332d
737dadf
0c4ea54
51abce4
6b64b76
bd114dc
ce112d7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -316,6 +316,13 @@ bool isRelationEmitDetail(SubstraitPlanParser::Relation_detailContext* ctx) { | |
nullptr; | ||
} | ||
|
||
bool isAggregate(const SymbolInfo* symbol) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are there two implementations of this function (PlanPrinterVisitor.cpp:74)? The definitions are even different. If the difference is intentional, that's definitely worth a comment or at least renaming the functions to clarify their distinct behaviors. If the difference is not intentional, could this become There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Long term there shouldn't be this function at all and just a single type in the representation. Right now the intermediate form can have both types (depending on how it was parsed) which needs to be addressed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In that case, a comment would definitely be advisable just to give future readers a clue that this is a stopgap and that in the future there should only be |
||
if (const auto typeCase = ANY_CAST_IF(RelationType, symbol->subtype)) { | ||
return (typeCase == RelationType::kAggregate); | ||
} | ||
return false; | ||
} | ||
|
||
} // namespace | ||
|
||
std::any SubstraitPlanRelationVisitor::aggregateResult( | ||
|
@@ -819,7 +826,9 @@ std::any SubstraitPlanRelationVisitor::visitRelationEmit( | |
SymbolType::kRelation); | ||
auto parentRelationData = | ||
ANY_CAST(std::shared_ptr<RelationData>, parentSymbol->blob); | ||
this->processingEmit_ = true; | ||
auto result = visitChildren(ctx); | ||
this->processingEmit_ = false; | ||
auto parentRelationType = ANY_CAST(RelationType, parentSymbol->subtype); | ||
auto common = | ||
findCommonRelation(parentRelationType, &parentRelationData->relation); | ||
|
@@ -2023,6 +2032,9 @@ std::pair<int, int> SubstraitPlanRelationVisitor::findFieldReferenceByName( | |
std::shared_ptr<RelationData>& relationData, | ||
const std::string& name) { | ||
auto fieldReferencesSize = relationData->fieldReferences.size(); | ||
if (isAggregate(symbol) && this->processingEmit_) { | ||
fieldReferencesSize = 0; | ||
} | ||
|
||
auto generatedField = std::find_if( | ||
relationData->generatedFieldReferences.rbegin(), | ||
|
@@ -2075,10 +2087,19 @@ void SubstraitPlanRelationVisitor::applyOutputMappingToSchema( | |
if (common->emit().output_mapping_size() == 0) { | ||
common->mutable_direct(); | ||
} else { | ||
if (!relationData->outputFieldReferences.empty()) { | ||
// TODO -- Add support for aggregate relations. | ||
errorListener_->addError( | ||
token, "Aggregate relations do not yet support emit sections."); | ||
if (relationData->relation.has_aggregate()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This looks very similar to the code at the end of |
||
auto oldReferences = relationData->outputFieldReferences; | ||
relationData->outputFieldReferences.clear(); | ||
for (auto mapping : common->emit().output_mapping()) { | ||
if (mapping < oldReferences.size()) { | ||
relationData->outputFieldReferences.push_back(oldReferences[mapping]); | ||
} else { | ||
errorListener_->addError( | ||
token, | ||
"Field #" + std::to_string(mapping) + " requested but only " + | ||
std::to_string(oldReferences.size()) + " are available."); | ||
} | ||
} | ||
return; | ||
} | ||
for (auto mapping : common->emit().output_mapping()) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -33,8 +33,6 @@ const std::string kAggregationInvocationPrefix = "aggregationinvocation"; | |
const std::string kJoinTypePrefix = "jointype"; | ||
const std::string kSortDirectionPrefix = "sortdirection"; | ||
|
||
const std::string kIntermediateNodeName = "intermediate"; | ||
|
||
enum RelationFilterBehavior { | ||
kDefault = 0, | ||
kBestEffort = 1, | ||
|
@@ -374,6 +372,13 @@ comparisonToProto(const std::string& text) { | |
Expression_Subquery_SetComparison_ComparisonOp_COMPARISON_OP_UNSPECIFIED; | ||
} | ||
|
||
bool isAggregate(const SymbolInfo* symbol) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto |
||
if (const auto typeCase = ANY_CAST_IF(RelationType, symbol->subtype)) { | ||
return (typeCase == RelationType::kAggregate); | ||
} | ||
return false; | ||
} | ||
|
||
} // namespace | ||
|
||
std::any SubstraitPlanSubqueryRelationVisitor::aggregateResult( | ||
|
@@ -871,7 +876,9 @@ std::any SubstraitPlanSubqueryRelationVisitor::visitRelationEmit( | |
SymbolType::kRelation); | ||
auto parentRelationData = | ||
ANY_CAST(std::shared_ptr<RelationData>, parentSymbol->blob); | ||
this->processingEmit_ = true; | ||
auto result = visitChildren(ctx); | ||
this->processingEmit_ = false; | ||
auto parentRelationType = ANY_CAST(RelationType, parentSymbol->subtype); | ||
auto common = | ||
findCommonRelation(parentRelationType, &parentRelationData->relation); | ||
|
@@ -2163,6 +2170,9 @@ SubstraitPlanSubqueryRelationVisitor::findFieldReferenceByName( | |
std::shared_ptr<RelationData>& relationData, | ||
const std::string& name) { | ||
auto fieldReferencesSize = relationData->fieldReferences.size(); | ||
if (isAggregate(symbol) && this->processingEmit_) { | ||
fieldReferencesSize = 0; | ||
} | ||
|
||
auto generatedField = std::find_if( | ||
relationData->generatedFieldReferences.rbegin(), | ||
|
@@ -2234,10 +2244,19 @@ void SubstraitPlanSubqueryRelationVisitor::applyOutputMappingToSchema( | |
if (common->emit().output_mapping_size() == 0) { | ||
common->mutable_direct(); | ||
} else { | ||
if (!relationData->outputFieldReferences.empty()) { | ||
// TODO -- Add support for aggregate relations. | ||
errorListener_->addError( | ||
token, "Aggregate relations do not yet support emit sections."); | ||
if (relationData->relation.has_aggregate()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. more ditto |
||
auto oldReferences = relationData->outputFieldReferences; | ||
relationData->outputFieldReferences.clear(); | ||
for (auto mapping : common->emit().output_mapping()) { | ||
if (mapping < oldReferences.size()) { | ||
relationData->outputFieldReferences.push_back(oldReferences[mapping]); | ||
} else { | ||
errorListener_->addError( | ||
token, | ||
"Field #" + std::to_string(mapping) + " requested but only " + | ||
std::to_string(oldReferences.size()) + " are available."); | ||
} | ||
} | ||
return; | ||
} | ||
for (auto mapping : common->emit().output_mapping()) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Much nicer, thanks. It might be worthwhile to include the type of currentScope (and therefore the range of fieldReferences which would have been valid) below in the out of range error message
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I do agree that better error messages are useful but I believe the substrait validator is better suited for helping developers figure what is wrong with their plan generation (allowing this code to be simpler). I've added the type here though.