Skip to content

Commit

Permalink
feat: add root names to the textplan
Browse files Browse the repository at this point in the history
Root names are added to both the parser and the converter in this PR.  The root relation is not treated as a relation internally; instead it is treated merely as an annotation containing a list of names.  This is primarily because the relation-related codepaths make assumptions that wouldn't apply to the root (such as having a valid Relation proto as its data type).
  • Loading branch information
EpsilonPrime committed Jun 23, 2023
1 parent 4601d5c commit 111caf6
Show file tree
Hide file tree
Showing 8 changed files with 106 additions and 1 deletion.
1 change: 1 addition & 0 deletions src/substrait/textplan/SymbolTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ enum class SymbolType {
kSource = 7,
kSourceDetail = 8,
kField = 9,
kRoot = 10,

kUnknown = -1,
};
Expand Down
57 changes: 57 additions & 0 deletions src/substrait/textplan/SymbolTablePrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,44 @@ std::string outputRelationsSection(const SymbolTable& symbolTable) {
return text.str();
}

// Renders every kRoot symbol found in the symbol table as a textual "root"
// section that lists the symbol's names, one per line and comma separated.
// Root symbols without any names produce no output.  When more than one
// section is emitted, consecutive sections are separated by an empty line.
std::string outputRootSection(const SymbolTable& symbolTable) {
  std::stringstream out;
  bool emittedSection = false;
  for (const SymbolInfo& symbol : symbolTable) {
    if (symbol.type != SymbolType::kRoot) {
      continue;
    }
    auto rootNames = ANY_CAST(std::vector<std::string>, symbol.blob);
    if (rootNames.empty()) {
      // A root without names carries nothing worth printing.
      continue;
    }
    if (emittedSection) {
      out << "\n";
    }
    out << "root {"
        << "\n";
    out << " names = [";
    // The first name only needs a line break in front of it; every later
    // name must also terminate the previous entry with a comma.
    const char* separator = "\n";
    for (const auto& rootName : rootNames) {
      out << separator << " " << rootName;
      separator = ",\n";
    }
    // rootNames is known to be non-empty here, so the final entry always
    // needs its line ended before the closing bracket.
    out << "\n";
    out << " ]\n";
    out << "}\n";
    emittedSection = true;
  }
  return out.str();
}

std::string outputSchemaSection(const SymbolTable& symbolTable) {
std::stringstream text;
bool hasPreviousText = false;
Expand Down Expand Up @@ -427,6 +465,15 @@ std::string SymbolTablePrinter::outputToText(const SymbolTable& symbolTable) {
hasPreviousText = true;
}

newText = outputRootSection(symbolTable);
if (!newText.empty()) {
if (hasPreviousText) {
text << "\n";
}
text << newText;
hasPreviousText = true;
}

newText = outputSchemaSection(symbolTable);
if (!newText.empty()) {
if (hasPreviousText) {
Expand Down Expand Up @@ -676,6 +723,16 @@ ::substrait::proto::Plan SymbolTablePrinter::outputToBinaryPlan(
addInputsToRelation(
*relationData->newPipelines[0],
relation->mutable_root()->mutable_input());

const auto& rootSymbol =
symbolTable.nthSymbolByType(0, SymbolType::kRoot);
if (rootSymbol != SymbolInfo::kUnknown) {
const auto& rootNames =
ANY_CAST(std::vector<std::string>, rootSymbol.blob);
for (const auto& name : rootNames) {
relation->mutable_root()->add_names(name);
}
}
}
}

Expand Down
9 changes: 9 additions & 0 deletions src/substrait/textplan/converter/InitialPlanProtoVisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,15 @@ std::any InitialPlanProtoVisitor::visitRelation(

// Registers a kRoot symbol carrying the names listed on the root relation and
// then hands the relation to the base visitor for the regular traversal.
std::any InitialPlanProtoVisitor::visitRelationRoot(
    const ::substrait::proto::RelRoot& relation) {
  auto symbolName = symbolTable_->getUniqueName("root");
  // Copy the names out of the proto into a plain vector, which is what gets
  // passed along as the symbol's payload.
  std::vector<std::string> rootNames(
      relation.names().begin(), relation.names().end());
  symbolTable_->defineSymbol(
      symbolName,
      PROTO_LOCATION(relation),
      SymbolType::kRoot,
      SourceType::kUnknown,
      rootNames);
  BasePlanProtoVisitor::visitRelationRoot(relation);
  return std::nullopt;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,9 @@ std::any SubstraitPlanRelationVisitor::visitRelation(
// This error has been previously dealt with thus we can safely skip it.
return defaultResult();
}
if (symbol->type == SymbolType::kRoot) {
return defaultResult();
}
auto relationData = ANY_CAST(std::shared_ptr<RelationData>, symbol->blob);
::substrait::proto::Rel relation;

Expand Down
31 changes: 31 additions & 0 deletions src/substrait/textplan/parser/SubstraitPlanVisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,13 @@
#include "substrait/textplan/Finally.h"
#include "substrait/textplan/Location.h"
#include "substrait/textplan/StructuredSymbolData.h"
#include "substrait/textplan/SymbolTable.h"
#include "substrait/type/Type.h"

namespace io::substrait::textplan {

// Symbol name under which the parser registers the plan's root entry.  The
// same name is looked up first to detect an already defined root or a
// conflicting symbol that is also named "root".
const std::string kRootName{"root"};

// Removes leading and trailing quotation marks.
std::string extractFromString(std::string s) {
if (s.size() < 2) {
Expand Down Expand Up @@ -160,6 +163,34 @@ std::any SubstraitPlanVisitor::visitSchema_item(

std::any SubstraitPlanVisitor::visitRelation(
SubstraitPlanParser::RelationContext* ctx) {
if (!ctx->id().empty()) {
auto prevRoot = symbolTable_->lookupSymbolByName(kRootName);
if (prevRoot != nullptr) {
if (prevRoot->type == SymbolType::kRoot) {
errorListener_->addError(
ctx->getStart(), "A root relation was already defined.");
} else {
errorListener_->addError(
ctx->getStart(), "A relation named root was already defined.");
}
return nullptr;
}
std::vector<std::string> names;
for (const auto& id : ctx->id()) {
names.push_back(id->getText());
}
symbolTable_->defineSymbol(
kRootName,
Location(ctx),
SymbolType::kRoot,
SourceType::kUnknown,
names);
return nullptr;
}
if (ctx->relation_type() == nullptr) {
errorListener_->addError(ctx->getStart(), "Could not parse this relation.");
return nullptr;
}
auto relType =
ANY_CAST(RelationType, visitRelation_type(ctx->relation_type()));
if (ctx->relation_ref() == nullptr) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ NAMED_TABLE: 'NAMED_TABLE';
EXTENSION_TABLE: 'EXTENSION_TABLE';

SOURCE: 'SOURCE';
ROOT: 'ROOT';
ITEMS: 'ITEMS';
NAMES: 'NAMES';
URI_FILE: 'URI_FILE';
Expand Down
3 changes: 3 additions & 0 deletions src/substrait/textplan/parser/grammar/SubstraitPlanParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ pipeline
// TODO -- Make the token order involving ids consistent between relations and other top-level entities.
// A relation takes one of two forms:
//   * a relation type, the RELATION keyword, a relation reference, and a
//     braced list of zero or more relation details, or
//   * a ROOT block holding a NAMES list of one or more ids, in which a
//     trailing comma after the last id is permitted.
relation
: relation_type RELATION relation_ref LEFTBRACE relation_detail* RIGHTBRACE
| ROOT LEFTBRACE NAMES EQUAL LEFTBRACKET id (COMMA id)* COMMA? RIGHTBRACKET RIGHTBRACE
;

relation_type
Expand Down Expand Up @@ -209,6 +210,8 @@ id
simple_id
: IDENTIFIER
| FILTER
| ROOT
| SOURCE
| SCHEMA
| NULLVAL
| SORT
Expand Down
2 changes: 1 addition & 1 deletion src/substrait/textplan/parser/tests/TextPlanParserTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1001,7 +1001,7 @@ std::vector<TestCase> getTestCases() {
"1:0 → extraneous input 'relation' expecting {<EOF>, "
"'EXTENSION_SPACE', 'SCHEMA', 'PIPELINES', 'FILTER', "
"'GROUPING', 'MEASURE', 'SORT', 'COUNT', 'TYPE', 'SOURCE', "
"'NULL', IDENTIFIER}",
"'ROOT', 'NULL', IDENTIFIER}",
"1:24 → mismatched input '{' expecting 'RELATION'",
"1:9 → Unrecognized relation type: notyperelation",
}),
Expand Down

0 comments on commit 111caf6

Please sign in to comment.