Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: extension spaces, functions, aggregation relations to binary #67

Merged
merged 34 commits into from
Jun 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
6439189
feat: fields, schemas, and sources binary output for the textplan parser
EpsilonPrime May 31, 2023
628cec5
Pulled over from parser-part6.
EpsilonPrime Jun 2, 2023
32380f2
Convert to using type over string.
EpsilonPrime Jun 2, 2023
252b61e
Fix for binary->text nullability for fixedchar and varchar.
EpsilonPrime Jun 2, 2023
8d7d21f
Various fixes
EpsilonPrime Jun 2, 2023
5bb355e
Minor cleanup.
EpsilonPrime Jun 2, 2023
4fbcf67
Fixed more occurrences of nullability not being handled correctly.
EpsilonPrime Jun 2, 2023
26c701f
Updated based on grammar changes.
EpsilonPrime Jun 2, 2023
6a7ee48
Fixed test to match recent change to numbering strategy.
EpsilonPrime Jun 2, 2023
248ef10
Added newly emitted fields to an earlier test.
EpsilonPrime Jun 2, 2023
72b6dbe
Added an option to allow the roundtrip tests to be skipped (useful un…
EpsilonPrime Jun 2, 2023
1ba347c
More nullability fixes.
EpsilonPrime Jun 2, 2023
6ca2b4e
Update type to match original json plan.
EpsilonPrime Jun 3, 2023
bf29afa
Broke the bad literals test into multiple to make it easier to debug.
EpsilonPrime Jun 3, 2023
fafc972
Now detects when functions are missing type declarations.
EpsilonPrime Jun 3, 2023
2187faf
Removed one case of duplicate error messages.
EpsilonPrime Jun 5, 2023
29374fd
Fixed bad merge.
EpsilonPrime Jun 5, 2023
fa7d5aa
Fixed sort desc nulls first.
EpsilonPrime Jun 5, 2023
4d67bc6
Now warns if roundtrip testing is off.
EpsilonPrime Jun 5, 2023
5cbc9c0
Addressed review comments.
EpsilonPrime Jun 6, 2023
21c9c1b
Bug fix.
EpsilonPrime Jun 6, 2023
2eb897e
Updated
EpsilonPrime Jun 6, 2023
e3dc336
Remove unused code.
EpsilonPrime Jun 7, 2023
2a6d737
Type fix for symbols found in the table by location.
EpsilonPrime Jun 7, 2023
102febf
Added some more errors.
EpsilonPrime Jun 7, 2023
b5c4de5
More refactoring.
EpsilonPrime Jun 7, 2023
eced5ca
Ran the reformatter.
EpsilonPrime Jun 7, 2023
7921f59
Clang tidy fix.
EpsilonPrime Jun 7, 2023
299eb0c
Add roundtrip testing to the clang tidy script so it can find all the…
EpsilonPrime Jun 7, 2023
ce3cc59
Merge branch 'main' into parser-part6
EpsilonPrime Jun 8, 2023
926ebce
Updated based on review notes.
EpsilonPrime Jun 8, 2023
2e98f09
Cleaned up includes.
EpsilonPrime Jun 8, 2023
889baf8
Replaced iostream with sstream to keep included header size down.
EpsilonPrime Jun 12, 2023
68fea3b
A few clang fixes.
EpsilonPrime Jun 12, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions src/substrait/textplan/ParseResult.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@

#include "substrait/textplan/ParseResult.h"

#include <iosfwd>
#include <iostream>
#include <sstream>

namespace io::substrait::textplan {

Expand Down
6 changes: 3 additions & 3 deletions src/substrait/textplan/SymbolTable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,13 +132,13 @@ const SymbolInfo* SymbolTable::lookupSymbolByName(
return symbols_[itr->second].get();
}

const SymbolInfo& SymbolTable::lookupSymbolByLocation(
const SymbolInfo* SymbolTable::lookupSymbolByLocation(
const Location& location) const {
auto itr = symbolsByLocation_.find(location);
if (itr == symbolsByLocation_.end()) {
return SymbolInfo::kUnknown;
return nullptr;
}
return *symbols_[itr->second];
return symbols_[itr->second].get();
}

const SymbolInfo& SymbolTable::nthSymbolByType(uint32_t n, SymbolType type)
Expand Down
2 changes: 1 addition & 1 deletion src/substrait/textplan/SymbolTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ class SymbolTable {
[[nodiscard]] const SymbolInfo* lookupSymbolByName(
const std::string& name) const;

[[nodiscard]] const SymbolInfo& lookupSymbolByLocation(
[[nodiscard]] const SymbolInfo* lookupSymbolByLocation(
const Location& location) const;

[[nodiscard]] const SymbolInfo& nthSymbolByType(uint32_t n, SymbolType type)
Expand Down
164 changes: 87 additions & 77 deletions src/substrait/textplan/SymbolTablePrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,83 +74,9 @@ void localFileToText(
}

std::string typeToText(const ::substrait::proto::Type& type) {
switch (type.kind_case()) {
case ::substrait::proto::Type::kBool:
if (type.bool_().nullability() ==
::substrait::proto::Type::NULLABILITY_NULLABLE) {
return "bool?";
}
return "bool";
case ::substrait::proto::Type::kI8:
if (type.i8().nullability() ==
::substrait::proto::Type::NULLABILITY_NULLABLE) {
return "i8?";
}
return "i8";
case ::substrait::proto::Type::kI16:
if (type.i16().nullability() ==
::substrait::proto::Type::NULLABILITY_NULLABLE) {
return "i16?";
}
return "i16";
case ::substrait::proto::Type::kI32:
if (type.i32().nullability() ==
::substrait::proto::Type::NULLABILITY_NULLABLE) {
return "i32?";
}
return "i32";
case ::substrait::proto::Type::kI64:
if (type.i64().nullability() ==
::substrait::proto::Type::NULLABILITY_NULLABLE) {
return "i64?";
}
return "i64";
case ::substrait::proto::Type::kFp32:
if (type.fp32().nullability() ==
::substrait::proto::Type::NULLABILITY_NULLABLE) {
return "fp32?";
}
return "fp32";
case ::substrait::proto::Type::kFp64:
if (type.fp64().nullability() ==
::substrait::proto::Type::NULLABILITY_NULLABLE) {
return "fp64?";
}
return "fp64";
case ::substrait::proto::Type::kString:
if (type.string().nullability() ==
::substrait::proto::Type::NULLABILITY_NULLABLE) {
return "string?";
}
return "string";
case ::substrait::proto::Type::kDecimal:
if (type.string().nullability() ==
::substrait::proto::Type::NULLABILITY_NULLABLE) {
return "decimal?";
}
return "decimal";
case ::substrait::proto::Type::kVarchar:
if (type.varchar().nullability() ==
::substrait::proto::Type::NULLABILITY_NULLABLE) {
return "varchar?";
}
return "varchar";
case ::substrait::proto::Type::kFixedChar:
if (type.fixed_char().nullability() ==
::substrait::proto::Type::NULLABILITY_NULLABLE) {
return "fixedchar?";
}
return "fixedchar";
case ::substrait::proto::Type::kDate:
if (type.date().nullability() ==
::substrait::proto::Type::NULLABILITY_NULLABLE) {
return "date?";
}
return "date";
case ::substrait::proto::Type::KIND_NOT_SET:
default:
return "UNSUPPORTED_TYPE";
}
SymbolTable symbolTable;
PlanPrinterVisitor visitor(symbolTable);
return visitor.typeToText(type);
};

std::string relationToText(
Expand Down Expand Up @@ -386,6 +312,88 @@ std::string outputFunctionsSection(const SymbolTable& symbolTable) {
return text.str();
}

void outputExtensionSpacesToBinaryPlan(
const SymbolTable& symbolTable,
::substrait::proto::Plan* plan) {
for (const SymbolInfo& info : symbolTable) {
if (info.type != SymbolType::kExtensionSpace) {
continue;
}

auto extensionData =
ANY_CAST(std::shared_ptr<ExtensionSpaceData>, info.blob);
auto uri = plan->add_extension_uris();
uri->set_uri(info.name);
uri->set_extension_uri_anchor(extensionData->anchorReference);
}
}

void outputFunctionsToBinaryPlan(
const SymbolTable& symbolTable,
::substrait::proto::Plan* plan) {
std::map<uint32_t, std::string> spaceNames;
std::set<uint32_t> usedSpaces;

// Look at the existing spaces.
for (const SymbolInfo& info : symbolTable) {
if (info.type != SymbolType::kExtensionSpace) {
continue;
}

auto extensionData =
ANY_CAST(std::shared_ptr<ExtensionSpaceData>, info.blob);
spaceNames.insert(
std::make_pair(extensionData->anchorReference, info.name));
}

// Find any spaces that are used but undefined.
for (const SymbolInfo& info : symbolTable) {
if (info.type != SymbolType::kFunction) {
continue;
}

auto extension = ANY_CAST(std::shared_ptr<FunctionData>, info.blob);
if (extension->extensionUriReference.has_value()) {
usedSpaces.insert(extension->extensionUriReference.value());
}
}

// Output the extensions by space in the order they were encountered.
for (const uint32_t space : usedSpaces) {
for (const SymbolInfo& info : symbolTable) {
if (info.type != SymbolType::kFunction) {
continue;
}

auto functionData = ANY_CAST(std::shared_ptr<FunctionData>, info.blob);
if (functionData->extensionUriReference != space) {
continue;
}

auto func = plan->add_extensions()->mutable_extension_function();
func->set_function_anchor(functionData->anchor);
func->set_name(functionData->name);

if (spaceNames.find(space) != spaceNames.end()) {
func->set_extension_uri_reference(space);
}
}
}

for (const SymbolInfo& info : symbolTable) {
if (info.type != SymbolType::kFunction) {
continue;
}

auto functionData = ANY_CAST(std::shared_ptr<FunctionData>, info.blob);
if (!functionData->extensionUriReference.has_value()) {
auto func = plan->add_extensions()->mutable_extension_function();
func->set_function_anchor(functionData->anchor);
func->set_name(functionData->name);
}
}
}

} // namespace

std::string SymbolTablePrinter::outputToText(const SymbolTable& symbolTable) {
Expand Down Expand Up @@ -631,6 +639,8 @@ void SymbolTablePrinter::addInputsToRelation(
::substrait::proto::Plan SymbolTablePrinter::outputToBinaryPlan(
const SymbolTable& symbolTable) {
::substrait::proto::Plan plan;
outputExtensionSpacesToBinaryPlan(symbolTable, &plan);
outputFunctionsToBinaryPlan(symbolTable, &plan);
for (const SymbolInfo& info : symbolTable) {
if (info.type != SymbolType::kRelation) {
continue;
Expand Down
9 changes: 7 additions & 2 deletions src/substrait/textplan/converter/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,13 @@ set(TEXTPLAN_SRCS
add_library(substrait_textplan_converter ${TEXTPLAN_SRCS})

target_link_libraries(
substrait_textplan_converter substrait_common substrait_expression
substrait_proto symbol_table error_listener)
substrait_textplan_converter
substrait_common
substrait_expression
substrait_proto
symbol_table
error_listener
date::date)

if(${SUBSTRAIT_CPP_BUILD_TESTING})
add_subdirectory(tests)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -267,12 +267,12 @@ std::any InitialPlanProtoVisitor::visitNamedStruct(
void InitialPlanProtoVisitor::addFieldsToRelation(
const std::shared_ptr<RelationData>& relationData,
const ::substrait::proto::Rel& relation) {
auto symbol = symbolTable_->lookupSymbolByLocation(PROTO_LOCATION(relation));
if (symbol == SymbolInfo::kUnknown || symbol.type != SymbolType::kRelation) {
auto* symbol = symbolTable_->lookupSymbolByLocation(PROTO_LOCATION(relation));
if (symbol == nullptr || symbol->type != SymbolType::kRelation) {
return;
}
auto symbolRelationData =
ANY_CAST(std::shared_ptr<RelationData>, symbol.blob);
ANY_CAST(std::shared_ptr<RelationData>, symbol->blob);
for (const auto& field : symbolRelationData->fieldReferences) {
relationData->fieldReferences.push_back(field);
}
Expand Down
Loading