Skip to content

Commit

Permalink
Implement parsing of user-defined functions
Browse files Browse the repository at this point in the history
Summary:
This diff implements only the parsing part of user-defined functions. The renderer currently rejects them and support for them will be added in a future diff.

The change here adds in a new kind of `function_call` in the AST called `user_defined`. User-defined functions are accessed via a `variable_lookup`.

Additionally, names are changed to match nomenclature used in the spec (builtin), and error messages have been updated to be more consistent with others.

Reviewed By: createdbysk

Differential Revision: D67587583

fbshipit-source-id: 12ba36ff8153b2bf751ea4f0568cd2b6f3aba8bc
  • Loading branch information
praihan authored and facebook-github-bot committed Dec 25, 2024
1 parent 9eb1d04 commit b791982
Show file tree
Hide file tree
Showing 7 changed files with 362 additions and 56 deletions.
19 changes: 13 additions & 6 deletions third-party/thrift/src/thrift/compiler/whisker/ast.cc
Original file line number Diff line number Diff line change
Expand Up @@ -66,24 +66,31 @@ std::string partial_apply::path_string() const {

std::string expression::to_string() const {
return detail::variant_match(
content,
which,
[](const variable_lookup& v) { return v.chain_string(); },
[](const function_call& f) {
std::string out = fmt::format("({}", f.name());
for (const auto& arg : f.args) {
for (const auto& arg : f.positional_arguments) {
fmt::format_to(std::back_inserter(out), " {}", arg.to_string());
}
for (const auto& [name, arg] : f.named_arguments) {
fmt::format_to(
std::back_inserter(out), " {}={}", name, arg.value->to_string());
}
out += ")";
return out;
});
}

std::string_view expression::function_call::name() const {
std::string expression::function_call::name() const {
return detail::variant_match(
which,
[&](function_call::not_tag) { return "not"; },
[&](function_call::and_tag) { return "and"; },
[&](function_call::or_tag) { return "or"; });
[](function_call::builtin_not) -> std::string { return "not"; },
[](function_call::builtin_and) -> std::string { return "and"; },
[](function_call::builtin_or) -> std::string { return "or"; },
[](const function_call::user_defined& f) -> std::string {
return f.name.chain_string();
});
}

std::string_view pragma_statement::to_string() const {
Expand Down
108 changes: 91 additions & 17 deletions third-party/thrift/src/thrift/compiler/whisker/ast.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

#include <thrift/compiler/whisker/source_location.h>

#include <map>
#include <memory>
#include <optional>
#include <string>
#include <variant>
Expand Down Expand Up @@ -153,50 +155,122 @@ struct variable_lookup {
struct expression {
source_range loc;
struct function_call {
/**
* Base class for all built-in functions.
*/
struct builtin {};
/**
* Binary functions which are also associative and can be "chained", such as
* `(and arg1 ... argN)`, `(or arg1 ... argN)` etc.
*/
struct builtin_binary_associative {};

/**
* The `(not arg1)` function.
*/
struct not_tag {
struct builtin_not : builtin {
// Remove in C++20 which introduces comparison operator synthesis
friend bool operator==(const not_tag&, const not_tag&) { return true; }
WHISKER_DEFINE_OPERATOR_INEQUALITY(not_tag)
friend bool operator==(const builtin_not&, const builtin_not&) {
return true;
}
WHISKER_DEFINE_OPERATOR_INEQUALITY(builtin_not)
};
struct and_or_tag {}; // for convenience of writing matchers

/**
* The `(and arg1 ... argN)` function.
*/
struct and_tag : and_or_tag {
struct builtin_and : builtin, builtin_binary_associative {
// Remove in C++20 which introduces comparison operator synthesis
friend bool operator==(const and_tag&, const and_tag&) { return true; }
WHISKER_DEFINE_OPERATOR_INEQUALITY(and_tag)
friend bool operator==(const builtin_and&, const builtin_and&) {
return true;
}
WHISKER_DEFINE_OPERATOR_INEQUALITY(builtin_and)
};

/**
* The `(or arg1 ... argN)` function.
*/
struct or_tag : and_or_tag {
struct builtin_or : builtin, builtin_binary_associative {
// Remove in C++20 which introduces comparison operator synthesis
friend bool operator==(const builtin_or&, const builtin_or&) {
return true;
}
WHISKER_DEFINE_OPERATOR_INEQUALITY(builtin_or)
};

/**
* A user-defined function call whose name is variable (chain of
* identifiers).
*
* Example:
* `(my_lib.snake_case "FooBar")` // "foo_bar"
*/
struct user_defined {
variable_lookup name;

friend bool operator==(const user_defined& lhs, const user_defined& rhs) {
return lhs.name == rhs.name;
}
// Remove in C++20 which introduces comparison operator synthesis
friend bool operator==(const or_tag&, const or_tag&) { return true; }
WHISKER_DEFINE_OPERATOR_INEQUALITY(or_tag)
WHISKER_DEFINE_OPERATOR_INEQUALITY(user_defined)
};
std::variant<not_tag, and_tag, or_tag> which;
std::vector<expression> args;

std::variant<builtin_not, builtin_and, builtin_or, user_defined> which;

/**
* Unnamed arguments that are identified by their ordering in the function
* invocation.
*/
std::vector<expression> positional_arguments;

struct named_argument {
identifier name;
// Using the heap to avoid mutually recursion with `expression`.
// Using std::shared_ptr so that this struct is copyable.
std::shared_ptr<expression> value;

friend bool operator==(
const named_argument& lhs, const named_argument& rhs) {
static const auto as_tuple = [](const named_argument& arg) {
return std::tie(arg.name, *arg.value);
};
return as_tuple(lhs) == as_tuple(rhs);
}
// Remove in C++20 which introduces comparison operator synthesis
WHISKER_DEFINE_OPERATOR_INEQUALITY(named_argument)
};
/**
* Named arguments that are identified by their name, with no restrictions
* on their ordering. Every argument must have a unique identifier. All
* named arguments must appear after all positional arguments.
*
* Using std::map for stable ordering when printing the AST.
*/
std::map<std::string_view, named_argument> named_arguments;

/**
* The name of the function call lookup as seen in the source code.
*/
std::string name() const;

friend bool operator==(const function_call& lhs, const function_call& rhs) {
return std::tie(lhs.which, lhs.args) == std::tie(rhs.which, rhs.args);
static const auto as_tuple = [](const function_call& f) {
// Ignore the source range of the function call.
return std::tie(f.which, f.positional_arguments, f.named_arguments);
};
return as_tuple(lhs) == as_tuple(rhs);
}
// Remove in C++20 which introduces comparison operator synthesis
WHISKER_DEFINE_OPERATOR_INEQUALITY(function_call)

std::string_view name() const;
};
std::variant<variable_lookup, function_call> content;
std::variant<variable_lookup, function_call> which;

/**
* Determines if two expressions are syntactically equivalent, excluding their
* location in source code.
*/
friend bool operator==(const expression& lhs, const expression& rhs) {
return lhs.content == rhs.content;
return lhs.which == rhs.which;
}
// Remove in C++20 which introduces comparison operator synthesis
WHISKER_DEFINE_OPERATOR_INEQUALITY(expression)
Expand Down
132 changes: 112 additions & 20 deletions third-party/thrift/src/thrift/compiler/whisker/parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -995,51 +995,143 @@ class parser {
if (!try_consume_token(&scan, tok::l_paren)) {
return no_parse_result();
}
scan = scan.make_fresh();

function_call func;

if (try_consume_token(&scan, tok::kw_not)) {
func.which = function_call::not_tag{};
func.which = function_call::builtin_not{};
} else if (try_consume_token(&scan, tok::kw_and)) {
func.which = function_call::and_tag{};
func.which = function_call::builtin_and{};
} else if (try_consume_token(&scan, tok::kw_or)) {
func.which = function_call::or_tag{};
func.which = function_call::builtin_or{};
} else if (parse_result lookup = parse_variable_lookup(scan)) {
func.which = function_call::user_defined{
std::move(lookup).consume_and_advance(&scan)};
} else {
report_fatal_error(scan, "unrecognized function {}", scan.peek());
}
report_expected(scan, "function-lookup in function-call");
}

using named_argument_entry =
decltype(function_call::named_arguments)::value_type;
// argument → { positional-argument | named-argument }
// positional-argument → { expression }
// named-argument → { identifier ~ "=" ~ expression }
const auto parse_argument = [this, &func](parser_scan_window scan)
-> parse_result<std::variant<ast::expression, named_argument_entry>> {
assert(scan.empty());
const token& id = scan.peek();
if (id.kind == tok::identifier && scan.next().peek().kind == tok::eq) {
scan = scan.next(2).make_fresh();
if (parse_result expression = parse_expression(scan)) {
return {
named_argument_entry{
id.string_value(),
function_call::named_argument{
ast::identifier{id.range, std::string(id.string_value())},
std::make_unique<ast::expression>(
std::move(expression).consume_and_advance(&scan))}},
scan};
}
report_expected(scan, "expression in named argument");
}

assert(scan.empty());
if (parse_result expression = parse_expression(scan)) {
return {
ast::expression{std::move(expression).consume_and_advance(&scan)},
scan};
}
if (scan.peek().kind == tok::eq) {
report_fatal_error(
scan,
"expected identifier to precede {} in named argument for function call '{}'",
tok::eq,
func.name());
}
return no_parse_result();
};

while (parse_result cond = parse_expression(scan.make_fresh())) {
func.args.push_back(std::move(cond).consume_and_advance(&scan));
// All named arguments must be at the end of the argument list
bool named_arg_seen = false;
while (parse_result arg = parse_argument(scan.make_fresh())) {
auto arg_scan_start = scan;
detail::variant_match(
std::move(arg).consume_and_advance(&scan),
[&](ast::expression&& positional) {
if (named_arg_seen) {
report_fatal_error(
scan,
"unexpected positional argument '{}' after named arguments in function call '{}'",
positional.to_string(),
func.name());
}
func.positional_arguments.push_back(std::move(positional));
},
[&](named_argument_entry&& named) {
named_arg_seen = true;
std::string_view name = named.first;
if (const auto& [_, inserted] =
func.named_arguments.insert(std::move(named));
!inserted) {
report_fatal_error(
arg_scan_start,
"duplicate named argument '{}' in function call '{}'",
name,
func.name());
}
});
}
scan = scan.make_fresh();

// Validate positional arguments
detail::variant_match(
func.which,
[&](function_call::not_tag) {
if (func.args.size() != 1) {
[&](const function_call::builtin_not&) {
if (func.positional_arguments.size() != 1) {
report_fatal_error(
scan,
"expected 1 argument for helper `not` but got {}",
func.args.size());
"expected 1 argument for function 'not' but found {}",
func.positional_arguments.size());
}
},
[&](function_call::and_or_tag&) {
if (func.args.size() <= 1) {
[&](const function_call::builtin_binary_associative&) {
if (func.positional_arguments.size() <= 1) {
report_fatal_error(
scan,
"expected at least 2 arguments for helper `{}` but got {}",
"expected at least 2 arguments for function '{}' but found {}",
func.name(),
func.args.size());
func.positional_arguments.size());
}
},
[](const function_call::user_defined&) {
// User-defined functions can have any number of arguments
});

// Validate named arguments
detail::variant_match(
func.which,
[&](const function_call::builtin&) {
if (!func.named_arguments.empty()) {
report_fatal_error(
scan,
"named arguments not allowed for function '{}'",
func.name());
}
},
[](const function_call::user_defined&) {
// User-defined functions can have any number of named arguments
});

if (!try_consume_token(&scan, tok::r_paren)) {
report_fatal_error(
report_expected(
scan,
"expected `)` to close helper `{}` but got `{}`",
func.name(),
scan.peek());
fmt::format("{} to close function '{}'", tok::r_paren, func.name()));
}

return {{scan.with_start(scan_start).range(), std::move(func)}, scan};
return {
ast::expression{scan.with_start(scan_start).range(), std::move(func)},
scan};
}

// let-statement →
Expand Down
3 changes: 0 additions & 3 deletions third-party/thrift/src/thrift/compiler/whisker/print_ast.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,7 @@
#include <thrift/compiler/whisker/print_ast.h>
#include <thrift/compiler/whisker/tree_printer.h>

#include <iterator>
#include <memory>
#include <ostream>
#include <string_view>

#include <fmt/core.h>

Expand Down
Loading

0 comments on commit b791982

Please sign in to comment.