From b791982de8ea03bec5b7956a4425f88430c72d41 Mon Sep 17 00:00:00 2001 From: Pranjal Raihan Date: Tue, 24 Dec 2024 18:36:42 -0800 Subject: [PATCH] Implement parsing of user-defined functions Summary: This diff implements only the parsing part of user-defined functions. The renderer currently rejects them and support for them will be added in a future diff. The change here adds in a new kind of `function_call` in the AST called `user_defined`. User-defined functions are accessed via a `variable_lookup`. Additionally, names are changed to match nomenclature used in the spec (builtin), and error messages have been updated to be more consistent with others. Reviewed By: createdbysk Differential Revision: D67587583 fbshipit-source-id: 12ba36ff8153b2bf751ea4f0568cd2b6f3aba8bc --- .../thrift/src/thrift/compiler/whisker/ast.cc | 19 ++- .../thrift/src/thrift/compiler/whisker/ast.h | 108 +++++++++++--- .../src/thrift/compiler/whisker/parser.cc | 132 +++++++++++++++--- .../src/thrift/compiler/whisker/print_ast.cc | 3 - .../src/thrift/compiler/whisker/render.cc | 32 +++-- .../compiler/whisker/test/parser_test.cc | 112 ++++++++++++++- .../compiler/whisker/test/render_test.cc | 12 ++ 7 files changed, 362 insertions(+), 56 deletions(-) diff --git a/third-party/thrift/src/thrift/compiler/whisker/ast.cc b/third-party/thrift/src/thrift/compiler/whisker/ast.cc index fed63b0f38ecf7..36d1b30182e377 100644 --- a/third-party/thrift/src/thrift/compiler/whisker/ast.cc +++ b/third-party/thrift/src/thrift/compiler/whisker/ast.cc @@ -66,24 +66,31 @@ std::string partial_apply::path_string() const { std::string expression::to_string() const { return detail::variant_match( - content, + which, [](const variable_lookup& v) { return v.chain_string(); }, [](const function_call& f) { std::string out = fmt::format("({}", f.name()); - for (const auto& arg : f.args) { + for (const auto& arg : f.positional_arguments) { fmt::format_to(std::back_inserter(out), " {}", arg.to_string()); } + for (const auto& [name, arg] : f.named_arguments) { + fmt::format_to( + std::back_inserter(out), " {}={}", name, arg.value->to_string()); + } out += ")"; return out; }); } -std::string_view expression::function_call::name() const { +std::string expression::function_call::name() const { return detail::variant_match( which, - [&](function_call::not_tag) { return "not"; }, - [&](function_call::and_tag) { return "and"; }, - [&](function_call::or_tag) { return "or"; }); + [](function_call::builtin_not) -> std::string { return "not"; }, + [](function_call::builtin_and) -> std::string { return "and"; }, + [](function_call::builtin_or) -> std::string { return "or"; }, + [](const function_call::user_defined& f) -> std::string { + return f.name.chain_string(); + }); } std::string_view pragma_statement::to_string() const { diff --git a/third-party/thrift/src/thrift/compiler/whisker/ast.h b/third-party/thrift/src/thrift/compiler/whisker/ast.h index c5f7545dc85b8b..af2a5e1598df68 100644 --- a/third-party/thrift/src/thrift/compiler/whisker/ast.h +++ b/third-party/thrift/src/thrift/compiler/whisker/ast.h @@ -18,6 +18,8 @@ #include +#include +#include #include #include #include @@ -153,50 +155,122 @@ struct variable_lookup { struct expression { source_range loc; struct function_call { + /** + * Base class for all built-in functions. + */ + struct builtin {}; + /** + * Binary functions which are also associative and can be "chained", such as + * `(and arg1 ... argN)`, `(or arg1 ... argN)` etc. + */ + struct builtin_binary_associative {}; + /** * The `(not arg1)` function. */ - struct not_tag { + struct builtin_not : builtin { // Remove in C++20 which introduces comparison operator synthesis - friend bool operator==(const not_tag&, const not_tag&) { return true; } - WHISKER_DEFINE_OPERATOR_INEQUALITY(not_tag) + friend bool operator==(const builtin_not&, const builtin_not&) { + return true; + } + WHISKER_DEFINE_OPERATOR_INEQUALITY(builtin_not) }; - struct and_or_tag {}; // for convenience of writing matchers + /** * The `(and arg1 ... argN)` function. */ - struct and_tag : and_or_tag { + struct builtin_and : builtin, builtin_binary_associative { // Remove in C++20 which introduces comparison operator synthesis - friend bool operator==(const and_tag&, const and_tag&) { return true; } - WHISKER_DEFINE_OPERATOR_INEQUALITY(and_tag) + friend bool operator==(const builtin_and&, const builtin_and&) { + return true; + } + WHISKER_DEFINE_OPERATOR_INEQUALITY(builtin_and) }; + /** * The `(or arg1 ... argN)` function. */ - struct or_tag : and_or_tag { + struct builtin_or : builtin, builtin_binary_associative { + // Remove in C++20 which introduces comparison operator synthesis + friend bool operator==(const builtin_or&, const builtin_or&) { + return true; + } + WHISKER_DEFINE_OPERATOR_INEQUALITY(builtin_or) + }; + + /** + * A user-defined function call whose name is variable (chain of + * identifiers). + * + * Example: + * `(my_lib.snake_case "FooBar")` // "foo_bar" + */ + struct user_defined { + variable_lookup name; + + friend bool operator==(const user_defined& lhs, const user_defined& rhs) { + return lhs.name == rhs.name; + } // Remove in C++20 which introduces comparison operator synthesis - friend bool operator==(const or_tag&, const or_tag&) { return true; } - WHISKER_DEFINE_OPERATOR_INEQUALITY(or_tag) + WHISKER_DEFINE_OPERATOR_INEQUALITY(user_defined) }; - std::variant which; - std::vector args; + + std::variant which; + + /** + * Unnamed arguments that are identified by their ordering in the function + * invocation. + */ + std::vector positional_arguments; + + struct named_argument { + identifier name; + // Using the heap to avoid mutually recursion with `expression`. + // Using std::shared_ptr so that this struct is copyable. + std::shared_ptr value; + + friend bool operator==( + const named_argument& lhs, const named_argument& rhs) { + static const auto as_tuple = [](const named_argument& arg) { + return std::tie(arg.name, *arg.value); + }; + return as_tuple(lhs) == as_tuple(rhs); + } + // Remove in C++20 which introduces comparison operator synthesis + WHISKER_DEFINE_OPERATOR_INEQUALITY(named_argument) + }; + /** + * Named arguments that are identified by their name, with no restrictions + * on their ordering. Every argument must have a unique identifier. All + * named arguments must appear after all positional arguments. + * + * Using std::map for stable ordering when printing the AST. + */ + std::map named_arguments; + + /** + * The name of the function call lookup as seen in the source code. + */ + std::string name() const; friend bool operator==(const function_call& lhs, const function_call& rhs) { - return std::tie(lhs.which, lhs.args) == std::tie(rhs.which, rhs.args); + static const auto as_tuple = [](const function_call& f) { + // Ignore the source range of the function call. + return std::tie(f.which, f.positional_arguments, f.named_arguments); + }; + return as_tuple(lhs) == as_tuple(rhs); } // Remove in C++20 which introduces comparison operator synthesis WHISKER_DEFINE_OPERATOR_INEQUALITY(function_call) - - std::string_view name() const; }; - std::variant content; + std::variant which; /** * Determines if two expressions are syntactically equivalent, excluding their * location in source code. */ friend bool operator==(const expression& lhs, const expression& rhs) { - return lhs.content == rhs.content; + return lhs.which == rhs.which; } // Remove in C++20 which introduces comparison operator synthesis WHISKER_DEFINE_OPERATOR_INEQUALITY(expression) diff --git a/third-party/thrift/src/thrift/compiler/whisker/parser.cc b/third-party/thrift/src/thrift/compiler/whisker/parser.cc index 2556db614ff6b2..ecbccb7a4ee604 100644 --- a/third-party/thrift/src/thrift/compiler/whisker/parser.cc +++ b/third-party/thrift/src/thrift/compiler/whisker/parser.cc @@ -995,51 +995,143 @@ class parser { if (!try_consume_token(&scan, tok::l_paren)) { return no_parse_result(); } + scan = scan.make_fresh(); function_call func; + if (try_consume_token(&scan, tok::kw_not)) { - func.which = function_call::not_tag{}; + func.which = function_call::builtin_not{}; } else if (try_consume_token(&scan, tok::kw_and)) { - func.which = function_call::and_tag{}; + func.which = function_call::builtin_and{}; } else if (try_consume_token(&scan, tok::kw_or)) { - func.which = function_call::or_tag{}; + func.which = function_call::builtin_or{}; + } else if (parse_result lookup = parse_variable_lookup(scan)) { + func.which = function_call::user_defined{ + std::move(lookup).consume_and_advance(&scan)}; } else { - report_fatal_error(scan, "unrecognized function {}", scan.peek()); - } + report_expected(scan, "function-lookup in function-call"); + } + + using named_argument_entry = + decltype(function_call::named_arguments)::value_type; + // argument → { positional-argument | named-argument } + // positional-argument → { expression } + // named-argument → { identifier ~ "=" ~ expression } + const auto parse_argument = [this, &func](parser_scan_window scan) + -> parse_result> { + assert(scan.empty()); + const token& id = scan.peek(); + if (id.kind == tok::identifier && scan.next().peek().kind == tok::eq) { + scan = scan.next(2).make_fresh(); + if (parse_result expression = parse_expression(scan)) { + return { + named_argument_entry{ + id.string_value(), + function_call::named_argument{ + ast::identifier{id.range, std::string(id.string_value())}, + std::make_unique( + std::move(expression).consume_and_advance(&scan))}}, + scan}; + } + report_expected(scan, "expression in named argument"); + } + + assert(scan.empty()); + if (parse_result expression = parse_expression(scan)) { + return { + ast::expression{std::move(expression).consume_and_advance(&scan)}, + scan}; + } + if (scan.peek().kind == tok::eq) { + report_fatal_error( + scan, + "expected identifier to precede {} in named argument for function call '{}'", + tok::eq, + func.name()); + } + return no_parse_result(); + }; - while (parse_result cond = parse_expression(scan.make_fresh())) { - func.args.push_back(std::move(cond).consume_and_advance(&scan)); + // All named arguments must be at the end of the argument list + bool named_arg_seen = false; + while (parse_result arg = parse_argument(scan.make_fresh())) { + auto arg_scan_start = scan; + detail::variant_match( + std::move(arg).consume_and_advance(&scan), + [&](ast::expression&& positional) { + if (named_arg_seen) { + report_fatal_error( + scan, + "unexpected positional argument '{}' after named arguments in function call '{}'", + positional.to_string(), + func.name()); + } + func.positional_arguments.push_back(std::move(positional)); + }, + [&](named_argument_entry&& named) { + named_arg_seen = true; + std::string_view name = named.first; + if (const auto& [_, inserted] = + func.named_arguments.insert(std::move(named)); + !inserted) { + report_fatal_error( + arg_scan_start, + "duplicate named argument '{}' in function call '{}'", + name, + func.name()); + } + }); } + scan = scan.make_fresh(); + // Validate positional arguments detail::variant_match( func.which, - [&](function_call::not_tag) { - if (func.args.size() != 1) { + [&](const function_call::builtin_not&) { + if (func.positional_arguments.size() != 1) { report_fatal_error( scan, - "expected 1 argument for helper `not` but got {}", - func.args.size()); + "expected 1 argument for function 'not' but found {}", + func.positional_arguments.size()); } }, - [&](function_call::and_or_tag&) { - if (func.args.size() <= 1) { + [&](const function_call::builtin_binary_associative&) { + if (func.positional_arguments.size() <= 1) { report_fatal_error( scan, - "expected at least 2 arguments for helper `{}` but got {}", + "expected at least 2 arguments for function '{}' but found {}", func.name(), - func.args.size()); + func.positional_arguments.size()); } + }, + [](const function_call::user_defined&) { + // User-defined functions can have any number of arguments + }); + + // Validate named arguments + detail::variant_match( + func.which, + [&](const function_call::builtin&) { + if (!func.named_arguments.empty()) { + report_fatal_error( + scan, + "named arguments not allowed for function '{}'", + func.name()); + } + }, + [](const function_call::user_defined&) { + // User-defined functions can have any number of named arguments }); if (!try_consume_token(&scan, tok::r_paren)) { - report_fatal_error( + report_expected( scan, - "expected `)` to close helper `{}` but got `{}`", - func.name(), - scan.peek()); + fmt::format("{} to close function '{}'", tok::r_paren, func.name())); } - return {{scan.with_start(scan_start).range(), std::move(func)}, scan}; + return { + ast::expression{scan.with_start(scan_start).range(), std::move(func)}, + scan}; } // let-statement → diff --git a/third-party/thrift/src/thrift/compiler/whisker/print_ast.cc b/third-party/thrift/src/thrift/compiler/whisker/print_ast.cc index b643be3d71d6aa..9f57d03c5c0d94 100644 --- a/third-party/thrift/src/thrift/compiler/whisker/print_ast.cc +++ b/third-party/thrift/src/thrift/compiler/whisker/print_ast.cc @@ -18,10 +18,7 @@ #include #include -#include -#include #include -#include #include diff --git a/third-party/thrift/src/thrift/compiler/whisker/render.cc b/third-party/thrift/src/thrift/compiler/whisker/render.cc index 3d95630b308a1e..266b4f29792d3d 100644 --- a/third-party/thrift/src/thrift/compiler/whisker/render.cc +++ b/third-party/thrift/src/thrift/compiler/whisker/render.cc @@ -342,33 +342,47 @@ class render_engine { } object evaluate(const ast::expression& expr) { + using function_call = ast::expression::function_call; return detail::variant_match( - expr.content, + expr.which, [&](const ast::variable_lookup& variable_lookup) { return lookup_variable(variable_lookup); }, - [&](const ast::expression::function_call& func) { + [&](const function_call& func) { return detail::variant_match( func.which, - [&](ast::expression::function_call::not_tag) { - assert(func.args.size() == 1); // enforced by the parser - return object{!evaluate_as_bool(func.args[0])}; + [&](function_call::builtin_not) -> object { + // enforced by the parser + assert(func.positional_arguments.size() == 1); + assert(func.named_arguments.empty()); + return object{!evaluate_as_bool(func.positional_arguments[0])}; }, - [&](ast::expression::function_call::and_tag) { - for (const ast::expression& arg : func.args) { + [&](function_call::builtin_and) -> object { + // enforced by the parser + assert(func.named_arguments.empty()); + for (const ast::expression& arg : func.positional_arguments) { if (!evaluate_as_bool(arg)) { return object{false}; } } return object{true}; }, - [&](ast::expression::function_call::or_tag) { - for (const ast::expression& arg : func.args) { + [&](function_call::builtin_or) -> object { + // enforced by the parser + assert(func.named_arguments.empty()); + for (const ast::expression& arg : func.positional_arguments) { if (evaluate_as_bool(arg)) { return object{true}; } } return object{false}; + }, + [&](const function_call::user_defined& f) -> object { + diags_.error( + f.name.loc.begin, + "User-defined function '{}' not supported yet.", + f.name.chain_string()); + throw abort_rendering(); }); }); } diff --git a/third-party/thrift/src/thrift/compiler/whisker/test/parser_test.cc b/third-party/thrift/src/thrift/compiler/whisker/test/parser_test.cc index 88bb025b1ecd62..73f818732c21f1 100644 --- a/third-party/thrift/src/thrift/compiler/whisker/test/parser_test.cc +++ b/third-party/thrift/src/thrift/compiler/whisker/test/parser_test.cc @@ -512,11 +512,121 @@ TEST_F(ParserTest, conditional_block_not_wrong_arity) { diagnostics, testing::ElementsAre(diagnostic( diagnostic_level::error, - "expected 1 argument for helper `not` but got 2", + "expected 1 argument for function 'not' but found 2", path_to_file(1), 1))); } +TEST_F(ParserTest, function_call) { + auto ast = parse_ast("{{ (uppercase (lowercase hello\tspace) world) }}"); + EXPECT_EQ( + to_string(*ast), + "root [path/to/test-1.whisker]\n" + "|- interpolation '(uppercase (lowercase hello space) world)'\n"); +} + +TEST_F(ParserTest, function_call_named_args) { + auto ast = parse_ast("{{ (str.concat hello world sep=comma) }}"); + EXPECT_EQ( + to_string(*ast), + "root [path/to/test-1.whisker]\n" + "|- interpolation '(str.concat hello world sep=comma)'\n"); +} + +TEST_F(ParserTest, function_call_named_args_only) { + auto ast = parse_ast("{{ (str.concat sep = comma) }}"); + EXPECT_EQ( + to_string(*ast), + "root [path/to/test-1.whisker]\n" + "|- interpolation '(str.concat sep=comma)'\n"); +} + +TEST_F(ParserTest, function_call_named_args_duplicate) { + auto ast = + parse_ast("{{ (str.concat hello world sep=comma foo=bar sep=baz) }}"); + EXPECT_FALSE(ast.has_value()); + EXPECT_THAT( + diagnostics, + testing::ElementsAre(diagnostic( + diagnostic_level::error, + "duplicate named argument 'sep' in function call 'str.concat'", + path_to_file(1), + 1))); +} + +TEST_F(ParserTest, function_call_named_args_without_lookup) { + auto ast = parse_ast("{{ (sep=comma) }}"); + // `sep` is parsed as the name + EXPECT_FALSE(ast.has_value()); + EXPECT_THAT( + diagnostics, + testing::ElementsAre(diagnostic( + diagnostic_level::error, + "expected identifier to precede `=` in named argument for function call 'sep'", + path_to_file(1), + 1))); +} + +TEST_F(ParserTest, function_call_positional_args_after_named_args) { + auto ast = parse_ast("{{ (str.concat sep=comma hello world) }}"); + EXPECT_FALSE(ast.has_value()); + EXPECT_THAT( + diagnostics, + testing::ElementsAre(diagnostic( + diagnostic_level::error, + "unexpected positional argument 'hello' after named arguments in function call 'str.concat'", + path_to_file(1), + 1))); +} + +TEST_F(ParserTest, function_call_named_args_not_identifier) { + auto ast = parse_ast("{{ (str.concat hello world word.sep=comma) }}"); + EXPECT_FALSE(ast.has_value()); + EXPECT_THAT( + diagnostics, + testing::ElementsAre(diagnostic( + diagnostic_level::error, + "expected identifier to precede `=` in named argument for function call 'str.concat'", + path_to_file(1), + 1))); +} + +TEST_F(ParserTest, function_call_named_args_for_builtins) { + { + auto ast = parse_ast("{{ (not foo sep=comma) }}"); + EXPECT_FALSE(ast.has_value()); + EXPECT_THAT( + diagnostics, + testing::ElementsAre(diagnostic( + diagnostic_level::error, + "named arguments not allowed for function 'not'", + path_to_file(1), + 1))); + } + { + auto ast = parse_ast("{{ (and foo bar sep=comma) }}"); + EXPECT_FALSE(ast.has_value()); + EXPECT_THAT( + diagnostics, + testing::ElementsAre(diagnostic( + diagnostic_level::error, + "named arguments not allowed for function 'and'", + path_to_file(2), + 1))); + } + { + auto ast = parse_ast("{{ (or foo bar sep=comma) }}"); + EXPECT_FALSE(ast.has_value()); + EXPECT_THAT( + diagnostics, + testing::ElementsAre(diagnostic( + diagnostic_level::error, + "named arguments not allowed for function 'or'", + path_to_file(3), + 1))); + } +} + TEST_F(ParserTest, basic_partial_apply) { auto ast = parse_ast("{{> path / to / file }}"); EXPECT_EQ( diff --git a/third-party/thrift/src/thrift/compiler/whisker/test/render_test.cc b/third-party/thrift/src/thrift/compiler/whisker/test/render_test.cc index 505bea2215c4f0..3a7e3cc5b6cfa3 100644 --- a/third-party/thrift/src/thrift/compiler/whisker/test/render_test.cc +++ b/third-party/thrift/src/thrift/compiler/whisker/test/render_test.cc @@ -655,6 +655,18 @@ TEST_F(RenderTest, and_or_short_circuit) { } } +TEST_F(RenderTest, user_defined_function) { + auto result = render("{{ (foo.bar hello world named=foo) }}\n", w::map({})); + EXPECT_FALSE(result.has_value()); + EXPECT_THAT( + diagnostics(), + testing::ElementsAre(diagnostic( + diagnostic_level::error, + "User-defined function 'foo.bar' not supported yet.", + path_to_file, + 1))); +} + TEST_F(RenderTest, let_statement) { auto result = render( "{{#let cond = (not false_value)}}\n"