diff --git a/zetasql/parser/ast_node_kind.h b/zetasql/parser/ast_node_kind.h index 926b21d2..3e04f3f2 100755 --- a/zetasql/parser/ast_node_kind.h +++ b/zetasql/parser/ast_node_kind.h @@ -45,7 +45,6 @@ enum ASTNodeKind { AST_ALTER_SCHEMA_STATEMENT, AST_ALTER_TABLE_STATEMENT, AST_ALTER_VIEW_STATEMENT, - AST_ALTER_USER_STATEMENT, AST_ANALYTIC_FUNCTION_CALL, AST_ANALYZE_STATEMENT, AST_AND_EXPR, @@ -88,7 +87,6 @@ enum ASTNodeKind { AST_CREATE_EXTERNAL_TABLE_STATEMENT, AST_CREATE_FUNCTION_STATEMENT, AST_CREATE_INDEX_STATEMENT, - AST_CREATE_USER_STATEMENT, AST_CREATE_MODEL_STATEMENT, AST_CREATE_PROCEDURE_STATEMENT, AST_CREATE_ROW_ACCESS_POLICY_STATEMENT, @@ -119,7 +117,6 @@ enum ASTNodeKind { AST_DROP_STATEMENT, AST_DROP_TABLE_FUNCTION_STATEMENT, AST_DROP_MATERIALIZED_VIEW_STATEMENT, - AST_DROP_USER_STATEMENT, AST_ELSEIF_CLAUSE, AST_ELSEIF_CLAUSE_LIST, AST_EXCEPTION_HANDLER, @@ -334,7 +331,12 @@ enum ASTNodeKind { AST_EXIT_STATEMENT, AST_ADD_PATH_ACTION, AST_DROP_PATH_ACTION, - kLastASTNodeKind = AST_DROP_PATH_ACTION, + AST_CREATE_USER_STATEMENT, + AST_ALTER_USER_STATEMENT, + AST_DROP_USER_STATEMENT, + AST_MAP_COLUMN_SCHEMA, + AST_MAP_TYPE, + kLastASTNodeKind = AST_MAP_TYPE, }; } // namespace zetasql diff --git a/zetasql/parser/bison_parser.y b/zetasql/parser/bison_parser.y index bb526cc3..36a359fc 100644 --- a/zetasql/parser/bison_parser.y +++ b/zetasql/parser/bison_parser.y @@ -718,6 +718,7 @@ using zetasql::ASTDropStatement; %token KW_RLIKE "RLIKE" %token KW_LIMIT "LIMIT" %token KW_LOOKUP "LOOKUP" +%token KW_MAP "MAP" %token KW_MERGE "MERGE" %token KW_MOD "MOD" %token KW_NATURAL "NATURAL" @@ -1130,6 +1131,8 @@ using zetasql::ASTDropStatement; %type json_literal %type lambda_argument %type lambda_argument_list +%type map_type +%type map_column_schema_inner %type merge_action %type merge_insert_value_list_or_source_row %type merge_source @@ -2836,10 +2839,18 @@ struct_column_schema_inner: | struct_column_schema_prefix ">" ; +map_column_schema_inner: + "MAP" "<" 
field_schema "," field_schema ">" + { + $$ = MAKE_NODE(ASTMapColumnSchema, @$, {$3, $5}); + } + ; + raw_column_schema_inner: simple_column_schema_inner | array_column_schema_inner | struct_column_schema_inner + | map_column_schema_inner ; column_schema_inner: @@ -6320,8 +6331,15 @@ struct_type: } ; +map_type: + "MAP" "<" type "," type ">" + { + $$ = MAKE_NODE(ASTMapType, @$, {$3, $5}); + } + ; + raw_type: - array_type | struct_type | type_name ; + array_type | struct_type | map_type | type_name ; type_parameter: integer_literal @@ -6382,6 +6400,10 @@ templated_parameter_kind: { $$ = zetasql::ASTTemplatedParameterType::ANY_ARRAY; } + | "MAP" + { + $$ = zetasql::ASTTemplatedParameterType::ANY_MAP; + } | identifier { const absl::string_view templated_type_string = $1->GetAsStringView(); @@ -6657,6 +6679,10 @@ function_name_from_keyword: { $$ = parser->MakeIdentifier(@1, parser->GetInputText(@1)); } + | "MAP" + { + $$ = parser->MakeIdentifier(@1, parser->GetInputText(@1)); + } ; // These rules have "expression" as their first part rather than @@ -7435,6 +7461,7 @@ reserved_keyword_rule: | "LIKE" | "LIMIT" | "LOOKUP" + | "MAP" | "MERGE" | "MOD" | "NATURAL" diff --git a/zetasql/parser/flex_tokenizer.l b/zetasql/parser/flex_tokenizer.l index 210251c5..dce0ba2c 100644 --- a/zetasql/parser/flex_tokenizer.l +++ b/zetasql/parser/flex_tokenizer.l @@ -40,7 +40,7 @@ the normal rules. */ %x DOT_IDENTIFIER -/* This inclusive state is for in ARRAY<...> and STRUCT<...>. It turns off the +/* This inclusive state is for in ARRAY<...>, MAP<...> and STRUCT<...>. It turns off the parsing of <<, >>, and <>, but leaves everything else the same. Doing this in the tokenizer avoids complicated rules and duplication at the parser level. 
@@ -519,6 +519,7 @@ limit { return BisonParserImpl::token::KW_LIMIT; } load { return BisonParserImpl::token::KW_LOAD; } lookup { return BisonParserImpl::token::KW_LOOKUP; } loop { return BisonParserImpl::token::KW_LOOP; } +map { return BisonParserImpl::token::KW_MAP; } match { return BisonParserImpl::token::KW_MATCH; } matched { return BisonParserImpl::token::KW_MATCHED; } materialized { return BisonParserImpl::token::KW_MATERIALIZED; } @@ -871,6 +872,7 @@ zone { return BisonParserImpl::token::KW_ZONE; } /* Don't recognize these in ARRAY<> or STRUCT<> context. */ "<>" { if (prev_token_ == BisonParserImpl::token::KW_ARRAY || + prev_token_ == BisonParserImpl::token::KW_MAP || prev_token_ == BisonParserImpl::token::KW_STRUCT) { // Match only the '<', and move to the same state that that production would // have moved to. @@ -890,6 +892,7 @@ zone { return BisonParserImpl::token::KW_ZONE; } } "<" { if (prev_token_ == BisonParserImpl::token::KW_ARRAY || + prev_token_ == BisonParserImpl::token::KW_MAP || prev_token_ == BisonParserImpl::token::KW_STRUCT) { // Switch to a mode that does not recognize >>. This only works as long as // there are no legal "independent" < and > inside array or struct types diff --git a/zetasql/parser/keywords.cc b/zetasql/parser/keywords.cc index e1b01aa4..b9fa451d 100644 --- a/zetasql/parser/keywords.cc +++ b/zetasql/parser/keywords.cc @@ -196,6 +196,7 @@ constexpr KeywordInfoPOD kAllKeywords[] = { {"load", KW_LOAD}, {"lookup", KW_LOOKUP, KeywordInfo::kReserved}, {"loop", KW_LOOP}, + {"map", KW_MAP, KeywordInfo::kReserved}, {"match", KW_MATCH}, {"matched", KW_MATCHED}, {"materialized", KW_MATERIALIZED}, diff --git a/zetasql/parser/keywords_test.cc b/zetasql/parser/keywords_test.cc index 05fe83d1..6ae76b2b 100644 --- a/zetasql/parser/keywords_test.cc +++ b/zetasql/parser/keywords_test.cc @@ -237,7 +237,7 @@ TEST(ParserTest, DontAddNewReservedKeywords) { // allows new queries to work that will not work on older code. 
// Before changing this, co-ordinate with all engines to make sure the change // is done safely. - EXPECT_EQ(106 /* CAUTION */, num_reserved); + EXPECT_EQ(107 /* CAUTION */, num_reserved); } } // namespace diff --git a/zetasql/parser/parse_tree.cc b/zetasql/parser/parse_tree.cc index 89beb53c..b4983f96 100644 --- a/zetasql/parser/parse_tree.cc +++ b/zetasql/parser/parse_tree.cc @@ -371,6 +371,8 @@ static absl::flat_hash_map CreateNodeNamesMap() { map[AST_LIKE_TABLE_CLAUSE] = "LikeTableClause"; map[AST_ADD_PATH_ACTION] = "AddOfflinePathAction"; map[AST_DROP_PATH_ACTION] = "DropOfflinePathAction"; + map[AST_MAP_COLUMN_SCHEMA] = "MapColumnSchema"; + map[AST_MAP_TYPE] = "MapType"; for (int kind = kFirstASTNodeKind; kind <= kLastASTNodeKind; ++kind) { ZETASQL_DCHECK(zetasql_base::ContainsKey(map, static_cast(kind))) diff --git a/zetasql/parser/parse_tree_manual.h b/zetasql/parser/parse_tree_manual.h index 4d170662..1dd96756 100644 --- a/zetasql/parser/parse_tree_manual.h +++ b/zetasql/parser/parse_tree_manual.h @@ -5380,6 +5380,30 @@ class ASTStructField final : public ASTNode { const ASTType* type_ = nullptr; }; +class ASTMapType final : public ASTType { + public: + static constexpr ASTNodeKind kConcreteNodeKind = AST_MAP_TYPE; + + ASTMapType() : ASTType(kConcreteNodeKind) {} + void Accept(ParseTreeVisitor* visitor, void* data) const override; + zetasql_base::StatusOr Accept( + NonRecursiveParseTreeVisitor* visitor) const override; + + const ASTType* key_type() const { return key_type_; } + const ASTType* value_type() const { return value_type_; } + + private: + void InitFields() final { + FieldLoader fl(this); + fl.AddRequired(&key_type_); + fl.AddRequired(&value_type_); + fl.AddOptional(mutable_type_parameters_ptr(), AST_TYPE_PARAMETER_LIST); + } + + const ASTType* key_type_ = nullptr; + const ASTType* value_type_ = nullptr; +}; + class ASTTemplatedParameterType final : public ASTNode { public: static constexpr ASTNodeKind kConcreteNodeKind = @@ -5392,6 +5416,7 @@ 
class ASTTemplatedParameterType final : public ASTNode { ANY_STRUCT, ANY_ARRAY, ANY_TABLE, + ANY_MAP, }; ASTTemplatedParameterType() : ASTNode(kConcreteNodeKind) {} @@ -5907,6 +5932,35 @@ class ASTArrayColumnSchema final : public ASTColumnSchema { const ASTColumnSchema* element_schema_ = nullptr; }; +class ASTMapColumnSchema final : public ASTColumnSchema { + public: + static constexpr ASTNodeKind kConcreteNodeKind = AST_MAP_COLUMN_SCHEMA; + + ASTMapColumnSchema() : ASTColumnSchema(kConcreteNodeKind) {} + void Accept(ParseTreeVisitor* visitor, void* data) const override; + zetasql_base::StatusOr Accept( + NonRecursiveParseTreeVisitor* visitor) const override; + + const ASTColumnSchema *key_schema() const { return key_schema_; } + const ASTColumnSchema *value_schema() const { return value_schema_; } + +private: + void InitFields() final { + FieldLoader fl(this); + fl.AddRequired(&key_schema_); + fl.AddRequired(&value_schema_); + fl.AddOptional(mutable_type_parameters_ptr(), AST_TYPE_PARAMETER_LIST); + fl.AddOptional(mutable_generated_column_info_ptr(), + AST_GENERATED_COLUMN_INFO); + fl.AddOptionalExpression(mutable_default_expression_ptr()); + fl.AddOptional(mutable_attributes_ptr(), AST_COLUMN_ATTRIBUTE_LIST); + fl.AddOptional(mutable_options_list_ptr(), AST_OPTIONS_LIST); + } + + const ASTColumnSchema* key_schema_ = nullptr; + const ASTColumnSchema* value_schema_ = nullptr; +}; + class ASTStructColumnSchema final : public ASTColumnSchema { public: static constexpr ASTNodeKind kConcreteNodeKind = AST_STRUCT_COLUMN_SCHEMA; diff --git a/zetasql/parser/testdata/create_table.test b/zetasql/parser/testdata/create_table.test index 11ee082e..0ca13c05 100644 --- a/zetasql/parser/testdata/create_table.test +++ b/zetasql/parser/testdata/create_table.test @@ -4766,3 +4766,41 @@ CREATE TABLE t CLONE t1 OPTIONS(expiration_timestamp = TIMESTAMP "2019-05-22 00:00:00 UTC", description = "mydataset.source_table clone", label = ARRAY["experiments"]) +== + +# create table with map 
type +# both map type and key/value type inside map accept optional column attribute and option list +create table t ( + c1 STRING, + c2 map<STRING, TIMESTAMP NOT NULL> NOT NULL +); +-- +CreateTableStatement [0-77] + PathExpression [13-14] + Identifier(t) [13-14] + TableElementList [15-77] + ColumnDefinition [19-28] + Identifier(c1) [19-21] + SimpleColumnSchema [22-28] + PathExpression [22-28] + Identifier(STRING) [22-28] + ColumnDefinition [32-75] + Identifier(c2) [32-34] + MapColumnSchema [35-75] + SimpleColumnSchema [39-45] + PathExpression [39-45] + Identifier(STRING) [39-45] + SimpleColumnSchema [47-65] + PathExpression [47-56] + Identifier(TIMESTAMP) [47-56] + ColumnAttributeList [57-65] + NotNullColumnAttribute [57-65] + ColumnAttributeList [67-75] + NotNullColumnAttribute [67-75] +-- +CREATE TABLE t +( + c1 STRING, + c2 MAP< STRING, TIMESTAMP NOT NULL > NOT NULL +) +== diff --git a/zetasql/parser/testdata/map.test b/zetasql/parser/testdata/map.test new file mode 100644 index 00000000..8a2c9ffa --- /dev/null +++ b/zetasql/parser/testdata/map.test @@ -0,0 +1,72 @@ +# construct map data type from map function +# access map value by [] operator +# +# we represent '[]' in the syntax tree with ArrayElement, but it can +# also apply to map values +select map("k", "v")["k"] +-- +QueryStatement [0-25] + Query [0-25] + Select [0-25] + SelectList [7-25] + SelectColumn [7-25] + ArrayElement [20-25] + FunctionCall [7-20] + PathExpression [7-10] + Identifier(`map`) [7-10] + StringLiteral("k") [11-14] + StringLiteral("v") [16-19] + StringLiteral("k") [21-24] +-- +SELECT + `map`("k", "v")["k"] +== + +select map("k", "v").k +-- +QueryStatement [0-22] + Query [0-22] + Select [0-22] + SelectList [7-22] + SelectColumn [7-22] + DotIdentifier [20-22] + FunctionCall [7-20] + PathExpression [7-10] + Identifier(`map`) [7-10] + StringLiteral("k") [11-14] + StringLiteral("v") [16-19] + Identifier(k) [21-22] +-- +SELECT + `map`("k", "v").k +== + +# casting to map type +select cast(col as MAP<STRING, TIMESTAMP>) +-- +QueryStatement 
[0-42] + Query [0-42] + Select [0-42] + SelectList [7-42] + SelectColumn [7-42] + CastExpression [7-42] + PathExpression [12-15] + Identifier(col) [12-15] + MapType [19-41] + SimpleType [23-29] + PathExpression [23-29] + Identifier(STRING) [23-29] + SimpleType [31-40] + PathExpression [31-40] + Identifier(TIMESTAMP) [31-40] +-- +SELECT + CAST(col AS MAP< STRING, TIMESTAMP >) +== + +select cast(col as MAP<>) +-- +ERROR: Syntax error: Unexpected ">" [at 1:24] +select cast(col as MAP<>) + ^ +== diff --git a/zetasql/parser/unparser.cc b/zetasql/parser/unparser.cc index 48e973d7..fa0e1735 100644 --- a/zetasql/parser/unparser.cc +++ b/zetasql/parser/unparser.cc @@ -3535,5 +3535,27 @@ void Unparser::visitASTDropOfflinePathAction(const ASTDropOfflinePathAction *nod node->path()->Accept(this, data); } +void Unparser::visitASTMapType(const ASTMapType *node, void *data) { + print("MAP<"); + node->key_type()->Accept(this, data); + print(","); + node->value_type()->Accept(this, data); + print(">"); + + if (node->type_parameters() != nullptr) { + node->type_parameters()->Accept(this, data); + } +} + +void Unparser::visitASTMapColumnSchema(const ASTMapColumnSchema *node, + void *data) { + print("MAP<"); + node->key_schema()->Accept(this, data); + print(","); + node->value_schema()->Accept(this, data); + print(">"); + UnparseColumnSchema(node, data); +} + } // namespace parser } // namespace zetasql diff --git a/zetasql/parser/unparser.h b/zetasql/parser/unparser.h index 6a24a4b9..509cca46 100644 --- a/zetasql/parser/unparser.h +++ b/zetasql/parser/unparser.h @@ -654,9 +654,12 @@ class Unparser : public ParseTreeVisitor { void visitASTAddOfflinePathAction(const ASTAddOfflinePathAction *node, void *data) override; void visitASTDropOfflinePathAction(const ASTDropOfflinePathAction *node, - void *data) override; + void *data) override; + void visitASTMapType(const ASTMapType *node, void *data) override; + void visitASTMapColumnSchema(const ASTMapColumnSchema *node, + void *data) 
override; - protected: +protected: // Set break_line to true if you want to print each child on a separate line. virtual void UnparseChildrenWithSeparator(const ASTNode* node, void* data, const std::string& separator,