Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: parse keywords as names if qualified by period #620

Merged
merged 2 commits into from
Jul 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file.

## [Unreleased]

### Formatting Changes and Bug Fixes

- sqlfmt will now parse unquoted reserved keywords as names if they are qualified by a period, e.g., `foo.select` or `foo.case` ([#599](https://github.com/tconbeer/sqlfmt/issues/599) - thank you [@matthieucan](https://github.com/matthieucan)!).

## [0.22.0] - 2024-07-25

### Formatting Changes and Bug Fixes
Expand Down
44 changes: 42 additions & 2 deletions src/sqlfmt/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,41 @@ def handle_number(analyzer: "Analyzer", source_string: str, match: re.Match) ->
)


def handle_nonreserved_keyword(
def handle_reserved_keyword(
analyzer: "Analyzer",
source_string: str,
match: re.Match,
action: Callable[["Analyzer", str, re.Match], None],
) -> None:
"""
Reserved keywords can be used in most dialects as table or column
names without quoting if they are qualified.
https://github.com/tconbeer/sqlfmt/issues/599

This checks if the previous token is a period, and if so, lexes
this token as a name; otherwise this action executes the passed
action (which likely adds the node as some kind of keyword).
"""
if analyzer.previous_node is None:
action(analyzer, source_string, match)
return

previous_token, _ = get_previous_token(analyzer.previous_node)
if previous_token is not None and previous_token.type is TokenType.DOT:
token = Token.from_match(source_string, match, token_type=TokenType.NAME)
if not token.prefix:
node = analyzer.node_manager.create_node(
token=token, previous_node=analyzer.previous_node
)
analyzer.node_buffer.append(node)
analyzer.pos = token.epos
return

# in all other cases, this is a keyword.
action(analyzer, source_string, match)


def handle_nonreserved_top_level_keyword(
analyzer: "Analyzer",
source_string: str,
match: re.Match,
Expand All @@ -317,6 +351,10 @@ def handle_nonreserved_keyword(
"""
Checks to see if we're at depth 0 (assuming this is a name); if so, then take the
passed action, otherwise lex it as a name.

For example, this allows us to lex these differently:
explain select 1;
select explain, 1;
"""
token = Token.from_match(source_string, match, token_type=TokenType.NAME)
node = analyzer.node_manager.create_node(
Expand All @@ -326,7 +364,9 @@ def handle_nonreserved_keyword(
analyzer.node_buffer.append(node)
analyzer.pos = token.epos
else:
action(analyzer, source_string, match)
handle_reserved_keyword(
analyzer=analyzer, source_string=source_string, match=match, action=action
)


def lex_ruleset(
Expand Down
83 changes: 62 additions & 21 deletions src/sqlfmt/rules/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,23 @@
priority=1000,
pattern=group(r"case") + group(r"\W", r"$"),
action=partial(
actions.add_node_to_buffer, token_type=TokenType.STATEMENT_START
actions.handle_reserved_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.STATEMENT_START
),
),
),
Rule(
name="statement_end",
priority=1010,
pattern=group(r"end") + group(r"\W", r"$"),
action=partial(
actions.safe_add_node_to_buffer,
token_type=TokenType.STATEMENT_END,
fallback_token_type=TokenType.NAME,
actions.handle_reserved_keyword,
action=partial(
actions.safe_add_node_to_buffer,
token_type=TokenType.STATEMENT_END,
fallback_token_type=TokenType.NAME,
),
),
),
Rule(
Expand Down Expand Up @@ -65,7 +71,12 @@
r"within\s+group",
)
+ group(r"\W", r"$"),
action=partial(actions.add_node_to_buffer, token_type=TokenType.WORD_OPERATOR),
action=partial(
actions.handle_reserved_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.WORD_OPERATOR
),
),
),
Rule(
name="star_replace_exclude",
Expand All @@ -76,8 +87,10 @@
)
+ group(r"\s+\("),
action=partial(
actions.add_node_to_buffer,
token_type=TokenType.WORD_OPERATOR,
actions.handle_reserved_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.WORD_OPERATOR
),
),
),
Rule(
Expand All @@ -88,13 +101,21 @@
name="join_using",
priority=1110,
pattern=group(r"using") + group(r"\s*\("),
action=partial(actions.add_node_to_buffer, token_type=TokenType.WORD_OPERATOR),
action=partial(
actions.handle_reserved_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.WORD_OPERATOR
),
),
),
Rule(
name="on",
priority=1120,
pattern=group(r"on") + group(r"\W", r"$"),
action=partial(actions.add_node_to_buffer, token_type=TokenType.ON),
action=partial(
actions.handle_reserved_keyword,
action=partial(actions.add_node_to_buffer, token_type=TokenType.ON),
),
),
Rule(
name="boolean_operator",
Expand All @@ -106,8 +127,10 @@
)
+ group(r"\W", r"$"),
action=partial(
actions.add_node_to_buffer,
token_type=TokenType.BOOLEAN_OPERATOR,
actions.handle_reserved_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.BOOLEAN_OPERATOR
),
),
),
Rule(
Expand Down Expand Up @@ -154,15 +177,25 @@
r"returning",
)
+ group(r"\W", r"$"),
action=partial(actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD),
action=partial(
actions.handle_reserved_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD
),
),
),
Rule(
name="frame_clause",
priority=1305,
pattern=group(r"(range|rows|groups)\s+")
+ group(r"(between\s+)?((unbounded|\d+)\s+(preceding|following)|current\s+row)")
+ group(r"\W", r"$"),
action=partial(actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD),
action=partial(
actions.handle_reserved_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD
),
),
),
Rule(
# BQ arrays use an offset(n) function for
Expand All @@ -172,7 +205,12 @@
name="offset_keyword",
priority=1310,
pattern=group(r"offset") + group(r"\s+", r"$"),
action=partial(actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD),
action=partial(
actions.handle_reserved_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD
),
),
),
Rule(
name="set_operator",
Expand All @@ -181,15 +219,18 @@
r"(union|intersect|except|minus)(\s+(all|distinct))?(\s+by\s+name)?",
)
+ group(r"\W", r"$"),
action=actions.handle_set_operator,
action=partial(
actions.handle_reserved_keyword,
action=actions.handle_set_operator,
),
),
Rule(
name="explain",
priority=2000,
pattern=group(r"explain(\s+(analyze|verbose|using\s+(tabular|json|text)))?")
+ group(r"\W", r"$"),
action=partial(
actions.handle_nonreserved_keyword,
actions.handle_nonreserved_top_level_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD
),
Expand All @@ -200,7 +241,7 @@
priority=2010,
pattern=group(r"grant", r"revoke") + group(r"\W", r"$"),
action=partial(
actions.handle_nonreserved_keyword,
actions.handle_nonreserved_top_level_keyword,
action=partial(actions.lex_ruleset, new_ruleset=GRANT),
),
),
Expand All @@ -209,7 +250,7 @@
priority=2015,
pattern=group(CREATE_CLONABLE + r"\s+.+?\s+clone") + group(r"\W", r"$"),
action=partial(
actions.handle_nonreserved_keyword,
actions.handle_nonreserved_top_level_keyword,
action=partial(
actions.lex_ruleset,
new_ruleset=CLONE,
Expand All @@ -221,7 +262,7 @@
priority=2020,
pattern=group(CREATE_FUNCTION, ALTER_DROP_FUNCTION) + group(r"\W", r"$"),
action=partial(
actions.handle_nonreserved_keyword,
actions.handle_nonreserved_top_level_keyword,
action=partial(
actions.lex_ruleset,
new_ruleset=FUNCTION,
Expand All @@ -237,7 +278,7 @@
)
+ group(r"\W", r"$"),
action=partial(
actions.handle_nonreserved_keyword,
actions.handle_nonreserved_top_level_keyword,
action=partial(
actions.lex_ruleset,
new_ruleset=WAREHOUSE,
Expand Down Expand Up @@ -301,7 +342,7 @@
+ r"(?!\()"
+ group(r"\W", r"$"),
action=partial(
actions.handle_nonreserved_keyword,
actions.handle_nonreserved_top_level_keyword,
action=partial(actions.add_node_to_buffer, token_type=TokenType.FMT_OFF),
),
),
Expand Down
14 changes: 12 additions & 2 deletions src/sqlfmt/rules/clone.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,12 @@
name="unterm_keyword",
priority=1300,
pattern=group(CREATE_CLONABLE, r"clone") + group(r"\W", r"$"),
action=partial(actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD),
action=partial(
actions.handle_reserved_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD
),
),
),
Rule(
name="word_operator",
Expand All @@ -22,6 +27,11 @@
r"before",
)
+ group(r"\W", r"$"),
action=partial(actions.add_node_to_buffer, token_type=TokenType.WORD_OPERATOR),
action=partial(
actions.handle_reserved_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.WORD_OPERATOR
),
),
),
]
19 changes: 16 additions & 3 deletions src/sqlfmt/rules/function.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@
r"as",
)
+ group(r"\W", r"$"),
action=actions.handle_ddl_as,
action=partial(
actions.handle_reserved_keyword,
action=actions.handle_ddl_as,
),
),
Rule(
name="word_operator",
Expand All @@ -27,7 +30,12 @@
r"runtime_version",
)
+ group(r"\W", r"$"),
action=partial(actions.add_node_to_buffer, token_type=TokenType.WORD_OPERATOR),
action=partial(
actions.handle_reserved_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.WORD_OPERATOR
),
),
),
Rule(
name="unterm_keyword",
Expand Down Expand Up @@ -85,6 +93,11 @@
r"(re)?set(\s+all)?",
)
+ group(r"\W", r"$"),
action=partial(actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD),
action=partial(
actions.handle_reserved_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD
),
),
),
]
7 changes: 6 additions & 1 deletion src/sqlfmt/rules/grant.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@
r"restrict",
)
+ group(r"\W", r"$"),
action=partial(actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD),
action=partial(
actions.handle_reserved_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD
),
),
),
]
7 changes: 6 additions & 1 deletion src/sqlfmt/rules/warehouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@
r"rename\s+to",
)
+ group(r"\W", r"$"),
action=partial(actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD),
action=partial(
actions.handle_reserved_keyword,
action=partial(
actions.add_node_to_buffer, token_type=TokenType.UNTERM_KEYWORD
),
),
),
]
2 changes: 1 addition & 1 deletion src/sqlfmt_primer/primer.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def get_projects() -> List[SQLProject]:
SQLProject(
name="gitlab",
git_url="https://github.com/tconbeer/gitlab-analytics-sqlfmt.git",
git_ref="b1935f4", # sqlfmt 54b8edd
git_ref="5cd49f6", # sqlfmt aed0f39
expected_changed=1,
expected_unchanged=2416,
expected_errored=0,
Expand Down
21 changes: 21 additions & 0 deletions tests/data/preformatted/008_reserved_names.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
with
"select" as (select * from foo.select),
"case" as (select * from foo.case),
"end" as (select * from foo.end),
"as" as (select * from foo.as),
"interval" as (select * from foo.interval),
"exclude" as (select * from foo.exclude),
"using" as (select * from foo.using),
"on" as (select * from foo.on),
"and" as (select * from foo.and),
"limit" as (select * from foo.limit),
"over" as (select * from foo.over),
"between" as (select * from foo.between),
"union" as (select * from foo.union),
"explain" as (select * from foo.explain),
"grant" as (select * from foo.grant),
"create" as (select * from foo.create),
"alter" as (select * from foo.alter),
"truncate" as (select * from foo.truncate),
"drop" as (select * from foo.drop)
select 1
Loading
Loading