Skip to content

Commit

Permalink
Handle comments with annotations
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 679548102
  • Loading branch information
AleksMat authored and copybara-github committed Oct 2, 2024
1 parent d61d957 commit d7701ec
Show file tree
Hide file tree
Showing 11 changed files with 475 additions and 184 deletions.
489 changes: 369 additions & 120 deletions patches/pyink.patch

Large diffs are not rendered by default.

24 changes: 23 additions & 1 deletion src/pyink/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,13 @@
from pyink.lines import EmptyLineTracker, LinesBlock
from pyink.mode import FUTURE_FLAG_TO_FEATURE, Feature, VERSION_TO_FEATURES
from pyink.mode import Mode as Mode # re-exported
from pyink.mode import Preview, QuoteStyle, TargetVersion, supports_feature
from pyink.mode import (
DEFAULT_ANNOTATION_PRAGMAS,
Preview,
QuoteStyle,
TargetVersion,
supports_feature,
)
from pyink.nodes import STARS, is_number_token, is_simple_decorator_expression, syms
from pyink.output import color_diff, diff, dump_to_file, err, ipynb_diff, out
from pyink.parsing import ( # noqa F401
Expand Down Expand Up @@ -368,6 +374,18 @@ def validate_regex(
" notebook."
),
)
@click.option(
"--pyink-annotation-pragmas",
type=str,
multiple=True,
help=(
"Pyink won't split too long lines if they contain a comment with any of"
" the given annotation pragmas because it doesn't know to which part of"
" the line the annotation applies. The default annotation pragmas are:"
f" {', '.join(DEFAULT_ANNOTATION_PRAGMAS)}."
),
default=[],
)
@click.option(
"--pyink-use-majority-quotes",
is_flag=True,
Expand Down Expand Up @@ -580,6 +598,7 @@ def main( # noqa: C901
pyink: bool,
pyink_indentation: str,
pyink_ipynb_indentation: str,
pyink_annotation_pragmas: List[str],
pyink_use_majority_quotes: bool,
quiet: bool,
verbose: bool,
Expand Down Expand Up @@ -681,6 +700,9 @@ def main( # noqa: C901
is_pyink=pyink,
pyink_indentation=int(pyink_indentation),
pyink_ipynb_indentation=int(pyink_ipynb_indentation),
pyink_annotation_pragmas=(
tuple(pyink_annotation_pragmas) or DEFAULT_ANNOTATION_PRAGMAS
),
quote_style=(
QuoteStyle.MAJORITY if pyink_use_majority_quotes else QuoteStyle.DOUBLE
),
Expand Down
5 changes: 3 additions & 2 deletions src/pyink/comments.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from functools import lru_cache
from typing import Collection, Final, Iterator, List, Optional, Tuple, Union

from pyink import ink_comments
from pyink.mode import Mode, Preview
from pyink.nodes import (
CLOSING_BRACKETS,
Expand Down Expand Up @@ -376,15 +377,15 @@ def children_contains_fmt_on(container: LN) -> bool:
return False


def contains_pragma_comment(comment_list: List[Leaf]) -> bool:
def contains_pragma_comment(comment_list: List[Leaf], mode: Mode) -> bool:
"""
Returns:
True iff one of the comments in @comment_list is a pragma used by one
of the more common static analysis tools for python (e.g. mypy, flake8,
pylint).
"""
for comment in comment_list:
if comment.value.startswith(("# type:", "# noqa", "# pylint:")):
if ink_comments.comment_contains_pragma(comment.value, mode):
return True

return False
Expand Down
30 changes: 30 additions & 0 deletions src/pyink/ink_comments.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""Module that provides utilities related to comments.
This is separate from pyink.ink to avoid circular dependencies.
"""

import re

from pyink.mode import Mode


def comment_contains_pragma(comment: str, mode: Mode) -> bool:
"""Check if the given string contains one of the pragma forms.
A pragma form can appear at the beginning of a comment:
# pytype: disable=attribute-error
or somewhere in the middle:
# some comment # type: ignore # another comment
or the comments can even be separated by a semicolon:
# some comment; noqa: E111; another comment
Args:
comment: The comment to check.
mode: The mode that defines which pragma forms to check for.
Returns:
True if the comment contains one of the pragma forms.
"""
joined_pragma_expression = "|".join(mode.pyink_annotation_pragmas)
pragma_regex = re.compile(rf"([#|;] ?(?:{joined_pragma_expression}))")
return pragma_regex.search(comment) is not None
33 changes: 22 additions & 11 deletions src/pyink/linegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
else:
from typing import Final, Literal

from pyink import ink_comments
from pyink.brackets import (
COMMA_PRIORITY,
DOT_PRIORITY,
Expand Down Expand Up @@ -61,7 +62,6 @@
is_stub_body,
is_stub_suite,
is_tuple_containing_walrus,
is_type_ignore_comment_string,
is_vararg,
is_walrus_assignment,
is_yield,
Expand Down Expand Up @@ -272,6 +272,7 @@ def visit_dictsetmaker(self, node: Node) -> Iterator[Line]:
maybe_make_parens_invisible_in_atom(
child,
parent=node,
mode=self.mode,
remove_brackets_around_comma=False,
)
else:
Expand All @@ -292,6 +293,7 @@ def visit_funcdef(self, node: Node) -> Iterator[Line]:
if maybe_make_parens_invisible_in_atom(
child,
parent=node,
mode=self.mode,
remove_brackets_around_comma=False,
):
wrap_in_parentheses(node, child, visible=False)
Expand Down Expand Up @@ -395,7 +397,7 @@ def visit_power(self, node: Node) -> Iterator[Line]:
):
wrap_in_parentheses(node, leaf)

remove_await_parens(node)
remove_await_parens(node, mode=self.mode)

yield from self.visit_default(node)

Expand Down Expand Up @@ -442,7 +444,9 @@ def foo(a: int, b: float = 7): ...
def foo(a: (int), b: (float) = 7): ...
"""
assert len(node.children) == 3
if maybe_make_parens_invisible_in_atom(node.children[2], parent=node):
if maybe_make_parens_invisible_in_atom(
node.children[2], parent=node, mode=self.mode
):
wrap_in_parentheses(node, node.children[2], visible=False)

yield from self.visit_default(node)
Expand Down Expand Up @@ -688,7 +692,7 @@ def transform_line(

transformers: List[Transformer]
if (
not line.contains_uncollapsable_type_comments()
not line.contains_uncollapsable_pragma_comments()
and not line.should_split_rhs
and not line.magic_trailing_comma
and (
Expand Down Expand Up @@ -1451,15 +1455,17 @@ def normalize_invisible_parens( # noqa: C901
if maybe_make_parens_invisible_in_atom(
child,
parent=node,
mode=mode,
remove_brackets_around_comma=True,
):
wrap_in_parentheses(node, child, visible=False)
elif isinstance(child, Node) and node.type == syms.with_stmt:
remove_with_parens(child, node)
remove_with_parens(child, node, mode=mode)
elif child.type == syms.atom:
if maybe_make_parens_invisible_in_atom(
child,
parent=node,
mode=mode,
):
wrap_in_parentheses(node, child, visible=False)
elif is_one_tuple(child):
Expand Down Expand Up @@ -1511,7 +1517,7 @@ def _normalize_import_from(parent: Node, child: LN, index: int) -> None:
parent.append_child(Leaf(token.RPAR, ""))


def remove_await_parens(node: Node) -> None:
def remove_await_parens(node: Node, mode: Mode) -> None:
if node.children[0].type == token.AWAIT and len(node.children) > 1:
if (
node.children[1].type == syms.atom
Expand All @@ -1520,6 +1526,7 @@ def remove_await_parens(node: Node) -> None:
if maybe_make_parens_invisible_in_atom(
node.children[1],
parent=node,
mode=mode,
remove_brackets_around_comma=True,
):
wrap_in_parentheses(node, node.children[1], visible=False)
Expand Down Expand Up @@ -1588,7 +1595,7 @@ def _maybe_wrap_cms_in_parens(
node.insert_child(1, new_child)


def remove_with_parens(node: Node, parent: Node) -> None:
def remove_with_parens(node: Node, parent: Node, mode: Mode) -> None:
"""Recursively hide optional parens in `with` statements."""
# Removing all unnecessary parentheses in with statements in one pass is a tad
# complex as different variations of bracketed statements result in pretty
Expand All @@ -1610,21 +1617,23 @@ def remove_with_parens(node: Node, parent: Node) -> None:
if maybe_make_parens_invisible_in_atom(
node,
parent=parent,
mode=mode,
remove_brackets_around_comma=True,
):
wrap_in_parentheses(parent, node, visible=False)
if isinstance(node.children[1], Node):
remove_with_parens(node.children[1], node)
remove_with_parens(node.children[1], node, mode=mode)
elif node.type == syms.testlist_gexp:
for child in node.children:
if isinstance(child, Node):
remove_with_parens(child, node)
remove_with_parens(child, node, mode=mode)
elif node.type == syms.asexpr_test and not any(
leaf.type == token.COLONEQUAL for leaf in node.leaves()
):
if maybe_make_parens_invisible_in_atom(
node.children[0],
parent=node,
mode=mode,
remove_brackets_around_comma=True,
):
wrap_in_parentheses(node, node.children[0], visible=False)
Expand All @@ -1633,6 +1642,7 @@ def remove_with_parens(node: Node, parent: Node) -> None:
def maybe_make_parens_invisible_in_atom(
node: LN,
parent: LN,
mode: Mode,
remove_brackets_around_comma: bool = False,
) -> bool:
"""If it's safe, make the parens in the atom `node` invisible, recursively.
Expand Down Expand Up @@ -1682,7 +1692,7 @@ def maybe_make_parens_invisible_in_atom(
if (
# If the prefix of `middle` includes a type comment with
# ignore annotation, then we do not remove the parentheses
not is_type_ignore_comment_string(middle.prefix.strip())
not ink_comments.comment_contains_pragma(middle.prefix.strip(), mode)
):
first.value = ""
if first.prefix.strip():
Expand All @@ -1692,6 +1702,7 @@ def maybe_make_parens_invisible_in_atom(
maybe_make_parens_invisible_in_atom(
middle,
parent=parent,
mode=mode,
remove_brackets_around_comma=remove_brackets_around_comma,
)

Expand Down Expand Up @@ -1835,7 +1846,7 @@ def run_transformer(
or not line.bracket_tracker.invisible
or any(bracket.value for bracket in line.bracket_tracker.invisible)
or line.contains_multiline_strings()
or result[0].contains_uncollapsable_type_comments()
or result[0].contains_uncollapsable_pragma_comments()
or result[0].contains_unsplittable_type_ignore()
or is_line_short_enough(result[0], mode=mode)
# If any leaves have no parents (which _can_ occur since
Expand Down
52 changes: 9 additions & 43 deletions src/pyink/lines.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from enum import Enum, auto
import itertools
import math
import re
from dataclasses import dataclass, field
from typing import (
Callable,
Expand Down Expand Up @@ -30,7 +29,7 @@
is_multiline_string,
is_one_sequence_between,
is_type_comment,
is_type_ignore_comment,
is_pragma_comment,
is_with_or_async_with_stmt,
make_simple_prefix,
replace_child,
Expand All @@ -47,9 +46,6 @@
LeafID = int
LN = Union[Leaf, Node]

# This regex should contain a single capture group capturing the entire match.
_PRAGMA_REGEX = re.compile("( *# (?:pylint|pytype):)")


class Indentation(Enum):
SCOPE = auto() # Scope indentation.
Expand Down Expand Up @@ -293,7 +289,7 @@ def contains_implicit_multiline_string_with_comments(self) -> bool:
return True
return False

def contains_uncollapsable_type_comments(self) -> bool:
def contains_uncollapsable_pragma_comments(self) -> bool:
ignored_ids = set()
try:
last_leaf = self.leaves[-1]
Expand All @@ -318,11 +314,9 @@ def contains_uncollapsable_type_comments(self) -> bool:
comment_seen = False
for leaf_id, comments in self.comments.items():
for comment in comments:
if is_type_comment(comment):
if comment_seen or (
not is_type_ignore_comment(comment)
and leaf_id not in ignored_ids
):
is_pragma = is_pragma_comment(comment, self.mode)
if is_type_comment(comment) or is_pragma:
if comment_seen or (not is_pragma and leaf_id not in ignored_ids):
return True

comment_seen = True
Expand Down Expand Up @@ -357,34 +351,11 @@ def contains_unsplittable_type_ignore(self) -> bool:
# line.
for node in self.leaves[-2:]:
for comment in self.comments.get(id(node), []):
if is_type_ignore_comment(comment):
if is_pragma_comment(comment, self.mode):
return True

return False

def trailing_pragma_comment_length(self) -> int:
if not self.leaves:
return 0

trailing_comments = self.comments.get(id(self.leaves[-1]), [])
if (
not trailing_comments
and len(self.leaves) > 1
and self.leaves[-1].type == token.RPAR
and not self.leaves[-1].value
):
# When last leaf is an invisible paren, the trailing comment is
# attached to the leaf before.
trailing_comments = self.comments.get(id(self.leaves[-2]), [])
length = 0
for comment in trailing_comments:
# str(comment) contains the whitespace preceding the `#`
comment_str = str(comment)
parts = _PRAGMA_REGEX.split(comment_str, maxsplit=1)
if len(parts) == 3:
length += len(parts[1]) + len(parts[2])
return length

def contains_multiline_strings(self) -> bool:
return any(is_multiline_string(leaf) for leaf in self.leaves)

Expand Down Expand Up @@ -730,7 +701,7 @@ def _maybe_empty_lines(self, current_line: Line) -> Tuple[int, int]: # noqa: C9
or self.previous_line.is_fmt_pass_converted(
first_leaf_matches=is_import
)
)
)
and not current_line.is_import
and not (
# Should not add empty lines before a STANDALONE_COMMENT.
Expand Down Expand Up @@ -878,14 +849,9 @@ def is_line_short_enough( # noqa: C901
if not line_str:
line_str = line_to_string(line)

if line.mode.is_pyink:
effective_length = str_width(line_str) - line.trailing_pragma_comment_length()
else:
effective_length = str_width(line_str)

if Preview.multiline_string_handling not in mode:
return (
effective_length <= mode.line_length
str_width(line_str) <= mode.line_length
and "\n" not in line_str # multiline strings
and not line.contains_standalone_comments()
)
Expand All @@ -894,7 +860,7 @@ def is_line_short_enough( # noqa: C901
return False
if "\n" not in line_str:
# No multiline strings (MLS) present
return effective_length <= mode.line_length
return str_width(line_str) <= mode.line_length

first, *_, last = line_str.split("\n")
if str_width(first) > mode.line_length or str_width(last) > mode.line_length:
Expand Down
Loading

0 comments on commit d7701ec

Please sign in to comment.