Skip to content

Commit

Permalink
fix: broken directory expansion (#14)
Browse files Browse the repository at this point in the history
The Python implementation doesn't handle globbing correctly, while the Rust
version did (sort of).

It turns out the Rust library's globbing doesn't exactly match git's globbing,
but it did a better job than the initial Python implementation.

Ended up porting a [Go implementation][0] of code owners, pretty much
line for line except for some minor differences between Go's regex syntax and Python's.

fixes: #13

[0]: https://github.com/hmarr/codeowners/blob/d0452091447bd2a29ee508eebc5a79874fb5d4ff/match.go#L33
  • Loading branch information
sbdchd authored Jan 28, 2021
1 parent e874f73 commit b4bebd2
Show file tree
Hide file tree
Showing 3 changed files with 545 additions and 273 deletions.
165 changes: 110 additions & 55 deletions codeowners/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
"""
Python port of https://github.com/softprops/codeowners
"""
import re
from pathlib import PurePath
from typing import List, Optional, Pattern, Tuple

from typing_extensions import Literal
Expand All @@ -17,28 +13,104 @@
EMAIL = re.compile(r"^\S+@\S+")


def path_to_regex(path: str) -> Pattern[str]:
if path == "*":
return re.compile(".*")
def path_to_regex(pattern: str) -> Pattern[str]:
"""
ported from https://github.com/hmarr/codeowners/blob/d0452091447bd2a29ee508eebc5a79874fb5d4ff/match.go#L33
if path.endswith("/"):
end = ".*$"
elif path.endswith("*"):
path = path.rstrip("*")
end = "[^/]*"
else:
end = "$"

if path.startswith("/"):
path = path.lstrip("/")
start = "^/?"
elif path.startswith("*"):
path = path.lstrip("*")
start = ".*"
MIT License
Copyright (c) 2020 Harry Marr
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""
regex = ""

try:
slash_pos = pattern.index("/")
anchored = slash_pos != len(pattern) - 1
except ValueError:
anchored = False

if anchored:
regex += r"\A"
else:
start = ".*/?"
regex += r"(?:\A|/)"

matches_dir = pattern[-1] == "/"
pattern_trimmed = pattern.strip("/")

in_char_class = False
escaped = False

# NOTE: this is an ugly hack so we can skip a letter in the loop, maybe
# refactor using generators or similar?
i = -1
while i < len(pattern_trimmed) - 1:
i += 1
ch = pattern_trimmed[i]

if escaped:
regex += re.escape(ch)
escaped = False
continue

if ch == "\\":
escaped = True
elif ch == "*":
if i + 1 < len(pattern_trimmed) and pattern_trimmed[i + 1] == "*":
left_anchored = i == 0
leading_slash = i > 0 and pattern_trimmed[i - 1] == "/"
right_anchored = i + 2 == len(pattern_trimmed)
trailing_slash = (
i + 2 < len(pattern_trimmed) and pattern_trimmed[i + 2] == "/"
)

if (left_anchored or leading_slash) and (
right_anchored or trailing_slash
):
regex += ".*"

i += 2
continue
regex += "[^/]*"
elif ch == "?":
regex += "[^/]"
elif ch == "[":
in_char_class = True
regex += ch
elif ch == "]":
if in_char_class:
regex += ch
in_char_class = False
else:
regex += re.escape(ch)
else:
regex += re.escape(ch)

if in_char_class:
raise ValueError(f"unterminated character class in pattern {pattern}")

return re.compile(start + re.escape(path) + end)
if matches_dir:
regex += "/"
else:
regex += r"(?:\Z|/)"
return re.compile(regex)


def parse_owner(owner: str) -> Optional[OwnerTuple]:
Expand All @@ -51,44 +123,27 @@ def parse_owner(owner: str) -> Optional[OwnerTuple]:
return None


def pattern_matches(path: str, pattern: Pattern[str]) -> bool:
match = pattern.match(path)
# The regexes we compile from the paths are required to match completely for
# the match to count.
return match is not None and match.span() == (0, len(path))


class CodeOwners:
def __init__(self, text: str) -> None:
paths: List[Tuple[Pattern[str], str, List[OwnerTuple]]] = []
paths: List[Tuple[Pattern[str], List[OwnerTuple]]] = []
for line in text.splitlines():
if line != "" and not line.startswith("#"):
elements = iter(line.split())
path = next(elements, None)
if path is not None:
owners: List[OwnerTuple] = []
for owner in elements:
owner_res = parse_owner(owner)
if owner_res is not None:
owners.append(owner_res)
paths.append((path_to_regex(path), path, owners))
if line == "" or line.startswith("#"):
continue
elements = iter(line.split())
path = next(elements, None)
if path is None:
continue
owners: List[OwnerTuple] = []
for owner in elements:
owner_res = parse_owner(owner)
if owner_res is not None:
owners.append(owner_res)
paths.append((path_to_regex(path), owners))
paths.reverse()
self.paths = paths

def of(self, filepath: str) -> List[OwnerTuple]:
for pattern, path, owners in self.paths:
if pattern_matches(filepath, pattern):
for pattern, owners in self.paths:
if pattern.search(filepath) is not None:
return owners
else:
if path.endswith("/*"):
continue
p = PurePath(filepath)
while True:
parent = p.parent
if parent == PurePath("/") or parent == PurePath("."):
break
if pattern_matches(str(parent), pattern):
return owners
else:
p = parent
return []
Loading

0 comments on commit b4bebd2

Please sign in to comment.