fix: broken directory expansion (#14)

The Python implementation doesn't handle globbing correctly, while the Rust version did (sort of). It turns out the Rust library's globbing doesn't exactly match git's globbing, but it did a better job than the initial Python implementation.

Ended up porting a [Go implementation][0] of code owners, pretty much line for line, except for some minor differences between Go's regex syntax and Python's.

fixes: #13

[0]: https://github.com/hmarr/codeowners/blob/d0452091447bd2a29ee508eebc5a79874fb5d4ff/match.go#L33
sbdchd · Jan 28, 2021 · b4bebd2 · b4bebd2
1 parent e874f73
commit b4bebd2
Show file tree

Hide file tree

Showing 3 changed files with 545 additions and 273 deletions.
diff --git a/codeowners/__init__.py b/codeowners/__init__.py
@@ -1,8 +1,4 @@
-"""
-Python port of https://github.com/softprops/codeowners
-"""
 import re
-from pathlib import PurePath
 from typing import List, Optional, Pattern, Tuple
 
 from typing_extensions import Literal
@@ -17,28 +13,104 @@
 EMAIL = re.compile(r"^\S+@\S+")
 
 
-def path_to_regex(path: str) -> Pattern[str]:
-    if path == "*":
-        return re.compile(".*")
+def path_to_regex(pattern: str) -> Pattern[str]:
+    """
+    ported from https://github.com/hmarr/codeowners/blob/d0452091447bd2a29ee508eebc5a79874fb5d4ff/match.go#L33
 
-    if path.endswith("/"):
-        end = ".*$"
-    elif path.endswith("*"):
-        path = path.rstrip("*")
-        end = "[^/]*"
-    else:
-        end = "$"
-
-    if path.startswith("/"):
-        path = path.lstrip("/")
-        start = "^/?"
-    elif path.startswith("*"):
-        path = path.lstrip("*")
-        start = ".*"
+    MIT License
+
+    Copyright (c) 2020 Harry Marr
+
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to deal
+    in the Software without restriction, including without limitation the rights
+    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+    copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be included in all
+    copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+    SOFTWARE.
+    """
+    regex = ""
+
+    try:
+        slash_pos = pattern.index("/")
+        anchored = slash_pos != len(pattern) - 1
+    except ValueError:
+        anchored = False
+
+    if anchored:
+        regex += r"\A"
     else:
-        start = ".*/?"
+        regex += r"(?:\A|/)"
+
+    matches_dir = pattern[-1] == "/"
+    pattern_trimmed = pattern.strip("/")
+
+    in_char_class = False
+    escaped = False
+
+    # NOTE: this is an ugly hack so we can skip a letter in the loop, maybe
+    # refactor using generators or similar?
+    i = -1
+    while i < len(pattern_trimmed) - 1:
+        i += 1
+        ch = pattern_trimmed[i]
+
+        if escaped:
+            regex += re.escape(ch)
+            escaped = False
+            continue
+
+        if ch == "\\":
+            escaped = True
+        elif ch == "*":
+            if i + 1 < len(pattern_trimmed) and pattern_trimmed[i + 1] == "*":
+                left_anchored = i == 0
+                leading_slash = i > 0 and pattern_trimmed[i - 1] == "/"
+                right_anchored = i + 2 == len(pattern_trimmed)
+                trailing_slash = (
+                    i + 2 < len(pattern_trimmed) and pattern_trimmed[i + 2] == "/"
+                )
+
+                if (left_anchored or leading_slash) and (
+                    right_anchored or trailing_slash
+                ):
+                    regex += ".*"
+
+                    i += 2
+                    continue
+            regex += "[^/]*"
+        elif ch == "?":
+            regex += "[^/]"
+        elif ch == "[":
+            in_char_class = True
+            regex += ch
+        elif ch == "]":
+            if in_char_class:
+                regex += ch
+                in_char_class = False
+            else:
+                regex += re.escape(ch)
+        else:
+            regex += re.escape(ch)
+
+    if in_char_class:
+        raise ValueError(f"unterminated character class in pattern {pattern}")
 
-    return re.compile(start + re.escape(path) + end)
+    if matches_dir:
+        regex += "/"
+    else:
+        regex += r"(?:\Z|/)"
+    return re.compile(regex)
 
 
 def parse_owner(owner: str) -> Optional[OwnerTuple]:
@@ -51,44 +123,27 @@ def parse_owner(owner: str) -> Optional[OwnerTuple]:
     return None
 
 
-def pattern_matches(path: str, pattern: Pattern[str]) -> bool:
-    match = pattern.match(path)
-    # The regex we compile from the paths are required to match competely for
-    # the match to count.
-    return match is not None and match.span() == (0, len(path))
-
-
 class CodeOwners:
     def __init__(self, text: str) -> None:
-        paths: List[Tuple[Pattern[str], str, List[OwnerTuple]]] = []
+        paths: List[Tuple[Pattern[str], List[OwnerTuple]]] = []
         for line in text.splitlines():
-            if line != "" and not line.startswith("#"):
-                elements = iter(line.split())
-                path = next(elements, None)
-                if path is not None:
-                    owners: List[OwnerTuple] = []
-                    for owner in elements:
-                        owner_res = parse_owner(owner)
-                        if owner_res is not None:
-                            owners.append(owner_res)
-                    paths.append((path_to_regex(path), path, owners))
+            if line == "" or line.startswith("#"):
+                continue
+            elements = iter(line.split())
+            path = next(elements, None)
+            if path is None:
+                continue
+            owners: List[OwnerTuple] = []
+            for owner in elements:
+                owner_res = parse_owner(owner)
+                if owner_res is not None:
+                    owners.append(owner_res)
+            paths.append((path_to_regex(path), owners))
         paths.reverse()
         self.paths = paths
 
     def of(self, filepath: str) -> List[OwnerTuple]:
-        for pattern, path, owners in self.paths:
-            if pattern_matches(filepath, pattern):
+        for pattern, owners in self.paths:
+            if pattern.search(filepath) is not None:
                 return owners
-            else:
-                if path.endswith("/*"):
-                    continue
-                p = PurePath(filepath)
-                while True:
-                    parent = p.parent
-                    if parent == PurePath("/") or parent == PurePath("."):
-                        break
-                    if pattern_matches(str(parent), pattern):
-                        return owners
-                    else:
-                        p = parent
         return []