Update Template

ghga-de · May 14, 2024 · 43df0ee
1 parent d042e66
commit 43df0ee
Showing 1 changed file with 28 additions and 32 deletions.
diff --git a/scripts/check_license.py b/scripts/check_license.py
@@ -40,7 +40,7 @@
     ".git",
     ".github",
     ".flake8",
-    ".gitignore" ".pre-commit-config.yaml",
+    ".gitignore",
     ".mypy_cache",
     ".mypy.ini",
     ".pylintrc",
@@ -115,8 +115,8 @@
 See the License for the specific language governing permissions and
 limitations under the License."""
 
-# A list of all chars that may be used to introduce a comment:
-COMMENT_CHARS = ["#"]
+# A list of strings that may be used to introduce a line comment:
+LINE_COMMENTS = ["#"]
 
 AUTHOR = """Universität Tübingen, DKFZ, EMBL, and Universität zu Köln
 for the German Human Genome-Phenome Archive (GHGA)"""
@@ -202,24 +202,25 @@ def get_target_files(
         file_
         for file_ in all_files
         if not (
-            any([file_.is_relative_to(excl) for excl in exclude_normalized])
-            or any([str(file_).endswith(ending) for ending in exclude_endings])
-            or any([re.match(pattern, str(file_)) for pattern in exclude_pattern])
+            any(file_.is_relative_to(excl) for excl in exclude_normalized)
+            or any(str(file_).endswith(ending) for ending in exclude_endings)
+            or any(re.match(pattern, str(file_)) for pattern in exclude_pattern)
         )
     ]
     return target_files
 
 
-def normalized_line(line: str, chars_to_trim: list[str] = COMMENT_CHARS) -> str:
-    norm_line = line.strip()
+def normalized_line(line: str, line_comments: list[str] = LINE_COMMENTS) -> str:
+    line = line.strip()
+    for line_comment in line_comments:
+        line_without_comment = line.removeprefix(line_comment)
+        if line_without_comment != line:
+            line = line_without_comment.lstrip()
+            break
+    return line
 
-    for char in chars_to_trim:
-        norm_line = norm_line.strip(char)
 
-    return norm_line.strip("\n").strip("\t").strip()
-
-
-def normalized_text(text: str, chars_to_trim: list[str] = COMMENT_CHARS) -> str:
+def normalized_text(text: str, line_comments: list[str] = LINE_COMMENTS) -> str:
     "Normalize a license header text."
     lines = text.split("\n")
 
@@ -231,7 +232,7 @@ def normalized_text(text: str, chars_to_trim: list[str] = COMMENT_CHARS) -> str:
         if stripped_line.startswith("#!"):
             continue
 
-        norm_line = normalized_line(stripped_line)
+        norm_line = normalized_line(stripped_line, line_comments=line_comments)
 
         # exclude empty lines:
         if norm_line == "":
@@ -249,30 +250,25 @@ def format_copyright_template(copyright_template: str, author: str) -> str:
     return normalized_text(copyright_template.replace("{author}", author))
 
 
-def is_commented_line(line: str, comment_chars: list[str] = COMMENT_CHARS) -> bool:
+def is_commented_line(line: str, line_comments: list[str] = LINE_COMMENTS) -> bool:
     """Checks whether a line is a comment."""
-    line_stripped = line.strip()
-    for comment_char in comment_chars:
-        if line_stripped.startswith(comment_char):
-            return True
-
-    return False
+    return line.lstrip().startswith(tuple(line_comments))
 
 
 def is_empty_line(line: str) -> bool:
     """Checks whether a line is empty."""
-    return line.strip("\n").strip("\t").strip() == ""
+    return not line.strip()
 
 
-def get_header(file_path: Path, comment_chars: list[str] = COMMENT_CHARS):
+def get_header(file_path: Path, line_comments: list[str] = LINE_COMMENTS):
     """Extracts the header from a file and normalizes it."""
     header_lines: list[str] = []
 
     try:
         with open(file_path) as file:
             for line in file:
                 if is_commented_line(
-                    line, comment_chars=comment_chars
+                    line, line_comments=line_comments
                 ) or is_empty_line(line):
                     header_lines.append(line)
                 else:
@@ -282,7 +278,7 @@ def get_header(file_path: Path, comment_chars: list[str] = COMMENT_CHARS):
 
     # normalize the lines:
     header = "".join(header_lines)
-    return normalized_text(header, chars_to_trim=comment_chars)
+    return normalized_text(header, line_comments=line_comments)
 
 
 def validate_year_string(year_string: str, min_year: int = MIN_YEAR) -> bool:
@@ -317,7 +313,7 @@ def check_copyright_notice(
     global_copyright: GlobalCopyrightNotice,
     copyright_template: str = COPYRIGHT_TEMPLATE,
     author: str = AUTHOR,
-    comment_chars: list[str] = COMMENT_CHARS,
+    line_comments: list[str] = LINE_COMMENTS,
     min_year: int = MIN_YEAR,
 ) -> bool:
     """Checks the specified copyright text against a template.
@@ -385,7 +381,7 @@ def check_file_headers(
     exclude: list[str] = EXCLUDE,
     exclude_endings: list[str] = EXCLUDE_ENDINGS,
     exclude_pattern: list[str] = EXCLUDE_PATTERN,
-    comment_chars: list[str] = COMMENT_CHARS,
+    line_comments: list[str] = LINE_COMMENTS,
     min_year: int = MIN_YEAR,
 ) -> tuple[list[Path], list[Path]]:
     """Check files for presence of a license header and verify that
@@ -429,13 +425,13 @@ def check_file_headers(
 
     for target_file in target_files:
         try:
-            header = get_header(target_file, comment_chars=comment_chars)
+            header = get_header(target_file, line_comments=line_comments)
             if check_copyright_notice(
                 copyright=header,
                 global_copyright=global_copyright,
                 copyright_template=copyright_template,
                 author=author,
-                comment_chars=comment_chars,
+                line_comments=line_comments,
                 min_year=min_year,
             ):
                 passed_files.append(target_file)
@@ -453,7 +449,7 @@ def check_license_file(
     global_copyright: GlobalCopyrightNotice,
     copyright_template: str = COPYRIGHT_TEMPLATE,
     author: str = AUTHOR,
-    comment_chars: list[str] = COMMENT_CHARS,
+    line_comments: list[str] = LINE_COMMENTS,
     min_year: int = MIN_YEAR,
 ) -> bool:
     """Currently only checks if the copyright notice in the
@@ -495,7 +491,7 @@ def check_license_file(
         global_copyright=global_copyright,
         copyright_template=copyright_template,
         author=author,
-        comment_chars=comment_chars,
+        line_comments=line_comments,
         min_year=min_year,
     )