diff --git a/scripts/check_license.py b/scripts/check_license.py index 96a2c83..37103d8 100755 --- a/scripts/check_license.py +++ b/scripts/check_license.py @@ -40,7 +40,7 @@ ".git", ".github", ".flake8", - ".gitignore" ".pre-commit-config.yaml", + ".gitignore", ".mypy_cache", ".mypy.ini", ".pylintrc", @@ -115,8 +115,8 @@ See the License for the specific language governing permissions and limitations under the License.""" -# A list of all chars that may be used to introduce a comment: -COMMENT_CHARS = ["#"] +# A list of strings that may be used to introduce a line comment: +LINE_COMMENTS = ["#"] AUTHOR = """Universität Tübingen, DKFZ, EMBL, and Universität zu Köln for the German Human Genome-Phenome Archive (GHGA)""" @@ -202,24 +202,25 @@ def get_target_files( file_ for file_ in all_files if not ( - any([file_.is_relative_to(excl) for excl in exclude_normalized]) - or any([str(file_).endswith(ending) for ending in exclude_endings]) - or any([re.match(pattern, str(file_)) for pattern in exclude_pattern]) + any(file_.is_relative_to(excl) for excl in exclude_normalized) + or any(str(file_).endswith(ending) for ending in exclude_endings) + or any(re.match(pattern, str(file_)) for pattern in exclude_pattern) ) ] return target_files -def normalized_line(line: str, chars_to_trim: list[str] = COMMENT_CHARS) -> str: - norm_line = line.strip() +def normalized_line(line: str, line_comments: list[str] = LINE_COMMENTS) -> str: + line = line.strip() + for line_comment in line_comments: + line_without_comment = line.removeprefix(line_comment) + if line_without_comment != line: + line = line_without_comment.lstrip() + break + return line - for char in chars_to_trim: - norm_line = norm_line.strip(char) - return norm_line.strip("\n").strip("\t").strip() - - -def normalized_text(text: str, chars_to_trim: list[str] = COMMENT_CHARS) -> str: +def normalized_text(text: str, line_comments: list[str] = LINE_COMMENTS) -> str: "Normalize a license header text." lines = text.split("\n") @@ -231,7 +232,7 @@ def normalized_text(text: str, chars_to_trim: list[str] = COMMENT_CHARS) -> str: if stripped_line.startswith("#!"): continue - norm_line = normalized_line(stripped_line) + norm_line = normalized_line(stripped_line, line_comments=line_comments) # exclude empty lines: if norm_line == "": @@ -249,22 +250,17 @@ def format_copyright_template(copyright_template: str, author: str) -> str: return normalized_text(copyright_template.replace("{author}", author)) -def is_commented_line(line: str, comment_chars: list[str] = COMMENT_CHARS) -> bool: +def is_commented_line(line: str, line_comments: list[str] = LINE_COMMENTS) -> bool: """Checks whether a line is a comment.""" - line_stripped = line.strip() - for comment_char in comment_chars: - if line_stripped.startswith(comment_char): - return True - - return False + return line.lstrip().startswith(tuple(line_comments)) def is_empty_line(line: str) -> bool: """Checks whether a line is empty.""" - return line.strip("\n").strip("\t").strip() == "" + return not line.strip() -def get_header(file_path: Path, comment_chars: list[str] = COMMENT_CHARS): +def get_header(file_path: Path, line_comments: list[str] = LINE_COMMENTS): """Extracts the header from a file and normalizes it.""" header_lines: list[str] = [] @@ -272,7 +268,7 @@ def get_header(file_path: Path, comment_chars: list[str] = COMMENT_CHARS): with open(file_path) as file: for line in file: if is_commented_line( - line, comment_chars=comment_chars + line, line_comments=line_comments ) or is_empty_line(line): header_lines.append(line) else: @@ -282,7 +278,7 @@ def get_header(file_path: Path, comment_chars: list[str] = COMMENT_CHARS): # normalize the lines: header = "".join(header_lines) - return normalized_text(header, chars_to_trim=comment_chars) + return normalized_text(header, line_comments=line_comments) def validate_year_string(year_string: str, min_year: int = MIN_YEAR) -> bool: @@ -317,7 +313,7 @@ def check_copyright_notice( global_copyright: GlobalCopyrightNotice, copyright_template: str = COPYRIGHT_TEMPLATE, author: str = AUTHOR, - comment_chars: list[str] = COMMENT_CHARS, + line_comments: list[str] = LINE_COMMENTS, min_year: int = MIN_YEAR, ) -> bool: """Checks the specified copyright text against a template. @@ -385,7 +381,7 @@ def check_file_headers( exclude: list[str] = EXCLUDE, exclude_endings: list[str] = EXCLUDE_ENDINGS, exclude_pattern: list[str] = EXCLUDE_PATTERN, - comment_chars: list[str] = COMMENT_CHARS, + line_comments: list[str] = LINE_COMMENTS, min_year: int = MIN_YEAR, ) -> tuple[list[Path], list[Path]]: """Check files for presence of a license header and verify that @@ -429,13 +425,13 @@ def check_file_headers( for target_file in target_files: try: - header = get_header(target_file, comment_chars=comment_chars) + header = get_header(target_file, line_comments=line_comments) if check_copyright_notice( copyright=header, global_copyright=global_copyright, copyright_template=copyright_template, author=author, - comment_chars=comment_chars, + line_comments=line_comments, min_year=min_year, ): passed_files.append(target_file) @@ -453,7 +449,7 @@ def check_license_file( global_copyright: GlobalCopyrightNotice, copyright_template: str = COPYRIGHT_TEMPLATE, author: str = AUTHOR, - comment_chars: list[str] = COMMENT_CHARS, + line_comments: list[str] = LINE_COMMENTS, min_year: int = MIN_YEAR, ) -> bool: """Currently only checks if the copyright notice in the @@ -495,7 +491,7 @@ def check_license_file( global_copyright=global_copyright, copyright_template=copyright_template, author=author, - comment_chars=comment_chars, + line_comments=line_comments, min_year=min_year, )