forked from apache/doris-website
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheck_move.py
executable file
·126 lines (98 loc) · 4.23 KB
/
check_move.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import argparse
import subprocess
import re
import os
import sys
from typing import AnyStr, List
from urllib.parse import urlparse
move_pairs = []
deletes = []
change_detected = False
def is_same_file(path1, path2):
return os.path.normpath(path1) == os.path.normpath(path2)
def remove_suffix(text: str, suffix: str):
return text.rsplit(suffix, 1)[0]
def process_md_file(file_path):
link_pattern = re.compile(r"\[.*?\]\((.*?)\)")
global change_detected
with open(file_path, "r", encoding="utf-8") as f:
content = f.read()
links = link_pattern.findall(content)
for link in links:
if not urlparse(link).scheme and not os.path.isabs(link):
full_path: str = os.path.normpath(
os.path.join(os.path.dirname(file_path), link)
)
if not full_path.endswith(".md") and not full_path.endswith(".mdx"):
full_path += ".md"
for [from_path, to_path] in move_pairs:
# In md, the link relative path starts from the directory where the document is located, not the document
relative_to_path = os.path.relpath(
to_path, os.path.dirname(file_path)
)
relative_to_path = remove_suffix(relative_to_path, ".md")
relative_to_path = remove_suffix(relative_to_path, ".mdx")
if is_same_file(full_path, from_path):
print(
f"{file_path} has a link moved by this commit: from {link} to {relative_to_path}"
)
change_detected = True
for deleted_path in deletes:
if is_same_file(full_path, deleted_path):
print(f"{file_path} has a link removed by this commit: {link}")
change_detected = True
def extract_file_changes(git_show_output: List[AnyStr]):
print(f"commit lines: {len(git_show_output)}")
content = b"".join(git_show_output).decode()
# print(content)
move_pattern = r"rename from (.+?)\nrename to (.+?)\n"
move_matches = re.findall(move_pattern, content, re.DOTALL | re.MULTILINE)
print(f"moved files: {len(move_matches)}")
delete_pattern = r"diff --git a/(\S+) b/\1\ndeleted file mode \d+\nindex .+"
delete_matches = re.findall(delete_pattern, content, re.DOTALL | re.MULTILINE)
print(f"deleted files: {len(delete_matches)}")
global move_pairs
global deletes
move_pairs = move_matches
deletes = delete_matches
def travel(root_path: str):
for root, dirs, files in os.walk(root_path):
for file in files:
if file.endswith(".md") or file.endswith(".mdx"):
md_file_path = os.path.join(root, file)
process_md_file(md_file_path)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Add commit id as arguments to check")
parser.add_argument("commit_id", type=str, help="id of the commit to check")
args = parser.parse_args()
# extract all move/delete files
p = subprocess.Popen(
"git show " + args.commit_id,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
)
extract_file_changes(p.stdout.readlines())
# check docs directories
travel("docs")
travel("i18n")
travel("versioned_docs")
if change_detected:
print("Failed!")
sys.exit(1)