-
Notifications
You must be signed in to change notification settings - Fork 18
/
trivial-validate.py
executable file
·161 lines (141 loc) · 5.15 KB
/
trivial-validate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#!/usr/bin/env python
import sys, re, os
try:
from lxml import etree
except ImportError:
sys.stderr.write("** Could not import lxml! Rule validation SKIPPED.\n")
sys.stderr.write("** Caution: A resulting build MAY CONTAIN INVALID RULES.\n")
sys.stderr.write("** Please install libxml2 and lxml to permit validation!\n")
sys.exit(0)
# If a directory name was given as the first command-line argument, make it
# the working directory before any files are listed.
if len(sys.argv) > 1:
    try:
        os.chdir(sys.argv[1])
    except OSError:
        # os.chdir() signals a missing or inaccessible directory with OSError.
        # Catching only that (rather than a bare except) lets
        # KeyboardInterrupt and SystemExit propagate normally.
        sys.stderr.write("could not chdir to %s\n" % sys.argv[1])
        sys.stderr.write("usage: %s directoryname\n" % sys.argv[0])
        sys.exit(2)
def test_not_anchored(tree):
# Rules not anchored to the beginning of a line.
for f in tree.xpath("/ruleset/rule/@from"):
if not f or f[0] != "^":
return False
return True
def test_bad_regexp(tree):
# Rules with invalid regular expressions.
for f in tree.xpath("/ruleset/rule/@from") + \
tree.xpath("/ruleset/exclusion/@pattern") + \
tree.xpath("/ruleset/securecookie/@host"):
try:
re.compile(f)
except:
return False
return True
def test_unescaped_dots(tree):
# Rules containing unescaped dots outside of brackets and before slash.
# Note: this is meant to require example\.com instead of example.com,
# but it also forbids things like .* which usually ought to be replaced
# with something like ([^/:@\.]+)
for f in tree.xpath("/ruleset/rule/@from"):
escaped = False
bracketed = False
s = re.sub("^\^https?://", "", f)
for c in s:
if c == "\\":
escaped = not escaped
elif not escaped and c == "[":
bracketed = True
elif not escaped and c == "]":
bracketed = False
elif not escaped and not bracketed and c == ".":
return False
elif not bracketed and c == "/":
break
else:
escaped = False
return True
def test_space_in_to(tree):
# Rules where the to pattern contains a space.
for t in tree.xpath("/ruleset/rule/@to"):
if ' ' in t:
return False
return True
def test_unencrypted_to(tree):
# Rules that redirect to something other than https.
# This used to test for http: but testing for lack of https: will
# catch more kinds of mistakes.
for t in tree.xpath("/ruleset/rule/@to"):
if t[:6] != "https:":
return False
return True
def test_backslash_in_to(tree):
# Rules containing backslashes in to pattern.
for t in tree.xpath("/ruleset/rule/@to"):
if '\\' in t:
return False
return True
def test_no_trailing_slash(tree):
# Rules not containing trailing slash in from or to pattern.
for r in tree.xpath("/ruleset/rule"):
f, t = r.get("from"), r.get("to")
if not re.search("//.*/", f):
return False
if not re.search("//.*/", t):
return False
return True
def test_lacks_target_host(tree):
# Rules that lack at least one target host (target tag with host attr).
return not not tree.xpath("/ruleset/target/@host")
def test_bad_target_host(tree):
# Rules where a target host contains multiple wildcards or a slash.
for target in tree.xpath("/ruleset/target/@host"):
if "/" in target:
return False
if target.count("*") > 1:
return False
return True
def test_duplicated_target_host(tree):
# Rules where a single target host appears more than once.
targets = tree.xpath("/ruleset/target/@host")
return len(set(targets)) == len(targets)
printable_characters = set(map(chr, xrange(32, 127)))
def test_non_ascii(tree):
# Rules containing non-printable characters.
for t in tree.xpath("/ruleset/rule/@to"):
for c in t:
if c not in printable_characters:
return False
return True
# Every per-ruleset check.  Each is called with the parsed tree and returns
# True when the ruleset passes.
tests = [test_not_anchored, test_bad_regexp, test_unescaped_dots,
         test_space_in_to, test_unencrypted_to, test_backslash_in_to,
         test_no_trailing_slash, test_lacks_target_host, test_bad_target_host,
         test_duplicated_target_host, test_non_ascii]

# Exit status: 0 = all good, 1 = at least one failure, 3 = no parseable file.
failure = 0
seen_file = False
all_targets = set()
all_names = set()

for fi in os.listdir("."):
    if fi[-4:] != ".xml":
        continue
    try:
        tree = etree.parse(fi)
    except Exception as oops:
        failure = 1
        sys.stdout.write("%s failed XML validity: %s\n" % (fi, oops))
        # There is no tree to examine for this file.  Falling through would
        # re-test the previous file's tree, or raise NameError if this is the
        # first file.
        continue
    seen_file = True

    ruleset_names = tree.xpath("/ruleset/@name")
    if not ruleset_names:
        # Report the problem instead of crashing with IndexError.
        failure = 1
        sys.stdout.write("failure: %s has no ruleset name\n" % fi)
    else:
        ruleset_name = ruleset_names[0]
        if ruleset_name in all_names:
            failure = 1
            sys.stdout.write("failure: duplicate ruleset name %s\n" % ruleset_name)
        all_names.add(ruleset_name)

    for test in tests:
        if not test(tree):
            failure = 1
            sys.stdout.write("failure: %s failed test %s\n" % (fi, test))

    # A host targeted by more than one ruleset is only a warning; it does not
    # affect the exit status.
    for target in tree.xpath("/ruleset/target/@host"):
        if target in all_targets:
            sys.stdout.write("warning: duplicate target: %s\n" % target)
        all_targets.add(target)

if not seen_file:
    sys.stdout.write("There were no valid XML files in the current or ")
    sys.stdout.write("specified directory.\n")
    failure = 3

sys.exit(failure)