-
Notifications
You must be signed in to change notification settings - Fork 0
/
run.py
executable file
·279 lines (246 loc) · 9.83 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
#!/usr/bin/env python3
import argparse
import json
import pathlib
import sys
import pagegraph.commands
import pagegraph.serialize
from pagegraph import VERSION
def scripts_cmd(args: argparse.Namespace):
    """Handle the `scripts` subcommand.

    Forwards the parsed options (`input`, `frame`, `id`, `source`,
    `omit_executors`, `debug`) positionally to `pagegraph.commands.scripts`
    and returns whatever that call returns.
    """
    report = pagegraph.commands.scripts(
        args.input,
        args.frame,
        args.id,
        args.source,
        args.omit_executors,
        args.debug,
    )
    return report
# def effects_cmd(args):
# return pagegraph.commands.effects(args.input, args.id, args.loose,
# args.debug)
def element_query_cmd(args: argparse.Namespace):
    """Handle the `elm` subcommand.

    Forwards the parsed options (`input`, `id`, `depth`, `debug`)
    positionally to `pagegraph.commands.element_query` and returns its
    result.
    """
    report = pagegraph.commands.element_query(
        args.input,
        args.id,
        args.depth,
        args.debug,
    )
    return report
def html_query_cmd(args: argparse.Namespace):
    """Handle the `html` subcommand.

    Forwards the parsed options (`input`, `frame`, `at_serialization`,
    `body_content`, `debug`) positionally to
    `pagegraph.commands.html_query_cmd` and returns its result.
    """
    # NOTE(review): the callee carries a `_cmd` suffix, unlike the callees
    # used by the other handlers in this file -- confirm that
    # `pagegraph.commands.html_query_cmd` is the intended name.
    report = pagegraph.commands.html_query_cmd(
        args.input,
        args.frame,
        args.at_serialization,
        args.body_content,
        args.debug,
    )
    return report
def js_calls_cmd(args: argparse.Namespace):
    """Handle the `js-calls` subcommand.

    Forwards the parsed options (`input`, `frame`, `cross`, `method`, `id`,
    `debug`) positionally to `pagegraph.commands.js_calls` and returns its
    result.
    """
    report = pagegraph.commands.js_calls(
        args.input,
        args.frame,
        args.cross,
        args.method,
        args.id,
        args.debug,
    )
    return report
def request_cmd(args: argparse.Namespace):
    """Handle the `requests` subcommand.

    Forwards `input`, `frame` and `debug` positionally to
    `pagegraph.commands.requests` and returns its result.
    """
    report = pagegraph.commands.requests(
        args.input,
        args.frame,
        args.debug,
    )
    return report
def subframes_cmd(args: argparse.Namespace):
    """Handle the `subframes` subcommand.

    Forwards `input`, `local` and `debug` positionally to
    `pagegraph.commands.subframes` and returns its result.
    """
    report = pagegraph.commands.subframes(
        args.input,
        args.local,
        args.debug,
    )
    return report
def unknown_query_cmd(args: argparse.Namespace):
    """Handle the `unknown` subcommand.

    Passes only the recording path (`args.input`) to
    `pagegraph.commands.unknown` and returns its result.
    """
    recording_path = args.input
    return pagegraph.commands.unknown(recording_path)
def validate_cmd(args: argparse.Namespace):
    """Handle the `validate` subcommand.

    Passes only the recording path (`args.input`) to
    `pagegraph.commands.validate` and returns its result.
    """
    recording_path = args.input
    return pagegraph.commands.validate(recording_path)
# Top-level command-line parser. Each query is registered as a subcommand on
# SUBPARSERS, and each subcommand stores its handler under `func`.
PARSER = argparse.ArgumentParser(
    prog="PageGraph Query",
    description="Extracts information about a Web page's execution from "
                "a PageGraph recording.",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
PARSER.add_argument(
    "--version",
    action="version",
    version=f"%(prog)s {VERSION}")
# Global flag; handlers that support it read it as `args.debug`.
PARSER.add_argument("--debug", action="store_true", default=False)
# A subcommand is mandatory. `dest` names the required argument so that a
# missing subcommand produces a normal usage error; without it, some older
# Python 3 releases fail with a TypeError while formatting that error.
SUBPARSERS = PARSER.add_subparsers(dest="command", required=True)
# `subframes` subcommand, dispatched to subframes_cmd.
SUBFRAMES_PARSER = SUBPARSERS.add_parser(
    "subframes",
    help="Print information about subframes created and loaded by page.")
SUBFRAMES_PARSER.add_argument(
    "input",
    type=pathlib.Path,
    help="Path to PageGraph recording.")
SUBFRAMES_PARSER.add_argument(
    "-l", "--local",
    action="store_true",
    help="Only print information about frames that are local to "
         "the top level frame at serialization time.")
SUBFRAMES_PARSER.set_defaults(func=subframes_cmd)
# `validate` subcommand, dispatched to validate_cmd. Takes only the path to
# the recording.
VALIDATE_PARSER = SUBPARSERS.add_parser(
    "validate",
    help="Just runs all validation and structure checks against a graph.",
)
VALIDATE_PARSER.add_argument(
    "input",
    help="Path to PageGraph recording.",
    type=pathlib.Path,
)
VALIDATE_PARSER.set_defaults(func=validate_cmd)
# `requests` subcommand, dispatched to request_cmd.
REQUEST_PARSER = SUBPARSERS.add_parser(
    "requests",
    help="Print information about requests made during page execution.",
)
REQUEST_PARSER.add_argument(
    "input",
    help="Path to PageGraph recording.",
    type=pathlib.Path,
)
# Optional frame filter; left as None when not given.
REQUEST_PARSER.add_argument(
    "-f", "--frame",
    help="Only print information about requests made in a specific "
         "frame (as described by PageGraph node ids, in the format "
         "'n##').",
    default=None,
)
REQUEST_PARSER.set_defaults(func=request_cmd)
# `scripts` subcommand, dispatched to scripts_cmd.
SCRIPTS_PARSER = SUBPARSERS.add_parser(
    "scripts",
    help="Print information about JS units executed during page execution.",
)
SCRIPTS_PARSER.add_argument(
    "input",
    help="Path to PageGraph recording.",
    type=pathlib.Path,
)
# Optional filter on a single JS unit.
SCRIPTS_PARSER.add_argument(
    "-i", "--id",
    help="If provided, only print information about JS units with the "
         "given ID (as described by PageGraph node ids, in the format "
         "'n##').",
    default=None,
)
SCRIPTS_PARSER.add_argument(
    "-s", "--source",
    action="store_true",
    default=False,
    help="If included, also include script source in each report.",
)
# Optional filter on the calling frame context.
SCRIPTS_PARSER.add_argument(
    "-f", "--frame",
    help="Only include JS code units executed in a particular frame context "
         "(as described by PageGraph node ids, in the format 'n##'). Note "
         "that this filters on the calling frame context, not the receiving "
         "frame context, which will differ in some cases, such as "
         "same-origin cross-frame calls.",
    default=None,
)
SCRIPTS_PARSER.add_argument(
    "-o", "--omit-executors",
    action="store_true",
    default=False,
    help="If included, do not append information about why or how each "
         "script was executed.",
)
SCRIPTS_PARSER.set_defaults(func=scripts_cmd)
# `js-calls` subcommand, dispatched to js_calls_cmd.
JS_CALLS_PARSER = SUBPARSERS.add_parser(
    "js-calls",
    help="Print information about JS calls made during page execution.",
)
JS_CALLS_PARSER.add_argument(
    "input",
    help="Path to PageGraph recording.",
    type=pathlib.Path,
)
# Optional filter on the calling frame context.
JS_CALLS_PARSER.add_argument(
    "-f", "--frame",
    help="Only include JS calls made by code running in this frame's "
         "context (as described by PageGraph node ids, in the format "
         "'n##'). Note that this filters on the calling frame context, not "
         "the receiving frame context, which will differ in some cases, "
         "such as same-origin cross-frame calls.",
    default=None,
)
JS_CALLS_PARSER.add_argument(
    "-c", "--cross",
    action="store_true",
    default=False,
    help="Only include JS calls where the calling frame context and "
         "the receiving frame context differ.",
)
# Optional substring filter on the called function or method name.
JS_CALLS_PARSER.add_argument(
    "-m", "--method",
    help="Only include JS calls where the function or method being "
         "called includes this value as a substring.",
    default=None,
)
# Optional filter on the calling Script node.
JS_CALLS_PARSER.add_argument(
    "-i", "--id",
    help="If provided, only print information about JS calls made by "
         "the Script node with the given ID (as described by PageGraph "
         "node ids, in the format 'n##').",
    default=None,
)
JS_CALLS_PARSER.set_defaults(func=js_calls_cmd)
# `elm` subcommand, dispatched to element_query_cmd. Both the recording path
# and the element id are required positionals.
ELEMENT_QUERY_PARSER = SUBPARSERS.add_parser(
    "elm",
    help="Print information about a node or edge in the graph.",
)
ELEMENT_QUERY_PARSER.add_argument(
    "input",
    help="Path to PageGraph recording.",
    type=pathlib.Path,
)
ELEMENT_QUERY_PARSER.add_argument(
    "id",
    help="The id of the node to print information about (as described "
         "by PageGraph node ids, in the format 'n##')",
)
# Parsed as an int; 0 when the flag is omitted.
ELEMENT_QUERY_PARSER.add_argument(
    "-d", "--depth",
    type=int,
    default=0,
    help="Depth of the recursion to summarize in the graph. Defaults "
         "to 0 (only print detailed information about target element).",
)
ELEMENT_QUERY_PARSER.set_defaults(func=element_query_cmd)
# `html` subcommand, dispatched to html_query_cmd.
HTML_QUERY_PARSER = SUBPARSERS.add_parser(
    "html",
    help="Print information about the HTML elements in a document.")
HTML_QUERY_PARSER.add_argument(
    "input",
    type=pathlib.Path,
    help="Path to PageGraph recording.")
# Optional frame filter; left as None when not given.
HTML_QUERY_PARSER.add_argument(
    "-f", "--frame",
    default=None,
    help="Only include HTML elements that were inserted into the document in "
         "a given frame (as described by PageGraph node ids, in the format "
         "'n##').")
# Stored as `args.at_serialization`.
HTML_QUERY_PARSER.add_argument(
    "-s", "--at-serialization",
    default=False,
    action="store_true",
    help="If passed, only include HTML elements that were present in the "
         "document when the document was serialized (i.e., they weren't "
         "inserted and then later deleted).")
# Stored as `args.body_content`.
HTML_QUERY_PARSER.add_argument(
    "-b", "--body-content",
    default=False,
    action="store_true",
    help="Only return elements that appear in the body of the document, "
         "meaning elements that are a child of the <body> element.")
HTML_QUERY_PARSER.set_defaults(func=html_query_cmd)
# `unknown` subcommand, dispatched to unknown_query_cmd. Takes only the path
# to the recording.
UNKNOWN_QUERY_PARSER = SUBPARSERS.add_parser(
    "unknown",
    help="Print information about any events that occurred where we could "
         "not attribute the script event to a running script. (note this "
         "is different from the 'validate' command, which only checks if "
         "the structure of the graph is as expected).",
)
UNKNOWN_QUERY_PARSER.add_argument(
    "input",
    help="Path to PageGraph recording.",
    type=pathlib.Path,
)
UNKNOWN_QUERY_PARSER.set_defaults(func=unknown_query_cmd)
# EFFECTS_QUERY_PARSER = SUBPARSERS.add_parser(
# "effects",
# help="Print information about the effects the given element had on "
# "the page. By default only includes requests.")
# EFFECTS_QUERY_PARSER.add_argument(
# "input",
# type=pathlib.Path,
# help="Path to PageGraph recording.")
# EFFECTS_QUERY_PARSER.add_argument(
# "id",
# help="Id of a frame, script, request, or parser node "
# "(as described by PageGraph node ids, in the format 'n##').")
# EFFECTS_QUERY_PARSER.add_argument(
# "-l", "--loose",
# default=False,
# action="store_true",
# help="By default, the 'effects' query includes any action or element "
# "where the target node was the primary cause of the action (i.e., "
# "actions where the target node was the most immediate cause). "
# "Passing this flag loosens that, and includes any action or element "
# "that this node was involved with at all.")
# EFFECTS_QUERY_PARSER.add_argument(
# "--include-js-builtin-calls",
# default=False,
# action="store_true",
# help="Include calls to JS builtins that occurred because of the target "
# "node.")
# EFFECTS_QUERY_PARSER.add_argument(
# "--include-web-api-calls",
# default=False,
# action="store_true",
# help="Include calls to instrumented Web APIs that occurred because of "
# "the target node.")
# EFFECTS_QUERY_PARSER.add_argument(
# "--exclude-requests",
# default=False,
# action="store_true",
# help="Do not include requests that occurred because of the target node.")
# EFFECTS_QUERY_PARSER.set_defaults(func=effects_cmd)
# Script entry point: parse the command line, run the handler the chosen
# subcommand stored under `func`, convert the result with
# pagegraph.serialize.to_jsonable, and print it as one JSON document.
try:
    ARGS = PARSER.parse_args()
    RESULT = ARGS.func(ARGS)
    REPORT = pagegraph.serialize.to_jsonable(RESULT)
    print(json.dumps(REPORT))
except ValueError as err:
    # Any ValueError raised above (parsing, the handler, serialization or
    # JSON encoding) is reported on stderr and ends the run with status 1.
    print(f"Invalid argument: {err}", file=sys.stderr)
    raise SystemExit(1)