From 8ff4fab8cb987daa8d710ea81ebb564aafeeec82 Mon Sep 17 00:00:00 2001 From: Yuh Shin Ong Date: Tue, 19 Mar 2024 00:34:03 -0700 Subject: [PATCH] Parser: Prefix origin callee ports with "[sink|source]:" Summary: Remove the whole "leaf" thing in `Port.from_json(...)` which used to determine when a port should be "source" or "sink". Instead, `Origin.from_json` (which is where this "leaf" thing is used) will determine what the port should be, i.e. specifically prefix it with "source:" or "sink:" which SAPP needs for traces to work, then append the actual port. Reviewed By: anwesht Differential Revision: D55001036 fbshipit-source-id: 8fd14c40a5497c9abf746061d8c1ae30a93796e7 --- sapp/pipeline/mariana_trench_parser.py | 2 +- .../pipeline/mariana_trench_parser_objects.py | 34 +++++++++---------- .../tests/test_mariana_trench_parser.py | 22 ++++++------ 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/sapp/pipeline/mariana_trench_parser.py b/sapp/pipeline/mariana_trench_parser.py index 5aea6039..fcbecdf3 100644 --- a/sapp/pipeline/mariana_trench_parser.py +++ b/sapp/pipeline/mariana_trench_parser.py @@ -486,7 +486,7 @@ def _parse_condition( for leaf_model in model.get(condition_model_key, []): caller = ConditionCall( method=caller_method, - port=mariana_trench.Port.from_json(leaf_model[port_key], leaf_kind), + port=mariana_trench.Port.from_json(leaf_model[port_key]), position=caller_position, ) for leaf_taint in leaf_model[leaf_model_key]: diff --git a/sapp/pipeline/mariana_trench_parser_objects.py b/sapp/pipeline/mariana_trench_parser_objects.py index df145c94..08dbbada 100644 --- a/sapp/pipeline/mariana_trench_parser_objects.py +++ b/sapp/pipeline/mariana_trench_parser_objects.py @@ -44,7 +44,8 @@ class Port(NamedTuple): def is_leaf(self) -> bool: return ( - self.value in ("source", "sink") + self.value.startswith("source") + or self.value.startswith("sink") or self.value.startswith("anchor:") or self.value.startswith("producer:") ) @@ -56,32 +57,26 @@ def to_crtex(port: str) -> str: return re.sub(r"argument\((-?\d+)\)", r"formal(\1)", port) @staticmethod - def from_json(port: str, leaf_kind: str) -> "Port": + def from_json(port: str) -> "Port": elements = port.split(".") if len(elements) == 0: raise sapp.ParseError(f"Invalid port: `{port}`.") elements[0] = elements[0].lower() - if elements[0] == "leaf": - elements[0] = leaf_kind - elif elements[0] == "return": + if elements[0] == "return": elements[0] = "result" elif elements[0] == "anchor": # Anchor port is of the form Anchor. # SAPP/CRTEX expects: "anchor:formal(0)" - canonical_port = Port.from_json( - ".".join(elements[1:]), "unreachable_leaf_kind_anchor" - ) + canonical_port = Port.from_json(".".join(elements[1:])) return Port(f"{elements[0]}:{Port.to_crtex(canonical_port.value)}") elif elements[0] == "producer" and len(elements) >= 3: # Producer port is of the form Producer... # SAPP/CRTEX expects: "producer::". root = elements[0] producer_id = elements[1] - canonical_port = Port.from_json( - ".".join(elements[2:]), "unreachable_leaf_kind_producer" - ) + canonical_port = Port.from_json(".".join(elements[2:])) return Port(f"{root}:{producer_id}:{Port.to_crtex(canonical_port.value)}") return Port(".".join(elements)) @@ -141,13 +136,18 @@ def from_json(leaf_json: Dict[str, Any], leaf_kind: str) -> "Origin": # The origin represents a call to a leaf/terminal trace. Its port should # indicate that, so that downstream trace reachability computation knows # when it has reached the end. See trace_graph.is_leaf_port(). Non-CRTEX - # ports should always be regardless of the JSON (e.g. method - # origins could indicate that the sink comes from "argument(1)"", but it - # needs to be "sink" in sapp). - callee_port = Port.from_json("leaf", leaf_kind) + # ports should always be [:]. if "canonical_name" in leaf_json: # All CRTEX ports are considered leaf ports. - callee_port = Port.from_json(leaf_json["port"], leaf_kind) + callee_port = Port.from_json(leaf_json["port"]) + else: + actual_callee_port = leaf_json.get("port") + if actual_callee_port is not None: + # Normalize the actual callee port as well. + actual_callee_port = Port.from_json(actual_callee_port).value + callee_port = Port.from_json(f"{leaf_kind}:{actual_callee_port}") + else: + callee_port = Port.from_json(leaf_kind) if not callee_port.is_leaf(): raise sapp.ParseError(f"Encountered non-leaf port in origin {leaf_json}") @@ -171,7 +171,7 @@ def from_json( callee = taint_json.get("resolves_to") method = Method.from_json(callee) if callee else None - port = Port.from_json(taint_json.get("port", "leaf"), leaf_kind) + port = Port.from_json(taint_json.get("port", leaf_kind)) position_json = taint_json.get("position") position = ( diff --git a/sapp/pipeline/tests/test_mariana_trench_parser.py b/sapp/pipeline/tests/test_mariana_trench_parser.py index 1728149b..e598c990 100644 --- a/sapp/pipeline/tests/test_mariana_trench_parser.py +++ b/sapp/pipeline/tests/test_mariana_trench_parser.py @@ -962,7 +962,7 @@ def testModelPostconditions(self) -> None: ], leaves=[("TestSource", 0)], caller_port="result", - callee_port="source", + callee_port="source:argument(1)", type_interval=None, features=[ ParseTraceFeature("always-via-obscure", []), @@ -1048,7 +1048,7 @@ def testModelPostconditions(self) -> None: ], leaves=[("TestSource", 1)], caller_port="result", - callee_port="source", + callee_port="source:argument(1)", type_interval=None, features=[], annotations=[], @@ -1181,7 +1181,7 @@ def testModelPostconditions(self) -> None: titos=[], leaves=[("TestSource", 0)], caller_port="result.x.y", - callee_port="source", + callee_port="source:result", type_interval=None, features=[ParseTraceFeature("via-source", [])], annotations=[], @@ -1298,7 +1298,7 @@ def testModelPostconditions(self) -> None: titos=[], leaves=[("TestSource", 0), ("TestSource2", 0)], caller_port="result", - callee_port="source", + callee_port="source:result", type_interval=None, features=[], annotations=[], @@ -1394,7 +1394,7 @@ def testModelPreconditions(self) -> None: titos=[], leaves=[("TestSink", 0)], caller_port="argument(1)", - callee_port="sink", + callee_port="sink:argument(1)", type_interval=None, features=[ParseTraceFeature("always-via-taint-in-taint-out", [])], annotations=[], @@ -1524,7 +1524,7 @@ def testModelPreconditions(self) -> None: titos=[], leaves=[("TestSink", 0)], caller_port="argument(1).x.y", - callee_port="sink", + callee_port="sink:argument(1)", type_interval=None, features=[], annotations=[], @@ -1641,7 +1641,7 @@ def testModelPreconditions(self) -> None: titos=[], leaves=[("TestSink", 0), ("TestSink2", 0)], caller_port="argument(2)", - callee_port="sink", + callee_port="sink:argument(1)", type_interval=None, features=[], annotations=[], @@ -1758,7 +1758,7 @@ def testModelParameterTypeOverrides(self) -> None: titos=[], leaves=[("TestSink", 0)], caller_port="argument(1)", - callee_port="sink", + callee_port="sink:argument(1)", type_interval=None, features=[], annotations=[], @@ -2184,7 +2184,7 @@ def testModelPropagations(self) -> None: titos=[], leaves=[("T2:LocalReturn", 0)], caller_port="argument(0)", - callee_port="sink", + callee_port="sink:argument(0)", type_interval=None, features=[], annotations=[], @@ -2416,7 +2416,7 @@ def testClassIntervals(self) -> None: titos=[], leaves=[("TestSink", 0)], caller_port="argument(1)", - callee_port="sink", + callee_port="sink:argument(1)", type_interval=ParseTypeInterval( start=1, finish=2, preserves_type_context=True ), @@ -2436,7 +2436,7 @@ def testClassIntervals(self) -> None: titos=[], leaves=[("TestSink", 0)], caller_port="argument(1)", - callee_port="sink", + callee_port="sink:argument(1)", type_interval=ParseTypeInterval( start=3, finish=4, preserves_type_context=True ),