diff --git a/sapp/pipeline/pysa_taint_parser.py b/sapp/pipeline/pysa_taint_parser.py index 53d14c6..8f3c4ad 100644 --- a/sapp/pipeline/pysa_taint_parser.py +++ b/sapp/pipeline/pysa_taint_parser.py @@ -99,6 +99,7 @@ class TraceFragmentKey(NamedTuple): port: str location: SourceLocationWithFilename type_interval: Optional[ParseTypeInterval] + features: FrozenSet[TraceFeature] trace_annotations: FrozenSet[ParseTraceAnnotation] @@ -109,7 +110,7 @@ class TraceFragment(NamedTuple): location: SourceLocationWithFilename leaves: List[LeafWithDistance] titos: List[SourceLocation] - features: List[TraceFeature] + features: FrozenSet[TraceFeature] type_interval: Optional[ParseTypeInterval] trace_annotations: FrozenSet[ParseTraceAnnotation] @@ -119,6 +120,7 @@ def key(self) -> TraceFragmentKey: port=self.port, location=self.location, type_interval=self.type_interval, + features=self.features, trace_annotations=self.trace_annotations, ) @@ -132,7 +134,7 @@ def merge(a: "TraceFragment", b: "TraceFragment") -> "TraceFragment": location=a.location, leaves=sorted(set(a.leaves) | set(b.leaves)), titos=sorted(set(a.titos) | set(b.titos)), - features=sorted(set(a.features) | set(b.features)), + features=a.features, type_interval=a.type_interval, trace_annotations=a.trace_annotations, ) @@ -258,7 +260,8 @@ def _parse_model_sources( callee_port=fragment.port, type_interval=fragment.type_interval, features=[ - feature.to_parse_feature() for feature in fragment.features + feature.to_parse_feature() + for feature in sorted(fragment.features) ], annotations=sorted(fragment.trace_annotations), ) @@ -281,7 +284,8 @@ def _parse_model_sinks( callee_port=fragment.port, type_interval=fragment.type_interval, features=[ - feature.to_parse_feature() for feature in fragment.features + feature.to_parse_feature() + for feature in sorted(fragment.features) ], annotations=sorted(fragment.trace_annotations), ) @@ -315,7 +319,7 @@ def _parse_issue(self, json: Dict[str, Any]) -> Iterable[ParseIssueTuple]: postconditions=postconditions, initial_sources=initial_sources, fix_info=None, - features=[feature.name for feature in features], + features=sorted(feature.name for feature in features), ) def _generate_issue_master_handle(self, issue: Dict[str, Any]) -> str: @@ -381,7 +385,8 @@ def _parse_issue_trace_fragments( leaves=[(leaf.kind, leaf.distance) for leaf in leaves], titos=fragment.titos, features=[ - feature.to_parse_feature() for feature in fragment.features + feature.to_parse_feature() + for feature in sorted(fragment.features) ], type_interval=fragment.type_interval, annotations=sorted(fragment.trace_annotations), @@ -420,9 +425,8 @@ def _parse_trace_fragment( self._parse_location(location) for location in trace.get("tito_positions", []) ] - local_features = self._parse_features(trace.get("local_features", [])) + shared_local_features = self._parse_features(trace.get("local_features", [])) type_interval = self._parse_type_interval(trace) - # The old syntax would store `extra_traces` here. We preserve this for backward compatibility. shared_trace_annotations = self._parse_extra_traces(trace) @@ -433,6 +437,9 @@ def _parse_trace_fragment( for flow_details in trace.get("kinds", []): kind = flow_details["kind"] distance = flow_details.get("length", 0) + local_features = shared_local_features | self._parse_features( + flow_details.get("local_features", []) + ) trace_annotations = ( self._parse_extra_traces(flow_details) | shared_trace_annotations ) @@ -456,6 +463,9 @@ def _parse_trace_fragment( for flow_details in trace.get("kinds", []): kind = flow_details["kind"] distance = flow_details.get("length", 0) + local_features = shared_local_features | self._parse_features( + flow_details.get("local_features", []) + ) trace_annotations = ( self._parse_extra_traces(flow_details) | shared_trace_annotations ) @@ -538,11 +548,11 @@ def _parse_extra_traces( return frozenset(trace_annotations) - def _parse_features(self, json: List[Dict[str, Any]]) -> List[TraceFeature]: - return [ + def _parse_features(self, json: List[Dict[str, Any]]) -> FrozenSet[TraceFeature]: + return frozenset( TraceFeature(name=feature.name) for feature in flatten_features_to_parse_trace_feature(json) - ] + ) def _parse_location_with_filename( self, json: Dict[str, Any] diff --git a/sapp/pipeline/tests/test_pysa_taint_parser.py b/sapp/pipeline/tests/test_pysa_taint_parser.py index ec2562a..7637733 100644 --- a/sapp/pipeline/tests/test_pysa_taint_parser.py +++ b/sapp/pipeline/tests/test_pysa_taint_parser.py @@ -1255,9 +1255,9 @@ def testIssueV3(self) -> None: initial_sources={("_user_controlled", "UserControlled", 0)}, final_sinks={(None, "RCE", 0)}, features=[ - "has:first-index", - "first-index:payload", "always-via:tito", + "first-index:payload", + "has:first-index", ], fix_info=None, ) @@ -2877,6 +2877,124 @@ def testSourceModelV3(self) -> None: ), ], ) + # Kind-specific breadcrumbs. + self.assertParsed( + version=3, + input=""" + { + "kind": "model", + "data": { + "callable": "foo.bar", + "sources": [ + { + "port": "result", + "taint": [ + { + "call": { + "position": { + "filename": "foo.py", + "line": 1, + "start": 2, + "end": 3 + }, + "resolves_to": [ + "foo.source" + ], + "port": "result" + }, + "tito_positions": [ + { "line": 10, "start": 11, "end": 12 }, + { "line": 13, "start": 14, "end": 15 } + ], + "local_features": [ + { "always-via": "shared-breadcrumb" }, + { "always-via": "other-shared-breadcrumb" } + ], + "kinds": [ + { + "kind": "UserControlled", + "length": 1, + "leaves": [ { "name": "_user_controlled" } ], + "features": [ { "always-via": "indirect-source" } ], + "local_features": [ { "always-via": "user-controlled-breadcrumb" } ] + }, + { + "kind": "Header", + "length": 2, + "leaves": [ { "name": "_header" } ], + "features": [ { "always-via": "indirect-source" } ], + "local_features": [ { "always-via": "header-breadcrumb" } ] + } + ], + "is_self_call": false + } + ] + } + ] + } + } + """, + expected=[ + ParseConditionTuple( + type=ParseType.POSTCONDITION, + caller="foo.bar", + callee="foo.source", + callee_location=SourceLocation( + line_no=1, + begin_column=3, + end_column=3, + ), + filename="foo.py", + titos=[ + SourceLocation(line_no=10, begin_column=12, end_column=12), + SourceLocation(line_no=13, begin_column=15, end_column=15), + ], + leaves=[("UserControlled", 1)], + caller_port="result", + callee_port="result", + type_interval=ParseTypeInterval( + start=0, + finish=sys.maxsize, + preserves_type_context=False, + ), + features=[ + ParseTraceFeature("always-via:other-shared-breadcrumb", []), + ParseTraceFeature("always-via:shared-breadcrumb", []), + ParseTraceFeature("always-via:user-controlled-breadcrumb", []), + ], + annotations=[], + ), + ParseConditionTuple( + type=ParseType.POSTCONDITION, + caller="foo.bar", + callee="foo.source", + callee_location=SourceLocation( + line_no=1, + begin_column=3, + end_column=3, + ), + filename="foo.py", + titos=[ + SourceLocation(line_no=10, begin_column=12, end_column=12), + SourceLocation(line_no=13, begin_column=15, end_column=15), + ], + leaves=[("Header", 2)], + caller_port="result", + callee_port="result", + type_interval=ParseTypeInterval( + start=0, + finish=sys.maxsize, + preserves_type_context=False, + ), + features=[ + ParseTraceFeature("always-via:header-breadcrumb", []), + ParseTraceFeature("always-via:other-shared-breadcrumb", []), + ParseTraceFeature("always-via:shared-breadcrumb", []), + ], + annotations=[], + ), + ], + ) def testSinkModelV3(self) -> None: # User-declared sink.