Skip to content

Commit

Permalink
Parse kind-specific local breadcrumbs
Browse files Browse the repository at this point in the history
Summary: Same as for D58582725, but for kind-specific local breadcrumbs

Reviewed By: tianhan0

Differential Revision: D58586552

fbshipit-source-id: d1a2d7554aebe2d1d46f732b16a4e33f61ce36eb
  • Loading branch information
arthaud authored and facebook-github-bot committed Jun 17, 2024
1 parent 1a5ca67 commit f60b49e
Show file tree
Hide file tree
Showing 2 changed files with 141 additions and 13 deletions.
32 changes: 21 additions & 11 deletions sapp/pipeline/pysa_taint_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ class TraceFragmentKey(NamedTuple):
port: str
location: SourceLocationWithFilename
type_interval: Optional[ParseTypeInterval]
features: FrozenSet[TraceFeature]
trace_annotations: FrozenSet[ParseTraceAnnotation]


Expand All @@ -109,7 +110,7 @@ class TraceFragment(NamedTuple):
location: SourceLocationWithFilename
leaves: List[LeafWithDistance]
titos: List[SourceLocation]
features: List[TraceFeature]
features: FrozenSet[TraceFeature]
type_interval: Optional[ParseTypeInterval]
trace_annotations: FrozenSet[ParseTraceAnnotation]

Expand All @@ -119,6 +120,7 @@ def key(self) -> TraceFragmentKey:
port=self.port,
location=self.location,
type_interval=self.type_interval,
features=self.features,
trace_annotations=self.trace_annotations,
)

Expand All @@ -132,7 +134,7 @@ def merge(a: "TraceFragment", b: "TraceFragment") -> "TraceFragment":
location=a.location,
leaves=sorted(set(a.leaves) | set(b.leaves)),
titos=sorted(set(a.titos) | set(b.titos)),
features=sorted(set(a.features) | set(b.features)),
features=a.features,
type_interval=a.type_interval,
trace_annotations=a.trace_annotations,
)
Expand Down Expand Up @@ -258,7 +260,8 @@ def _parse_model_sources(
callee_port=fragment.port,
type_interval=fragment.type_interval,
features=[
feature.to_parse_feature() for feature in fragment.features
feature.to_parse_feature()
for feature in sorted(fragment.features)
],
annotations=sorted(fragment.trace_annotations),
)
Expand All @@ -281,7 +284,8 @@ def _parse_model_sinks(
callee_port=fragment.port,
type_interval=fragment.type_interval,
features=[
feature.to_parse_feature() for feature in fragment.features
feature.to_parse_feature()
for feature in sorted(fragment.features)
],
annotations=sorted(fragment.trace_annotations),
)
Expand Down Expand Up @@ -315,7 +319,7 @@ def _parse_issue(self, json: Dict[str, Any]) -> Iterable[ParseIssueTuple]:
postconditions=postconditions,
initial_sources=initial_sources,
fix_info=None,
features=[feature.name for feature in features],
features=sorted(feature.name for feature in features),
)

def _generate_issue_master_handle(self, issue: Dict[str, Any]) -> str:
Expand Down Expand Up @@ -381,7 +385,8 @@ def _parse_issue_trace_fragments(
leaves=[(leaf.kind, leaf.distance) for leaf in leaves],
titos=fragment.titos,
features=[
feature.to_parse_feature() for feature in fragment.features
feature.to_parse_feature()
for feature in sorted(fragment.features)
],
type_interval=fragment.type_interval,
annotations=sorted(fragment.trace_annotations),
Expand Down Expand Up @@ -420,9 +425,8 @@ def _parse_trace_fragment(
self._parse_location(location)
for location in trace.get("tito_positions", [])
]
local_features = self._parse_features(trace.get("local_features", []))
shared_local_features = self._parse_features(trace.get("local_features", []))
type_interval = self._parse_type_interval(trace)

# The old syntax would store `extra_traces` here. We preserve this for backward compatibility.
shared_trace_annotations = self._parse_extra_traces(trace)

Expand All @@ -433,6 +437,9 @@ def _parse_trace_fragment(
for flow_details in trace.get("kinds", []):
kind = flow_details["kind"]
distance = flow_details.get("length", 0)
local_features = shared_local_features | self._parse_features(
flow_details.get("local_features", [])
)
trace_annotations = (
self._parse_extra_traces(flow_details) | shared_trace_annotations
)
Expand All @@ -456,6 +463,9 @@ def _parse_trace_fragment(
for flow_details in trace.get("kinds", []):
kind = flow_details["kind"]
distance = flow_details.get("length", 0)
local_features = shared_local_features | self._parse_features(
flow_details.get("local_features", [])
)
trace_annotations = (
self._parse_extra_traces(flow_details) | shared_trace_annotations
)
Expand Down Expand Up @@ -538,11 +548,11 @@ def _parse_extra_traces(

return frozenset(trace_annotations)

def _parse_features(self, json: List[Dict[str, Any]]) -> List[TraceFeature]:
return [
def _parse_features(self, json: List[Dict[str, Any]]) -> FrozenSet[TraceFeature]:
return frozenset(
TraceFeature(name=feature.name)
for feature in flatten_features_to_parse_trace_feature(json)
]
)

def _parse_location_with_filename(
self, json: Dict[str, Any]
Expand Down
122 changes: 120 additions & 2 deletions sapp/pipeline/tests/test_pysa_taint_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1255,9 +1255,9 @@ def testIssueV3(self) -> None:
initial_sources={("_user_controlled", "UserControlled", 0)},
final_sinks={(None, "RCE", 0)},
features=[
"has:first-index",
"first-index:payload",
"always-via:tito",
"first-index:payload",
"has:first-index",
],
fix_info=None,
)
Expand Down Expand Up @@ -2877,6 +2877,124 @@ def testSourceModelV3(self) -> None:
),
],
)
# Kind-specific breadcrumbs.
self.assertParsed(
version=3,
input="""
{
"kind": "model",
"data": {
"callable": "foo.bar",
"sources": [
{
"port": "result",
"taint": [
{
"call": {
"position": {
"filename": "foo.py",
"line": 1,
"start": 2,
"end": 3
},
"resolves_to": [
"foo.source"
],
"port": "result"
},
"tito_positions": [
{ "line": 10, "start": 11, "end": 12 },
{ "line": 13, "start": 14, "end": 15 }
],
"local_features": [
{ "always-via": "shared-breadcrumb" },
{ "always-via": "other-shared-breadcrumb" }
],
"kinds": [
{
"kind": "UserControlled",
"length": 1,
"leaves": [ { "name": "_user_controlled" } ],
"features": [ { "always-via": "indirect-source" } ],
"local_features": [ { "always-via": "user-controlled-breadcrumb" } ]
},
{
"kind": "Header",
"length": 2,
"leaves": [ { "name": "_header" } ],
"features": [ { "always-via": "indirect-source" } ],
"local_features": [ { "always-via": "header-breadcrumb" } ]
}
],
"is_self_call": false
}
]
}
]
}
}
""",
expected=[
ParseConditionTuple(
type=ParseType.POSTCONDITION,
caller="foo.bar",
callee="foo.source",
callee_location=SourceLocation(
line_no=1,
begin_column=3,
end_column=3,
),
filename="foo.py",
titos=[
SourceLocation(line_no=10, begin_column=12, end_column=12),
SourceLocation(line_no=13, begin_column=15, end_column=15),
],
leaves=[("UserControlled", 1)],
caller_port="result",
callee_port="result",
type_interval=ParseTypeInterval(
start=0,
finish=sys.maxsize,
preserves_type_context=False,
),
features=[
ParseTraceFeature("always-via:other-shared-breadcrumb", []),
ParseTraceFeature("always-via:shared-breadcrumb", []),
ParseTraceFeature("always-via:user-controlled-breadcrumb", []),
],
annotations=[],
),
ParseConditionTuple(
type=ParseType.POSTCONDITION,
caller="foo.bar",
callee="foo.source",
callee_location=SourceLocation(
line_no=1,
begin_column=3,
end_column=3,
),
filename="foo.py",
titos=[
SourceLocation(line_no=10, begin_column=12, end_column=12),
SourceLocation(line_no=13, begin_column=15, end_column=15),
],
leaves=[("Header", 2)],
caller_port="result",
callee_port="result",
type_interval=ParseTypeInterval(
start=0,
finish=sys.maxsize,
preserves_type_context=False,
),
features=[
ParseTraceFeature("always-via:header-breadcrumb", []),
ParseTraceFeature("always-via:other-shared-breadcrumb", []),
ParseTraceFeature("always-via:shared-breadcrumb", []),
],
annotations=[],
),
],
)

def testSinkModelV3(self) -> None:
# User-declared sink.
Expand Down

0 comments on commit f60b49e

Please sign in to comment.