Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(profiling): Deobfuscate Android methods' signature #53427

Merged
merged 13 commits into from
Jul 25, 2023
Merged
94 changes: 94 additions & 0 deletions src/sentry/profiles/java.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
from typing import List, Tuple

# Single-character type codes found in encoded signatures, mapped to the Java
# type name they are rendered as ("V" is included so return types resolve too).
JAVA_BASE_TYPES = dict(
    Z="boolean",
    B="byte",
    C="char",
    S="short",
    I="int",
    J="long",
    F="float",
    D="double",
    V="void",
)


# parse_obfuscated_signature splits an obfuscated signature of the form
# "(<parameter types>)<return type>" into its encoded parameter types and its
# encoded return type, so that each one can then be deobfuscated.
#
# Every parameter keeps its encoded form, including any leading "[" array
# markers (e.g. "[[J" or "[Ljava/lang/String;").
#
# ([], "") is returned when the signature is malformed: empty, not starting
# with "(", missing the closing ")", or containing an "L" class descriptor
# that is never terminated by ";".
def parse_obfuscated_signature(signature: str) -> Tuple[List[str], str]:
    # startswith() is also safe on an empty string, unlike signature[0].
    if not signature.startswith("("):
        return [], ""

    # Split on the LAST ")"; an empty separator means there was none at all.
    parameter_types, separator, return_type = signature[1:].rpartition(")")
    if not separator:
        return [], ""

    types: List[str] = []
    i = 0
    # Number of consecutive "[" seen immediately before the current position.
    arrays = 0

    while i < len(parameter_types):
        t = parameter_types[i]

        if t in JAVA_BASE_TYPES:
            types.append(parameter_types[i - arrays : i + 1])
            arrays = 0
        elif t == "L":
            # A class descriptor runs up to and including the next ";".
            end_index = parameter_types.find(";", i)
            if end_index == -1:
                return [], ""
            types.append(parameter_types[i - arrays : end_index + 1])
            arrays = 0
            i = end_index
        elif t == "[":
            arrays += 1
        else:
            # Unknown character: skip it and drop any pending array markers.
            arrays = 0

        i += 1

    return types, return_type


# format_signature formats Java type names into a human-readable signature.
#
# Parameter types are joined with ", " inside parentheses. The return type is
# appended as ": <type>" unless it is empty or "void".
def format_signature(parameter_java_types: List[str], return_java_type: str) -> str:
    signature = f"({', '.join(parameter_java_types)})"
    if return_java_type and return_java_type != "void":
        signature += f": {return_java_type}"
    return signature


# byte_code_type_to_java_type converts a single encoded type into a readable
# Java type name.
#
# - an empty input yields ""
# - a base type code is looked up in JAVA_BASE_TYPES
# - "L<class>;" has "/" replaced with "." and is passed to
#   mapper.remap_class(); the remapped name is used when it is truthy,
#   otherwise the (still obfuscated) dotted name is returned
# - "[<type>" is converted recursively and suffixed with "[]"
# - anything else is returned unchanged
def byte_code_type_to_java_type(mapper, byte_code_type: str) -> str:
    if not byte_code_type:
        return ""

    token = byte_code_type[0]
    if token in JAVA_BASE_TYPES:
        return JAVA_BASE_TYPES[token]

    if token == "L":
        class_name = byte_code_type[1:]
        # Only strip the terminator when it is actually there, so that an
        # unterminated descriptor does not lose its last character.
        if class_name.endswith(";"):
            class_name = class_name[:-1]
        obfuscated = class_name.replace("/", ".")
        return mapper.remap_class(obfuscated) or obfuscated

    if token == "[":
        return f"{byte_code_type_to_java_type(mapper, byte_code_type[1:])}[]"

    return byte_code_type


# deobfuscate_signature parses an obfuscated signature, deobfuscates each of
# its types with the given mapper and formats the result as a human-readable
# signature.
#
# "" is returned when the signature is empty or when parsing yields neither
# parameter types nor a return type.
def deobfuscate_signature(mapper, signature: str) -> str:
    if not signature:
        return ""

    parameter_types, return_type = parse_obfuscated_signature(signature)
    if not (parameter_types or return_type):
        return ""

    parameter_java_types = [
        byte_code_type_to_java_type(mapper, parameter_type)
        for parameter_type in parameter_types
    ]
    return_java_type = byte_code_type_to_java_type(mapper, return_type)
    return format_signature(parameter_java_types, return_java_type)
45 changes: 29 additions & 16 deletions src/sentry/profiles/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from sentry.lang.native.symbolicator import RetrySymbolication, Symbolicator, SymbolicatorTaskKind
from sentry.models import EventError, Organization, Project, ProjectDebugFile
from sentry.profiles.device import classify_device
from sentry.profiles.java import deobfuscate_signature
from sentry.profiles.utils import get_from_profiling_service
from sentry.signals import first_profile_received
from sentry.tasks.base import instrumented_task
Expand Down Expand Up @@ -618,35 +619,47 @@ def _deobfuscate(profile: Profile, project: Project) -> None:

with sentry_sdk.start_span(op="proguard.remap"):
for method in profile["profile"]["methods"]:
method.setdefault("data", {})

mapped = mapper.remap_frame(
method["class_name"], method["name"], method["source_line"] or 0
)
method.setdefault("data", {})
if len(mapped) == 1:
new_frame = mapped[0]
method.update(
{
"class_name": new_frame.class_name,
"name": new_frame.method,
"source_file": new_frame.file,
"source_line": new_frame.line,
}
)
method["data"]["deobfuscation_status"] = "deobfuscated"
elif len(mapped) > 1:

if "signature" in method and method["signature"]:
method["signature"] = deobfuscate_signature(mapper, method["signature"])

if len(mapped) >= 1:
new_frame = mapped[-1]
new_frame_attributes = {
"class_name": new_frame.class_name,
"name": new_frame.method,
"data": {"deobfuscation_status": "deobfuscated"},
}

if new_frame.file:
new_frame_attributes["source_file"] = new_frame.file

if new_frame.line:
new_frame_attributes["source_line"] = new_frame.line

method.update(new_frame_attributes)
phacops marked this conversation as resolved.
Show resolved Hide resolved

bottom_class = mapped[-1].class_name
method["inline_frames"] = [
{
"class_name": new_frame.class_name,
"data": {"deobfuscation_status": "deobfuscated"},
"name": new_frame.method,
"source_file": method["source_file"]
if bottom_class == new_frame.class_name
else None,
else "",
"source_line": new_frame.line,
"data": {"deobfuscation_status": "deobfuscated"},
}
for new_frame in mapped
for new_frame in reversed(mapped)
phacops marked this conversation as resolved.
Show resolved Hide resolved
]

if len(method["inline_frames"]) > 0:
method["inline_frames"][0]["signature"] = method.get("signature", "")
phacops marked this conversation as resolved.
Show resolved Hide resolved
else:
mapped_class = mapper.remap_class(method["class_name"])
if mapped_class:
Expand Down
50 changes: 50 additions & 0 deletions tests/sentry/profiles/test_java.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from tempfile import mkstemp

import pytest
from symbolic.proguard import ProguardMapper

from sentry.profiles.java import deobfuscate_signature

# Mapping file contents written to disk by the `mapper` fixture. It declares
# one class entry (org.slf4j.helpers.Util$ClassContextSecurityManager ->
# org.a.b.g$a) with line-ranged method entries.
PROGUARD_SOURCE = b"""\
# compiler: R8
# compiler_version: 2.0.74
# min_api: 16
# pg_map_id: 5b46fdc
# common_typos_disable
# {"id":"com.android.tools.r8.mapping","version":"1.0"}
org.slf4j.helpers.Util$ClassContextSecurityManager -> org.a.b.g$a:
65:65:void <init>() -> <init>
67:67:java.lang.Class[] getClassContext() -> a
69:69:java.lang.Class[] getExtraClassContext() -> a
65:65:void <init>(org.slf4j.helpers.Util$1) -> <init>
"""


@pytest.fixture
def mapper():
    """Return a ``ProguardMapper`` opened on a file containing ``PROGUARD_SOURCE``."""
    # mkstemp() hands back an already-open file descriptor. Write through it
    # so the descriptor is closed by the ``with`` block instead of being
    # discarded and leaked.
    fd, mapping_file_path = mkstemp()
    with open(fd, "wb") as f:
        f.write(PROGUARD_SOURCE)
    # NOTE(review): the temporary file itself is never removed.
    proguard_mapper = ProguardMapper.open(mapping_file_path)
    assert proguard_mapper.has_line_info
    return proguard_mapper


@pytest.mark.parametrize(
    ["obfuscated", "expected"],
    [
        # invalid signatures
        ("", ""),
        ("()", ""),
        # valid signatures
        ("()V", "()"),
        ("([I)V", "(int[])"),
        ("(III)V", "(int, int, int)"),
        ("([Ljava/lang/String;)V", "(java.lang.String[])"),
        ("([[J)V", "(long[][])"),
        ("(I)I", "(int): int"),
        ("([B)V", "(byte[])"),
    ],
)
def test_deobfuscate_signature(mapper, obfuscated, expected):
    """Check that each obfuscated signature is rendered as the expected string."""
    assert deobfuscate_signature(mapper, obfuscated) == expected
25 changes: 16 additions & 9 deletions tests/sentry/profiles/test_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,16 +127,18 @@ def test_basic_deobfuscation(self):
"profile": {
"methods": [
{
"name": "a",
"abs_path": None,
"class_name": "org.a.b.g$a",
"name": "a",
"signature": "()V",
"source_file": None,
"source_line": 67,
},
{
"name": "a",
"abs_path": None,
"class_name": "org.a.b.g$a",
"name": "a",
"signature": "()V",
"source_file": None,
"source_line": 69,
},
Expand Down Expand Up @@ -178,16 +180,18 @@ def test_inline_deobfuscation(self):
"profile": {
"methods": [
{
"name": "onClick",
"abs_path": None,
"class_name": "e.a.c.a",
"name": "onClick",
"signature": "()V",
"source_file": None,
"source_line": 2,
},
{
"name": "t",
"abs_path": None,
"class_name": "io.sentry.sample.MainActivity",
"name": "t",
"signature": "()V",
"source_file": "MainActivity.java",
"source_line": 1,
},
Expand All @@ -200,21 +204,24 @@ def test_inline_deobfuscation(self):
_deobfuscate(profile, project)
frames = profile["profile"]["methods"]

assert sum(len(f.get("inline_frames", [{}])) for f in frames) == 4
assert sum(len(f.get("inline_frames", [])) for f in frames) == 3

assert frames[0]["name"] == "onClick"
assert frames[0]["class_name"] == "io.sentry.sample.-$$Lambda$r3Avcbztes2hicEObh02jjhQqd4"

assert frames[1]["inline_frames"][0]["name"] == "onClickHandler"
assert frames[1]["inline_frames"][0]["source_line"] == 40
assert frames[1]["inline_frames"][0]["source_file"] == "MainActivity.java"
assert frames[1]["inline_frames"][0]["class_name"] == "io.sentry.sample.MainActivity"
assert frames[1]["inline_frames"][0]["name"] == "bar"
assert frames[1]["inline_frames"][0]["source_line"] == 54
assert frames[1]["inline_frames"][0]["signature"] == "()"

assert frames[1]["inline_frames"][1]["name"] == "foo"
assert frames[1]["inline_frames"][1]["source_line"] == 44
assert frames[1]["inline_frames"][2]["name"] == "onClickHandler"
assert frames[1]["inline_frames"][2]["source_line"] == 40

assert frames[1]["inline_frames"][2]["source_file"] == "MainActivity.java"
assert frames[1]["inline_frames"][2]["class_name"] == "io.sentry.sample.MainActivity"
assert frames[1]["inline_frames"][2]["name"] == "bar"
assert frames[1]["inline_frames"][2]["source_line"] == 54

def test_error_on_resolving(self):
out = BytesIO()
Expand Down
Loading