diff --git a/src/sentry/profiles/java.py b/src/sentry/profiles/java.py new file mode 100644 index 0000000000000..c9fa782a3ea5b --- /dev/null +++ b/src/sentry/profiles/java.py @@ -0,0 +1,97 @@ +from typing import List, Tuple + +JAVA_BASE_TYPES = { + "Z": "boolean", + "B": "byte", + "C": "char", + "S": "short", + "I": "int", + "J": "long", + "F": "float", + "D": "double", + "V": "void", +} + + +# parse_obfuscated_signature will parse an obfuscated signatures into parameter +# and return types that can be then deobfuscated +def parse_obfuscated_signature(signature: str) -> Tuple[List[str], str]: + if signature[0] != "(": + return [], "" + + signature = signature[1:] + parameter_types, return_type = signature.rsplit(")", 1) + types = [] + i = 0 + arrays = 0 + + while i < len(parameter_types): + t = parameter_types[i] + + if t in JAVA_BASE_TYPES: + start_index = i - arrays + types.append(parameter_types[start_index : i + 1]) + arrays = 0 + elif t == "L": + start_index = i - arrays + end_index = parameter_types[i:].index(";") + types.append(parameter_types[start_index : i + end_index + 1]) + arrays = 0 + i += end_index + elif t == "[": + arrays += 1 + else: + arrays = 0 + + i += 1 + + return types, return_type + + +# format_signature formats the types into a human-readable signature +def format_signature(parameter_java_types: List[str], return_java_type: str) -> str: + signature = f"({', '.join(parameter_java_types)})" + if return_java_type and return_java_type != "void": + signature += f": {return_java_type}" + return signature + + +def byte_code_type_to_java_type(mapper, byte_code_type: str) -> str: + if not byte_code_type: + return "" + + token = byte_code_type[0] + if token in JAVA_BASE_TYPES: + return JAVA_BASE_TYPES[token] + elif token == "L": + # invalid signature + if byte_code_type[-1] != ";": + return byte_code_type + obfuscated = byte_code_type[1:-1].replace("/", ".") + mapped = mapper.remap_class(obfuscated) + if mapped: + return mapped + return obfuscated + elif token == "[": + return f"{byte_code_type_to_java_type(mapper, byte_code_type[1:])}[]" + else: + return byte_code_type + + +# map_obfucated_signature will parse then deobfuscated a signature and +# format it appropriately +def deobfuscate_signature(mapper, signature: str) -> str: + if not signature: + return "" + + parameter_types, return_type = parse_obfuscated_signature(signature) + if not (parameter_types or return_type): + return "" + + parameter_java_types = [] + for parameter_type in parameter_types: + new_class = byte_code_type_to_java_type(mapper, parameter_type) + parameter_java_types.append(new_class) + + return_java_type = byte_code_type_to_java_type(mapper, return_type) + return format_signature(parameter_java_types, return_java_type) diff --git a/src/sentry/profiles/task.py b/src/sentry/profiles/task.py index 9de1fa3a315e3..3ce5385e5f124 100644 --- a/src/sentry/profiles/task.py +++ b/src/sentry/profiles/task.py @@ -18,6 +18,7 @@ from sentry.lang.native.symbolicator import RetrySymbolication, Symbolicator, SymbolicatorTaskKind from sentry.models import EventError, Organization, Project, ProjectDebugFile from sentry.profiles.device import classify_device +from sentry.profiles.java import deobfuscate_signature from sentry.profiles.utils import get_from_profiling_service from sentry.signals import first_profile_received from sentry.tasks.base import instrumented_task @@ -618,35 +619,52 @@ def _deobfuscate(profile: Profile, project: Project) -> None: with sentry_sdk.start_span(op="proguard.remap"): for method in profile["profile"]["methods"]: + method.setdefault("data", {}) + mapped = mapper.remap_frame( method["class_name"], method["name"], method["source_line"] or 0 ) - method.setdefault("data", {}) - if len(mapped) == 1: - new_frame = mapped[0] - method.update( - { - "class_name": new_frame.class_name, - "name": new_frame.method, - "source_file": new_frame.file, - "source_line": new_frame.line, - } - ) - method["data"]["deobfuscation_status"] = "deobfuscated" - elif len(mapped) > 1: + + if "signature" in method and method["signature"]: + method["signature"] = deobfuscate_signature(mapper, method["signature"]) + + if len(mapped) >= 1: + new_frame = mapped[-1] + method["class_name"] = new_frame.class_name + method["name"] = new_frame.method + method["data"] = { + "deobfuscation_status": "deobfuscated" + if method.get("signature", None) + else "partial" + } + + if new_frame.file: + method["source_file"] = new_frame.file + + if new_frame.line: + method["source_line"] = new_frame.line + bottom_class = mapped[-1].class_name method["inline_frames"] = [ { "class_name": new_frame.class_name, + "data": {"deobfuscation_status": "deobfuscated"}, "name": new_frame.method, "source_file": method["source_file"] if bottom_class == new_frame.class_name - else None, + else "", "source_line": new_frame.line, - "data": {"deobfuscation_status": "deobfuscated"}, } - for new_frame in mapped + for new_frame in reversed(mapped) ] + + # vroom will only take into account frames in this list + # if it exists. since symbolic does not return a signature for + # the frame we deobfuscated, we update it to set + # the deobfuscated signature. + if len(method["inline_frames"]) > 0: + method["inline_frames"][0]["data"] = method["data"] + method["inline_frames"][0]["signature"] = method.get("signature", "") else: mapped_class = mapper.remap_class(method["class_name"]) if mapped_class: diff --git a/tests/sentry/profiles/test_java.py b/tests/sentry/profiles/test_java.py new file mode 100644 index 0000000000000..c61a032faf5ed --- /dev/null +++ b/tests/sentry/profiles/test_java.py @@ -0,0 +1,50 @@ +from tempfile import mkstemp + +import pytest +from symbolic.proguard import ProguardMapper + +from sentry.profiles.java import deobfuscate_signature + +PROGUARD_SOURCE = b"""\ +# compiler: R8 +# compiler_version: 2.0.74 +# min_api: 16 +# pg_map_id: 5b46fdc +# common_typos_disable +# {"id":"com.android.tools.r8.mapping","version":"1.0"} +org.slf4j.helpers.Util$ClassContextSecurityManager -> org.a.b.g$a: + 65:65:void () -> + 67:67:java.lang.Class[] getClassContext() -> a + 69:69:java.lang.Class[] getExtraClassContext() -> a + 65:65:void (org.slf4j.helpers.Util$1) -> +""" + + +@pytest.fixture +def mapper(): + _, mapping_file_path = mkstemp() + with open(mapping_file_path, "wb") as f: + f.write(PROGUARD_SOURCE) + mapper = ProguardMapper.open(mapping_file_path) + assert mapper.has_line_info + return mapper + + +@pytest.mark.parametrize( + ["obfuscated", "expected"], + [ + # invalid signatures + ("", ""), + ("()", ""), + # valid signatures + ("()V", "()"), + ("([I)V", "(int[])"), + ("(III)V", "(int, int, int)"), + ("([Ljava/lang/String;)V", "(java.lang.String[])"), + ("([[J)V", "(long[][])"), + ("(I)I", "(int): int"), + ("([B)V", "(byte[])"), + ], +) +def test_deobfuscate_signature(mapper, obfuscated, expected): + assert deobfuscate_signature(mapper, obfuscated) == expected diff --git a/tests/sentry/profiles/test_task.py b/tests/sentry/profiles/test_task.py index deb321d9f32dc..b55b34d193825 100644 --- a/tests/sentry/profiles/test_task.py +++ b/tests/sentry/profiles/test_task.py @@ -127,16 +127,18 @@ def test_basic_deobfuscation(self): "profile": { "methods": [ { - "name": "a", "abs_path": None, "class_name": "org.a.b.g$a", + "name": "a", + "signature": "()V", "source_file": None, "source_line": 67, }, { - "name": "a", "abs_path": None, "class_name": "org.a.b.g$a", + "name": "a", + "signature": "()V", "source_file": None, "source_line": 69, }, @@ -178,16 +180,18 @@ def test_inline_deobfuscation(self): "profile": { "methods": [ { - "name": "onClick", "abs_path": None, "class_name": "e.a.c.a", + "name": "onClick", + "signature": "()V", "source_file": None, "source_line": 2, }, { - "name": "t", "abs_path": None, "class_name": "io.sentry.sample.MainActivity", + "name": "t", + "signature": "()V", "source_file": "MainActivity.java", "source_line": 1, }, @@ -200,21 +204,24 @@ def test_inline_deobfuscation(self): _deobfuscate(profile, project) frames = profile["profile"]["methods"] - assert sum(len(f.get("inline_frames", [{}])) for f in frames) == 4 + assert sum(len(f.get("inline_frames", [])) for f in frames) == 3 assert frames[0]["name"] == "onClick" assert frames[0]["class_name"] == "io.sentry.sample.-$$Lambda$r3Avcbztes2hicEObh02jjhQqd4" + assert frames[1]["inline_frames"][0]["name"] == "onClickHandler" + assert frames[1]["inline_frames"][0]["source_line"] == 40 assert frames[1]["inline_frames"][0]["source_file"] == "MainActivity.java" assert frames[1]["inline_frames"][0]["class_name"] == "io.sentry.sample.MainActivity" - assert frames[1]["inline_frames"][0]["name"] == "bar" - assert frames[1]["inline_frames"][0]["source_line"] == 54 + assert frames[1]["inline_frames"][0]["signature"] == "()" + assert frames[1]["inline_frames"][1]["name"] == "foo" assert frames[1]["inline_frames"][1]["source_line"] == 44 - assert frames[1]["inline_frames"][2]["name"] == "onClickHandler" - assert frames[1]["inline_frames"][2]["source_line"] == 40 + assert frames[1]["inline_frames"][2]["source_file"] == "MainActivity.java" assert frames[1]["inline_frames"][2]["class_name"] == "io.sentry.sample.MainActivity" + assert frames[1]["inline_frames"][2]["name"] == "bar" + assert frames[1]["inline_frames"][2]["source_line"] == 54 def test_error_on_resolving(self): out = BytesIO()