From 1088e62618cd949b7dae0c575c9173be2d01fbd4 Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Mon, 24 Jul 2023 09:28:23 -0400 Subject: [PATCH 01/13] feat(profiling): Deobfuscate Android methods' signature --- src/sentry/profiles/java.py | 86 ++++++++++++++++++++++++++++++ src/sentry/profiles/task.py | 3 ++ tests/sentry/profiles/test_java.py | 48 +++++++++++++++++ 3 files changed, 137 insertions(+) create mode 100644 src/sentry/profiles/java.py create mode 100644 tests/sentry/profiles/test_java.py diff --git a/src/sentry/profiles/java.py b/src/sentry/profiles/java.py new file mode 100644 index 00000000000000..1ee93fd7b83a53 --- /dev/null +++ b/src/sentry/profiles/java.py @@ -0,0 +1,86 @@ +from typing import List, Tuple + +JAVA_BASE_TYPES = { + "Z": "boolean", + "B": "byte", + "C": "char", + "S": "short", + "I": "int", + "J": "long", + "F": "float", + "D": "double", + "V": "void", +} + + +# parse_obfuscated_signature will parse an obfuscated signatures into parameter +# and return types that can be then deobfuscated +def parse_obfuscated_signature(signature: str) -> Tuple[List[str], str]: + signature = signature[1:] + parameter_types, return_type = signature.rsplit(")", 1) + types = [] + i = 0 + is_array = False + + while i < len(parameter_types): + t = parameter_types[i] + + if t in JAVA_BASE_TYPES: + start_index = i - int(is_array) + types.append(parameter_types[start_index : i + 1]) + is_array = False + elif t == "L": + start_index = i - int(is_array) + end_index = parameter_types[i:].index(";") + types.append(parameter_types[start_index : i + end_index + 1]) + is_array = False + i += end_index + elif t == "[": + is_array = True + else: + is_array = False + + i += 1 + + return types, return_type + + +# format_signature formats the types into a human-readable signature +def format_signature(parameter_java_types: List[str], return_java_type: str) -> str: + signature = f"({', '.join(parameter_java_types)})" + if return_java_type != "void": + signature += f": {return_java_type}" + return signature + + +def byte_code_type_to_java_type(byte_code_type: str) -> str: + token = byte_code_type[0] + if token in JAVA_BASE_TYPES: + return JAVA_BASE_TYPES[token] + elif token == "L": + return byte_code_type[1 : len(byte_code_type) - 1].replace("/", ".") + elif token == "[": + return f"{byte_code_type_to_java_type(byte_code_type[1:])}[]" + else: + return byte_code_type + + +# map_obfucated_signature will parse then deobfuscated a signature and +# format it appropriately +def map_obfuscated_signature(mapper, signature: str) -> str: + parameter_types, return_type = parse_obfuscated_signature(signature) + parameter_java_types = [] + + for parameter_type in parameter_types: + new_class = byte_code_type_to_java_type(parameter_type) + mapped = mapper.remap_class(new_class) + if mapped: + new_class = mapped + parameter_java_types.append(new_class) + + return_java_type = byte_code_type_to_java_type(return_type) + mapped = mapper.remap_class(return_java_type) + if mapped: + return_java_type = mapped + + return format_signature(parameter_java_types, return_java_type) diff --git a/src/sentry/profiles/task.py b/src/sentry/profiles/task.py index 9de1fa3a315e39..b78e680fa24362 100644 --- a/src/sentry/profiles/task.py +++ b/src/sentry/profiles/task.py @@ -18,6 +18,7 @@ from sentry.lang.native.symbolicator import RetrySymbolication, Symbolicator, SymbolicatorTaskKind from sentry.models import EventError, Organization, Project, ProjectDebugFile from sentry.profiles.device import classify_device +from sentry.profiles.java import map_obfuscated_signature from sentry.profiles.utils import get_from_profiling_service from sentry.signals import first_profile_received from sentry.tasks.base import instrumented_task @@ -655,6 +656,8 @@ def _deobfuscate(profile: Profile, project: Project) -> None: else: method["data"]["deobfuscation_status"] = "missing" + method["signature"] = map_obfuscated_signature(mapper, method["signature"]) + @metrics.wraps("process_profile.track_outcome") def _track_outcome( diff --git a/tests/sentry/profiles/test_java.py b/tests/sentry/profiles/test_java.py new file mode 100644 index 00000000000000..c26eb2780646e4 --- /dev/null +++ b/tests/sentry/profiles/test_java.py @@ -0,0 +1,48 @@ +from tempfile import mkstemp + +import pytest +from symbolic.proguard import ProguardMapper + +from sentry.profiles.java import map_obfuscated_signature + +PROGUARD_SOURCE = b"""\ +# compiler: R8 +# compiler_version: 2.0.74 +# min_api: 16 +# pg_map_id: 5b46fdc +# common_typos_disable +# {"id":"com.android.tools.r8.mapping","version":"1.0"} +org.slf4j.helpers.Util$ClassContextSecurityManager -> org.a.b.g$a: + 65:65:void () -> + 67:67:java.lang.Class[] getClassContext() -> a + 69:69:java.lang.Class[] getExtraClassContext() -> a + 65:65:void (org.slf4j.helpers.Util$1) -> +""" + + +@pytest.fixture +def mapper(): + _, mapping_file_path = mkstemp() + f = open(mapping_file_path, "wb") + f.write(PROGUARD_SOURCE) + mapper = ProguardMapper.open(mapping_file_path) + assert mapper.has_line_info + f.close() + return mapper + + +@pytest.mark.parametrize( + ["obfuscated", "expected"], + [ + ("", ""), + ("()", "()"), + ("([I)", "(int[])"), + ("(III)", "(int[], int[], int[])"), + ("([Ljava/lang/String;)", "(java.lang.String)"), + ("([[J)", "(long[][])"), + ("(I)I", "(int): int"), + ], +) +def test_map_obfuscated_signature(mapper, obfuscated, expected): + result = map_obfuscated_signature(mapper, obfuscated) + assert result == expected From cfb07eedbeccf9b08c9a274e0afb970aca3ef54e Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Mon, 24 Jul 2023 12:41:13 -0400 Subject: [PATCH 02/13] Close mapping file before we read it again --- tests/sentry/profiles/test_java.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/sentry/profiles/test_java.py b/tests/sentry/profiles/test_java.py index c26eb2780646e4..64ab70195eb8b8 100644 --- a/tests/sentry/profiles/test_java.py +++ b/tests/sentry/profiles/test_java.py @@ -23,11 +23,10 @@ @pytest.fixture def mapper(): _, mapping_file_path = mkstemp() - f = open(mapping_file_path, "wb") - f.write(PROGUARD_SOURCE) + with open(mapping_file_path, "wb") as f: + f.write(PROGUARD_SOURCE) mapper = ProguardMapper.open(mapping_file_path) assert mapper.has_line_info - f.close() return mapper From 4dea25cd4a4e8b2cbde0a29cf1e88dc8a6fea10d Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Mon, 24 Jul 2023 12:47:49 -0400 Subject: [PATCH 03/13] Handle corner cases --- src/sentry/profiles/java.py | 21 ++++++++++++--------- tests/sentry/profiles/test_java.py | 4 ++-- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/sentry/profiles/java.py b/src/sentry/profiles/java.py index 1ee93fd7b83a53..abaedfef8e5717 100644 --- a/src/sentry/profiles/java.py +++ b/src/sentry/profiles/java.py @@ -20,25 +20,25 @@ def parse_obfuscated_signature(signature: str) -> Tuple[List[str], str]: parameter_types, return_type = signature.rsplit(")", 1) types = [] i = 0 - is_array = False + arrays = 0 while i < len(parameter_types): t = parameter_types[i] if t in JAVA_BASE_TYPES: - start_index = i - int(is_array) + start_index = i - arrays types.append(parameter_types[start_index : i + 1]) - is_array = False + arrays = 0 elif t == "L": - start_index = i - int(is_array) + start_index = i - arrays end_index = parameter_types[i:].index(";") types.append(parameter_types[start_index : i + end_index + 1]) - is_array = False + arrays = 0 i += end_index elif t == "[": - is_array = True + arrays += 1 else: - is_array = False + arrays = 0 i += 1 @@ -48,7 +48,7 @@ def parse_obfuscated_signature(signature: str) -> Tuple[List[str], str]: # format_signature formats the types into a human-readable signature def format_signature(parameter_java_types: List[str], return_java_type: str) -> str: signature = f"({', '.join(parameter_java_types)})" - if return_java_type != "void": + if return_java_type and return_java_type != "void": signature += f": {return_java_type}" return signature @@ -68,6 +68,9 @@ def byte_code_type_to_java_type(byte_code_type: str) -> str: # map_obfucated_signature will parse then deobfuscated a signature and # format it appropriately def map_obfuscated_signature(mapper, signature: str) -> str: + if not signature: + return "" + parameter_types, return_type = parse_obfuscated_signature(signature) parameter_java_types = [] @@ -78,7 +81,7 @@ def map_obfuscated_signature(mapper, signature: str) -> str: new_class = mapped parameter_java_types.append(new_class) - return_java_type = byte_code_type_to_java_type(return_type) + return_java_type = byte_code_type_to_java_type(return_type) if return_type else "" mapped = mapper.remap_class(return_java_type) if mapped: return_java_type = mapped diff --git a/tests/sentry/profiles/test_java.py b/tests/sentry/profiles/test_java.py index 64ab70195eb8b8..71d83dc9b1aa11 100644 --- a/tests/sentry/profiles/test_java.py +++ b/tests/sentry/profiles/test_java.py @@ -36,8 +36,8 @@ def mapper(): ("", ""), ("()", "()"), ("([I)", "(int[])"), - ("(III)", "(int[], int[], int[])"), - ("([Ljava/lang/String;)", "(java.lang.String)"), + ("(III)", "(int, int, int)"), + ("([Ljava/lang/String;)", "(java.lang.String[])"), ("([[J)", "(long[][])"), ("(I)I", "(int): int"), ], From bde4bb4874852646c82dd0590bb2dd8387e4fee8 Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Mon, 24 Jul 2023 13:49:20 -0400 Subject: [PATCH 04/13] Add a test case for void return type --- src/sentry/profiles/java.py | 2 +- src/sentry/profiles/task.py | 4 ++-- tests/sentry/profiles/test_java.py | 7 ++++--- tests/sentry/profiles/test_task.py | 6 ++++-- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/sentry/profiles/java.py b/src/sentry/profiles/java.py index abaedfef8e5717..1fa84994f49548 100644 --- a/src/sentry/profiles/java.py +++ b/src/sentry/profiles/java.py @@ -67,7 +67,7 @@ def byte_code_type_to_java_type(byte_code_type: str) -> str: # map_obfucated_signature will parse then deobfuscated a signature and # format it appropriately -def map_obfuscated_signature(mapper, signature: str) -> str: +def deobfuscate_signature(mapper, signature: str) -> str: if not signature: return "" diff --git a/src/sentry/profiles/task.py b/src/sentry/profiles/task.py index b78e680fa24362..1915729cdb2438 100644 --- a/src/sentry/profiles/task.py +++ b/src/sentry/profiles/task.py @@ -18,7 +18,7 @@ from sentry.lang.native.symbolicator import RetrySymbolication, Symbolicator, SymbolicatorTaskKind from sentry.models import EventError, Organization, Project, ProjectDebugFile from sentry.profiles.device import classify_device -from sentry.profiles.java import map_obfuscated_signature +from sentry.profiles.java import deobfuscate_signature from sentry.profiles.utils import get_from_profiling_service from sentry.signals import first_profile_received from sentry.tasks.base import instrumented_task @@ -656,7 +656,7 @@ def _deobfuscate(profile: Profile, project: Project) -> None: else: method["data"]["deobfuscation_status"] = "missing" - method["signature"] = map_obfuscated_signature(mapper, method["signature"]) + method["signature"] = deobfuscate_signature(mapper, method["signature"]) @metrics.wraps("process_profile.track_outcome") diff --git a/tests/sentry/profiles/test_java.py b/tests/sentry/profiles/test_java.py index 71d83dc9b1aa11..c7fea14f9c2d70 100644 --- a/tests/sentry/profiles/test_java.py +++ b/tests/sentry/profiles/test_java.py @@ -3,7 +3,7 @@ import pytest from symbolic.proguard import ProguardMapper -from sentry.profiles.java import map_obfuscated_signature +from sentry.profiles.java import deobfuscate_signature PROGUARD_SOURCE = b"""\ # compiler: R8 @@ -40,8 +40,9 @@ def mapper(): ("([Ljava/lang/String;)", "(java.lang.String[])"), ("([[J)", "(long[][])"), ("(I)I", "(int): int"), + ("([B)V", "(byte[])"), ], ) -def test_map_obfuscated_signature(mapper, obfuscated, expected): - result = map_obfuscated_signature(mapper, obfuscated) +def test_deobfuscate_signature(mapper, obfuscated, expected): + result = deobfuscate_signature(mapper, obfuscated) assert result == expected diff --git a/tests/sentry/profiles/test_task.py b/tests/sentry/profiles/test_task.py index deb321d9f32dce..064633d2a021c7 100644 --- a/tests/sentry/profiles/test_task.py +++ b/tests/sentry/profiles/test_task.py @@ -127,16 +127,18 @@ def test_basic_deobfuscation(self): "profile": { "methods": [ { - "name": "a", "abs_path": None, "class_name": "org.a.b.g$a", + "name": "a", + "signature": "()V", "source_file": None, "source_line": 67, }, { - "name": "a", "abs_path": None, "class_name": "org.a.b.g$a", + "name": "a", + "signature": "()V", "source_file": None, "source_line": 69, }, From 03dfbc6d0a2ab2a98bb3d85ac5984e2181d89768 Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Mon, 24 Jul 2023 15:37:57 -0400 Subject: [PATCH 05/13] Use the first frame returned to update obfuscated method --- src/sentry/profiles/task.py | 38 +++++++++++++++++------------- tests/sentry/profiles/test_task.py | 33 ++++++++++++++++---------- 2 files changed, 42 insertions(+), 29 deletions(-) diff --git a/src/sentry/profiles/task.py b/src/sentry/profiles/task.py index 1915729cdb2438..6a9ee679487577 100644 --- a/src/sentry/profiles/task.py +++ b/src/sentry/profiles/task.py @@ -619,34 +619,39 @@ def _deobfuscate(profile: Profile, project: Project) -> None: with sentry_sdk.start_span(op="proguard.remap"): for method in profile["profile"]["methods"]: + method.setdefault("data", {}) + mapped = mapper.remap_frame( method["class_name"], method["name"], method["source_line"] or 0 ) - method.setdefault("data", {}) - if len(mapped) == 1: + if len(mapped) >= 1: new_frame = mapped[0] - method.update( - { - "class_name": new_frame.class_name, - "name": new_frame.method, - "source_file": new_frame.file, - "source_line": new_frame.line, - } - ) - method["data"]["deobfuscation_status"] = "deobfuscated" - elif len(mapped) > 1: + new_frame_attributes = { + "class_name": new_frame.class_name, + "name": new_frame.method, + "data": {"deobfuscation_status": "deobfuscated"}, + } + + if new_frame.file: + new_frame_attributes["source_file"] = new_frame.file + + if new_frame.line: + new_frame_attributes["source_line"] = new_frame.line + + method.update(new_frame_attributes) + bottom_class = mapped[-1].class_name method["inline_frames"] = [ { "class_name": new_frame.class_name, + "data": {"deobfuscation_status": "deobfuscated"}, "name": new_frame.method, "source_file": method["source_file"] if bottom_class == new_frame.class_name - else None, + else "", "source_line": new_frame.line, - "data": {"deobfuscation_status": "deobfuscated"}, } - for new_frame in mapped + for new_frame in mapped[1:] ] else: mapped_class = mapper.remap_class(method["class_name"]) @@ -656,7 +661,8 @@ def _deobfuscate(profile: Profile, project: Project) -> None: else: method["data"]["deobfuscation_status"] = "missing" - method["signature"] = deobfuscate_signature(mapper, method["signature"]) + if "signature" in method and method["signature"]: + method["signature"] = deobfuscate_signature(mapper, method["signature"]) @metrics.wraps("process_profile.track_outcome") diff --git a/tests/sentry/profiles/test_task.py b/tests/sentry/profiles/test_task.py index 064633d2a021c7..b495ed9a1d3db0 100644 --- a/tests/sentry/profiles/test_task.py +++ b/tests/sentry/profiles/test_task.py @@ -180,16 +180,18 @@ def test_inline_deobfuscation(self): "profile": { "methods": [ { - "name": "onClick", "abs_path": None, "class_name": "e.a.c.a", + "name": "onClick", + "signature": "()V", "source_file": None, "source_line": 2, }, { - "name": "t", "abs_path": None, "class_name": "io.sentry.sample.MainActivity", + "name": "t", + "signature": "()V", "source_file": "MainActivity.java", "source_line": 1, }, @@ -202,21 +204,26 @@ def test_inline_deobfuscation(self): _deobfuscate(profile, project) frames = profile["profile"]["methods"] - assert sum(len(f.get("inline_frames", [{}])) for f in frames) == 4 + assert sum(len(f.get("inline_frames", [{}])) for f in frames) == 3 - assert frames[0]["name"] == "onClick" assert frames[0]["class_name"] == "io.sentry.sample.-$$Lambda$r3Avcbztes2hicEObh02jjhQqd4" + assert frames[0]["name"] == "onClick" + assert frames[0]["signature"] == "()" + + assert frames[1]["class_name"] == "io.sentry.sample.MainActivity" + assert frames[1]["name"] == "bar" + assert frames[1]["signature"] == "()" + assert frames[1]["source_file"] == "MainActivity.java" + assert frames[1]["source_line"] == 54 - assert frames[1]["inline_frames"][0]["source_file"] == "MainActivity.java" assert frames[1]["inline_frames"][0]["class_name"] == "io.sentry.sample.MainActivity" - assert frames[1]["inline_frames"][0]["name"] == "bar" - assert frames[1]["inline_frames"][0]["source_line"] == 54 - assert frames[1]["inline_frames"][1]["name"] == "foo" - assert frames[1]["inline_frames"][1]["source_line"] == 44 - assert frames[1]["inline_frames"][2]["name"] == "onClickHandler" - assert frames[1]["inline_frames"][2]["source_line"] == 40 - assert frames[1]["inline_frames"][2]["source_file"] == "MainActivity.java" - assert frames[1]["inline_frames"][2]["class_name"] == "io.sentry.sample.MainActivity" + assert frames[1]["inline_frames"][0]["name"] == "foo" + assert frames[1]["inline_frames"][0]["source_line"] == 44 + + assert frames[1]["inline_frames"][1]["class_name"] == "io.sentry.sample.MainActivity" + assert frames[1]["inline_frames"][1]["name"] == "onClickHandler" + assert frames[1]["inline_frames"][1]["source_file"] == "MainActivity.java" + assert frames[1]["inline_frames"][1]["source_line"] == 40 def test_error_on_resolving(self): out = BytesIO() From 3e588ec80da2adf930c4081ddf513b9d61d623f6 Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Mon, 24 Jul 2023 21:34:36 -0400 Subject: [PATCH 06/13] Fix inline frames deobfuscation --- src/sentry/profiles/task.py | 14 +++++++++----- tests/sentry/profiles/test_task.py | 28 +++++++++++++--------------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/src/sentry/profiles/task.py b/src/sentry/profiles/task.py index 6a9ee679487577..432d8772d4a3b1 100644 --- a/src/sentry/profiles/task.py +++ b/src/sentry/profiles/task.py @@ -624,8 +624,12 @@ def _deobfuscate(profile: Profile, project: Project) -> None: mapped = mapper.remap_frame( method["class_name"], method["name"], method["source_line"] or 0 ) + + if "signature" in method and method["signature"]: + method["signature"] = deobfuscate_signature(mapper, method["signature"]) + if len(mapped) >= 1: - new_frame = mapped[0] + new_frame = mapped[-1] new_frame_attributes = { "class_name": new_frame.class_name, "name": new_frame.method, @@ -651,8 +655,11 @@ def _deobfuscate(profile: Profile, project: Project) -> None: else "", "source_line": new_frame.line, } - for new_frame in mapped[1:] + for new_frame in reversed(mapped) ] + + if len(method["inline_frames"]) > 0: + method["inline_frames"][0]["signature"] = method.get("signature", "") else: mapped_class = mapper.remap_class(method["class_name"]) if mapped_class: @@ -661,9 +668,6 @@ def _deobfuscate(profile: Profile, project: Project) -> None: else: method["data"]["deobfuscation_status"] = "missing" - if "signature" in method and method["signature"]: - method["signature"] = deobfuscate_signature(mapper, method["signature"]) - @metrics.wraps("process_profile.track_outcome") def _track_outcome( diff --git a/tests/sentry/profiles/test_task.py b/tests/sentry/profiles/test_task.py index b495ed9a1d3db0..b55b34d1938250 100644 --- a/tests/sentry/profiles/test_task.py +++ b/tests/sentry/profiles/test_task.py @@ -204,26 +204,24 @@ def test_inline_deobfuscation(self): _deobfuscate(profile, project) frames = profile["profile"]["methods"] - assert sum(len(f.get("inline_frames", [{}])) for f in frames) == 3 + assert sum(len(f.get("inline_frames", [])) for f in frames) == 3 - assert frames[0]["class_name"] == "io.sentry.sample.-$$Lambda$r3Avcbztes2hicEObh02jjhQqd4" assert frames[0]["name"] == "onClick" - assert frames[0]["signature"] == "()" - - assert frames[1]["class_name"] == "io.sentry.sample.MainActivity" - assert frames[1]["name"] == "bar" - assert frames[1]["signature"] == "()" - assert frames[1]["source_file"] == "MainActivity.java" - assert frames[1]["source_line"] == 54 + assert frames[0]["class_name"] == "io.sentry.sample.-$$Lambda$r3Avcbztes2hicEObh02jjhQqd4" + assert frames[1]["inline_frames"][0]["name"] == "onClickHandler" + assert frames[1]["inline_frames"][0]["source_line"] == 40 + assert frames[1]["inline_frames"][0]["source_file"] == "MainActivity.java" assert frames[1]["inline_frames"][0]["class_name"] == "io.sentry.sample.MainActivity" - assert frames[1]["inline_frames"][0]["name"] == "foo" - assert frames[1]["inline_frames"][0]["source_line"] == 44 + assert frames[1]["inline_frames"][0]["signature"] == "()" + + assert frames[1]["inline_frames"][1]["name"] == "foo" + assert frames[1]["inline_frames"][1]["source_line"] == 44 - assert frames[1]["inline_frames"][1]["class_name"] == "io.sentry.sample.MainActivity" - assert frames[1]["inline_frames"][1]["name"] == "onClickHandler" - assert frames[1]["inline_frames"][1]["source_file"] == "MainActivity.java" - assert frames[1]["inline_frames"][1]["source_line"] == 40 + assert frames[1]["inline_frames"][2]["source_file"] == "MainActivity.java" + assert frames[1]["inline_frames"][2]["class_name"] == "io.sentry.sample.MainActivity" + assert frames[1]["inline_frames"][2]["name"] == "bar" + assert frames[1]["inline_frames"][2]["source_line"] == 54 def test_error_on_resolving(self): out = BytesIO() From cc42a8051962a8719791c134c997d7c7caf2a196 Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Tue, 25 Jul 2023 08:39:50 -0400 Subject: [PATCH 07/13] Guard against invalid signatures --- src/sentry/profiles/java.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/sentry/profiles/java.py b/src/sentry/profiles/java.py index 1fa84994f49548..66937ec0ef4a98 100644 --- a/src/sentry/profiles/java.py +++ b/src/sentry/profiles/java.py @@ -16,6 +16,9 @@ # parse_obfuscated_signature will parse an obfuscated signatures into parameter # and return types that can be then deobfuscated def parse_obfuscated_signature(signature: str) -> Tuple[List[str], str]: + if signature[0] != "(": + return [], "" + signature = signature[1:] parameter_types, return_type = signature.rsplit(")", 1) types = [] @@ -53,7 +56,10 @@ def format_signature(parameter_java_types: List[str], return_java_type: str) -> return signature -def byte_code_type_to_java_type(byte_code_type: str) -> str: +def byte_code_type_to_java_type(mapper, byte_code_type: str) -> str: + if not byte_code_type: + return "" + token = byte_code_type[0] if token in JAVA_BASE_TYPES: return JAVA_BASE_TYPES[token] @@ -72,8 +78,10 @@ def deobfuscate_signature(mapper, signature: str) -> str: return "" parameter_types, return_type = parse_obfuscated_signature(signature) - parameter_java_types = [] + if not (parameter_types or return_type): + return "" + parameter_java_types = [] for parameter_type in parameter_types: new_class = byte_code_type_to_java_type(parameter_type) mapped = mapper.remap_class(new_class) From 2bdb5219eaebcb3a66f7ccc9549ff81abefdbf64 Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Tue, 25 Jul 2023 08:40:46 -0400 Subject: [PATCH 08/13] Fix class deobfuscation in case of an array --- src/sentry/profiles/java.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/sentry/profiles/java.py b/src/sentry/profiles/java.py index 66937ec0ef4a98..c724ce293fc9f8 100644 --- a/src/sentry/profiles/java.py +++ b/src/sentry/profiles/java.py @@ -64,9 +64,13 @@ def byte_code_type_to_java_type(mapper, byte_code_type: str) -> str: if token in JAVA_BASE_TYPES: return JAVA_BASE_TYPES[token] elif token == "L": - return byte_code_type[1 : len(byte_code_type) - 1].replace("/", ".") + obfuscated = byte_code_type[1 : len(byte_code_type) - 1].replace("/", ".") + mapped = mapper.remap_class(obfuscated) + if mapped: + return mapped + return obfuscated elif token == "[": - return f"{byte_code_type_to_java_type(byte_code_type[1:])}[]" + return f"{byte_code_type_to_java_type(mapper, byte_code_type[1:])}[]" else: return byte_code_type @@ -83,15 +87,8 @@ def deobfuscate_signature(mapper, signature: str) -> str: parameter_java_types = [] for parameter_type in parameter_types: - new_class = byte_code_type_to_java_type(parameter_type) - mapped = mapper.remap_class(new_class) - if mapped: - new_class = mapped + new_class = byte_code_type_to_java_type(mapper, parameter_type) parameter_java_types.append(new_class) - return_java_type = byte_code_type_to_java_type(return_type) if return_type else "" - mapped = mapper.remap_class(return_java_type) - if mapped: - return_java_type = mapped - + return_java_type = byte_code_type_to_java_type(mapper, return_type) return format_signature(parameter_java_types, return_java_type) From a68718ab32ba0bd7724b0b9caafe64b02a67e4ac Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Tue, 25 Jul 2023 08:42:44 -0400 Subject: [PATCH 09/13] Separate valid from invalid signatures --- tests/sentry/profiles/test_java.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/sentry/profiles/test_java.py b/tests/sentry/profiles/test_java.py index c7fea14f9c2d70..c61a032faf5ed0 100644 --- a/tests/sentry/profiles/test_java.py +++ b/tests/sentry/profiles/test_java.py @@ -33,16 +33,18 @@ def mapper(): @pytest.mark.parametrize( ["obfuscated", "expected"], [ + # invalid signatures ("", ""), - ("()", "()"), - ("([I)", "(int[])"), - ("(III)", "(int, int, int)"), - ("([Ljava/lang/String;)", "(java.lang.String[])"), - ("([[J)", "(long[][])"), + ("()", ""), + # valid signatures + ("()V", "()"), + ("([I)V", "(int[])"), + ("(III)V", "(int, int, int)"), + ("([Ljava/lang/String;)V", "(java.lang.String[])"), + ("([[J)V", "(long[][])"), ("(I)I", "(int): int"), ("([B)V", "(byte[])"), ], ) def test_deobfuscate_signature(mapper, obfuscated, expected): - result = deobfuscate_signature(mapper, obfuscated) - assert result == expected + assert deobfuscate_signature(mapper, obfuscated) == expected From b1c241c712b387a978d397dc54b8a445042a328e Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Tue, 25 Jul 2023 10:41:11 -0400 Subject: [PATCH 10/13] Return the raw value if we don't support the bytecode --- src/sentry/profiles/java.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/sentry/profiles/java.py b/src/sentry/profiles/java.py index c724ce293fc9f8..c9fa782a3ea5b8 100644 --- a/src/sentry/profiles/java.py +++ b/src/sentry/profiles/java.py @@ -64,7 +64,10 @@ def byte_code_type_to_java_type(mapper, byte_code_type: str) -> str: if token in JAVA_BASE_TYPES: return JAVA_BASE_TYPES[token] elif token == "L": - obfuscated = byte_code_type[1 : len(byte_code_type) - 1].replace("/", ".") + # invalid signature + if byte_code_type[-1] != ";": + return byte_code_type + obfuscated = byte_code_type[1:-1].replace("/", ".") mapped = mapper.remap_class(obfuscated) if mapped: return mapped From 248fc9f88735a4d7aade745e1f01bd2dda8242f0 Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Tue, 25 Jul 2023 10:41:31 -0400 Subject: [PATCH 11/13] Remove the use of the update method --- src/sentry/profiles/task.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/sentry/profiles/task.py b/src/sentry/profiles/task.py index 432d8772d4a3b1..7bfc67efa77fe9 100644 --- a/src/sentry/profiles/task.py +++ b/src/sentry/profiles/task.py @@ -630,19 +630,15 @@ def _deobfuscate(profile: Profile, project: Project) -> None: if len(mapped) >= 1: new_frame = mapped[-1] - new_frame_attributes = { - "class_name": new_frame.class_name, - "name": new_frame.method, - "data": {"deobfuscation_status": "deobfuscated"}, - } + method["class_name"] = new_frame.class_name + method["name"] = new_frame.method + method["data"] = {"deobfuscation_status": "deobfuscated"} if new_frame.file: - new_frame_attributes["source_file"] = new_frame.file + method["source_file"] = new_frame.file if new_frame.line: - new_frame_attributes["source_line"] = new_frame.line - - method.update(new_frame_attributes) + method["source_line"] = new_frame.line bottom_class = mapped[-1].class_name method["inline_frames"] = [ From 0f1d2777fa8fa8d66a915961784f32d8867db741 Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Tue, 25 Jul 2023 10:41:44 -0400 Subject: [PATCH 12/13] Add a comment to clarify why we do this --- src/sentry/profiles/task.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/sentry/profiles/task.py b/src/sentry/profiles/task.py index 7bfc67efa77fe9..6fd4665f770f26 100644 --- a/src/sentry/profiles/task.py +++ b/src/sentry/profiles/task.py @@ -654,6 +654,10 @@ def _deobfuscate(profile: Profile, project: Project) -> None: for new_frame in reversed(mapped) ] + # vroom will only take into account frames in this list + # if it exists. since symbolic does not return a signature for + # the frame we deobfuscated, we update it to set + # the deobfuscated signature. if len(method["inline_frames"]) > 0: method["inline_frames"][0]["signature"] = method.get("signature", "") else: From 54ff842238408da9ba812040f198e4f6390be05d Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Tue, 25 Jul 2023 10:55:50 -0400 Subject: [PATCH 13/13] Adjust the deobfuscation status to partial if the signature is not properly deobfuscated --- src/sentry/profiles/task.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/sentry/profiles/task.py b/src/sentry/profiles/task.py index 6fd4665f770f26..3ce5385e5f1241 100644 --- a/src/sentry/profiles/task.py +++ b/src/sentry/profiles/task.py @@ -632,7 +632,11 @@ def _deobfuscate(profile: Profile, project: Project) -> None: new_frame = mapped[-1] method["class_name"] = new_frame.class_name method["name"] = new_frame.method - method["data"] = {"deobfuscation_status": "deobfuscated"} + method["data"] = { + "deobfuscation_status": "deobfuscated" + if method.get("signature", None) + else "partial" + } if new_frame.file: method["source_file"] = new_frame.file @@ -659,6 +663,7 @@ def _deobfuscate(profile: Profile, project: Project) -> None: # the frame we deobfuscated, we update it to set # the deobfuscated signature. if len(method["inline_frames"]) > 0: + method["inline_frames"][0]["data"] = method["data"] method["inline_frames"][0]["signature"] = method.get("signature", "") else: mapped_class = mapper.remap_class(method["class_name"])