Skip to content

Commit

Permalink
Merge pull request #214 from ivanyu/ivanyu/gh-210-check-python-target
Browse files Browse the repository at this point in the history
dumper: make sure target process is Python
  • Loading branch information
ivanyu authored Dec 5, 2022
2 parents 60fe347 + 7773798 commit 1e4ff64
Show file tree
Hide file tree
Showing 9 changed files with 330 additions and 22 deletions.
6 changes: 4 additions & 2 deletions pyheap/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ all: check dist
dist: dist/pyheap_dump
@echo "Distribution built: $^"

dist/pyheap_dump: build/requirements.txt build/src/*.py
dist/pyheap_dump: build/requirements.txt build-src
mkdir -p dist
poetry run pex \
--requirement=build/requirements.txt \
Expand All @@ -31,8 +31,10 @@ build/requirements.txt: poetry.lock
mkdir -p build/
poetry export --without-hashes --format=requirements.txt > build/requirements.txt

build/src/%.py: src/%.py
.PHONY: build-src
build-src: src/*.py
git ls-files --error-unmatch $^
rm -rf build/src
mkdir -p build/src
cp $^ build/src

Expand Down
14 changes: 13 additions & 1 deletion pyheap/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyheap/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ license = "Apache License 2.0"

[tool.poetry.dependencies]
python = ">=3.8,<3.12"
pyelftools = "^0.29"

[tool.poetry.dev-dependencies]
pytest = "^7.1"
Expand Down
18 changes: 18 additions & 0 deletions pyheap/src/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#
# Copyright 2022 Ivan Yurchenko
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import annotations

# Name of the CPython symbol that the dumper uses as its gdb breakpoint target
# and looks up in ELF dynamic symbol tables to recognise a Python process.
PY_EVAL_EVAL_FRAME_DEFAULT = "_PyEval_EvalFrameDefault"
29 changes: 11 additions & 18 deletions pyheap/src/gdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,30 +22,23 @@
from typing import List, Optional, Iterator

import mount
from proc import proc_maps


def solib_search_paths(pid: int, pid_in_ns: int) -> List[str]:
# libc (isn't used in e.g. Alpine Linux) and libpthread (maybe not loaded) are optional.
libc_path: Optional[str] = None
libpthread_path: Optional[str] = None
try:
with open(f"/proc/{pid}/maps", "r") as f:
for l in f.readlines():
parts = re.split("\s+", l.strip())
if len(parts) != 6:
continue

path = parts[-1]
if libc_path is None and re.search(r"libc(-[\d.]+)?\.so(\.|$)", path):
libc_path = path
if libpthread_path is None and re.search(
r"libpthread(-[\d.]+)?\.so(\.|$)", path
):
libpthread_path = path
except PermissionError as e:
raise Exception(
"Hint: the target process is likely run under a different user, use sudo"
) from e
for parts in proc_maps(pid):
if len(parts) != 6:
continue
path = parts[-1]
if libc_path is None and re.search(r"libc(-[\d.]+)?\.so(\.|$)", path):
libc_path = path
if libpthread_path is None and re.search(
r"libpthread(-[\d.]+)?\.so(\.|$)", path
):
libpthread_path = path

dirs = set()
if libc_path is not None:
Expand Down
29 changes: 29 additions & 0 deletions pyheap/src/proc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#
# Copyright 2022 Ivan Yurchenko
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import re
from typing import Iterator, Tuple


def proc_maps(pid: int) -> Iterator[Tuple[str, ...]]:
    """Yield the whitespace-separated fields of each line of ``/proc/<pid>/maps``.

    Every yielded item is a tuple of strings holding the fields of one line,
    in file order. Callers decide which lines they care about (for example
    by checking the number of fields).

    :param pid: PID of the process whose memory mappings are read.
    :raises Exception: when the maps file cannot be opened because of missing
        permissions; the original ``PermissionError`` is chained as the cause.
    """
    try:
        with open(f"/proc/{pid}/maps", "r") as f:
            # Iterate the file object directly instead of materialising all
            # lines with readlines() first.
            for line in f:
                # Raw string: "\s" in a plain literal is an invalid escape
                # sequence. The tuple matches the declared return type.
                yield tuple(re.split(r"\s+", line.strip()))
    except PermissionError as e:
        raise Exception(
            "Hint: the target process is likely run under a different user, use sudo"
        ) from e
23 changes: 22 additions & 1 deletion pyheap/src/pyheap_dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
ContextManager,
)

from constants import PY_EVAL_EVAL_FRAME_DEFAULT
from docker import get_container_pid
from gdb import solib_search_paths, bind_gdb_exe, shadow_target_exe_dir_for_gdb
from namespaces import (
Expand All @@ -46,6 +47,7 @@
two_processes_in_same_pid_namespace,
pid_in_own_namespace,
)
from python_checker import check_if_python


def dump_heap(args: argparse.Namespace) -> int:
Expand All @@ -71,6 +73,17 @@ def dump_heap(args: argparse.Namespace) -> int:
print(f"Target process PID in its own namespace: {target_pid_in_ns}")
nsenter_needed = True

if not check_if_python(target_pid):
if args.ignore_compatibility_checks:
print(
"Target process does not look like Python. --ignore-compatibility-checks is specified, ignoring"
)
else:
print(
"Target process does not look like Python. If you are sure, use --ignore-compatibility-checks"
)
return 1

solid_search_paths = ":".join(solib_search_paths(target_pid, target_pid_in_ns))

injector_code = _load_code("injector.py")
Expand Down Expand Up @@ -117,7 +130,7 @@ def dump_heap(args: argparse.Namespace) -> int:
"-iex",
"set debuginfod enabled off",
"-ex",
"break _PyEval_EvalFrameDefault",
f"break {PY_EVAL_EVAL_FRAME_DEFAULT}",
"-ex",
"continue",
"-ex",
Expand Down Expand Up @@ -295,6 +308,14 @@ def main() -> None:
help="max length of string representation of objects (-1 disables it)",
default=1000,
)

parser.add_argument(
"--ignore-compatibility-checks",
action="store_true",
default=False,
help="ignore various compatibility checks for the target process",
)

parser.set_defaults(func=dump_heap)

args = parser.parse_args()
Expand Down
70 changes: 70 additions & 0 deletions pyheap/src/python_checker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#
# Copyright 2022 Ivan Yurchenko
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import re
from typing import Optional

from elftools.elf import elffile

from constants import PY_EVAL_EVAL_FRAME_DEFAULT
from proc import proc_maps


def check_if_python(pid: int) -> bool:
    """Heuristically decide whether the process `pid` is a CPython process we can work with.

    The process is considered Python when the _PyEval_EvalFrameDefault symbol
    is found either in its executable or in a libpython shared library that
    it has mapped.
    """
    # The executable itself may carry the symbol.
    exe_path = f"/proc/{pid}/exe"
    if _check_has_eval_symbol(exe_path):
        return True

    # Otherwise the symbol has to come from a mapped libpython, if there is one.
    libpython_path = _get_libpython_path(pid)
    return bool(libpython_path) and _check_has_eval_symbol(libpython_path)


def _check_has_eval_symbol(path: str) -> bool:
    """Tell whether the ELF file at `path` exposes the CPython eval-frame symbol.

    Only the ``.dynsym`` section is inspected. A file without that section
    (or with a null one) is reported as not having the symbol.

    :param path: path of the ELF file to inspect.
    :return: True if the PY_EVAL_EVAL_FRAME_DEFAULT symbol is present in ``.dynsym``.
    :raises Exception: when the file cannot be opened because of missing
        permissions; the original ``PermissionError`` is chained as the cause.
    """
    try:
        with open(path, "rb") as f:
            elf = elffile.ELFFile(f)
            dynsym = elf.get_section_by_name(".dynsym")
            # get_section_by_name() returns None when the file has no such
            # section; without this check the is_null() call below would
            # raise AttributeError instead of reporting "not Python".
            if dynsym is None or dynsym.is_null():
                return False
            return dynsym.get_symbol_by_name(PY_EVAL_EVAL_FRAME_DEFAULT) is not None
    except PermissionError as e:
        raise Exception(
            "Hint: the target process is likely run under a different user, use sudo"
        ) from e


def _get_libpython_path(pid: int) -> Optional[str]:
    """Find the libpython shared library mapped into the process `pid`.

    Returns the path of the first matching mapping, prefixed with
    ``/proc/<pid>/root``, or None when no mapping matches.
    """
    try:
        for fields in proc_maps(pid):
            # Only rows with exactly six fields are considered; the last
            # field is taken as the mapped file path.
            if len(fields) == 6:
                mapped_path = fields[-1]
                if re.search(r"libpython([\d.]+)?\.so(\.|$)", mapped_path):
                    return f"/proc/{pid}/root{mapped_path}"
        # Loop exhausted without finding a libpython mapping.
        return None
    except PermissionError as e:
        raise Exception(
            "Hint: the target process is likely run under a different user, use sudo"
        ) from e
Loading

0 comments on commit 1e4ff64

Please sign in to comment.