From a979efa5b8d1bda7c0f859c79682c55e04e427f6 Mon Sep 17 00:00:00 2001 From: Ivan Yurchenko Date: Tue, 6 Dec 2022 17:36:35 +0200 Subject: [PATCH] dumper: support running as a Docker container and build Docker image Closes #158 --- .github/workflows/pr_and_main_push.yml | 41 +++++- Makefile | 54 ++++---- README.md | 21 +++ integration_tests/Makefile | 2 +- integration_tests/manual_test_e2e.py | 172 ++++++++++++++++--------- pyheap/Dockerfile | 28 ++++ pyheap/Makefile | 4 + pyheap/src/gdb.py | 4 +- pyheap/src/pyheap_dump.py | 17 ++- 9 files changed, 241 insertions(+), 102 deletions(-) create mode 100644 pyheap/Dockerfile diff --git a/.github/workflows/pr_and_main_push.yml b/.github/workflows/pr_and_main_push.yml index 0852031..31f3e79 100644 --- a/.github/workflows/pr_and_main_push.yml +++ b/.github/workflows/pr_and_main_push.yml @@ -101,8 +101,8 @@ jobs: working-directory: ./integration_tests run: poetry run pytest -vv - build_pyheap: - name: Build PyHeap distribution and release if needed + build_and_upload_pyheap: + name: Build PyHeap distribution and upload if needed runs-on: ubuntu-latest needs: integration_tests steps: @@ -132,8 +132,41 @@ jobs: files: | ./pyheap/dist/pyheap_dump - build_and_publish_pyheap_ui_docker_image: - name: Build and publish PyHeap UI Docker image + - name: Prepare Docker build + working-directory: ./pyheap-ui + run: make docker-prepare + + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + + - name: Docker meta + id: meta + uses: docker/metadata-action@v4 + with: + images: ivanyu/pyheap-dumper + tags: | + type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', 'main') }} + type=semver,pattern={{version}} + + - uses: docker/login-action@v2 + name: Login to Docker Hub + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - uses: docker/build-push-action@v3 + name: Build and push + with: + push: ${{ 
github.event_name == 'push' || github.event_name == 'release' }} + context: ./pyheap + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + + build_and_upload_pyheap_ui: + name: Build PyHeap UI distribution and upload if needed runs-on: ubuntu-latest needs: integration_tests permissions: diff --git a/Makefile b/Makefile index 318b3be..f9e600b 100644 --- a/Makefile +++ b/Makefile @@ -17,45 +17,39 @@ .PHONY: clean clean: (cd pyheap && $(MAKE) clean) + (cd integration_tests && $(MAKE) clean) + +pyheap/dist/pyheap_dump: + (cd pyheap && $(MAKE) dist) + +.PHONY: dumper-docker-image +dumper-docker-image: + (cd pyheap && $(MAKE) docker-image) .PHONY: integration-tests integration-tests: integration-tests-3-8 integration-tests-3-9 integration-tests-3-10 integration-tests-3-11 -pyheap/dist/pyheap_dump: - (cd pyheap && $(MAKE) dist) +define run_integration_test + cd integration_tests && \ + $(MAKE) "$2" && \ + PYENV_VERSION="$1" poetry env use python && \ + poetry run pip install -e ../pyheap-ui/ && \ + poetry install && \ + poetry run pytest -vv ./*.py +endef .PHONY: integration-tests-3-8 -integration-tests-3-8: pyheap/dist/pyheap_dump - (cd integration_tests && \ - $(MAKE) test-target-docker-images-3-8 && \ - PYENV_VERSION=3.8 poetry env use python && \ - poetry run pip install -e ../pyheap-ui/ && \ - poetry install && \ - poetry run pytest -vv ./*.py) +integration-tests-3-8: pyheap/dist/pyheap_dump dumper-docker-image + $(call run_integration_test,3.8,test-target-docker-images-3-8) .PHONY: integration-tests-3-9 -integration-tests-3-9: pyheap/dist/pyheap_dump - (cd integration_tests && \ - $(MAKE) test-target-docker-images-3-9 && \ - PYENV_VERSION=3.9 poetry env use python && \ - poetry run pip install -e ../pyheap-ui/ && \ - poetry install && \ - poetry run pytest -vv ./*.py) +integration-tests-3-9: pyheap/dist/pyheap_dump dumper-docker-image + $(call run_integration_test,3.9,test-target-docker-images-3-9) .PHONY: integration-tests-3-10 
-integration-tests-3-10: pyheap/dist/pyheap_dump - (cd integration_tests && \ - $(MAKE) test-target-docker-images-3-10 && \ - PYENV_VERSION=3.10 poetry env use python && \ - poetry run pip install -e ../pyheap-ui/ && \ - poetry install && \ - poetry run pytest -vv ./*.py) +integration-tests-3-10: pyheap/dist/pyheap_dump dumper-docker-image + $(call run_integration_test,3.10,test-target-docker-images-3-10) .PHONY: integration-tests-3-11 -integration-tests-3-11: pyheap/dist/pyheap_dump - (cd integration_tests && \ - $(MAKE) test-target-docker-images-3-11 && \ - PYENV_VERSION=3.11 poetry env use python && \ - poetry run pip install -e ../pyheap-ui/ && \ - poetry install && \ - poetry run pytest -vv ./*.py) +integration-tests-3-11: pyheap/dist/pyheap_dump dumper-docker-image + $(call run_integration_test,3.11,test-target-docker-images-3-11) diff --git a/README.md b/README.md index 3de74ae..87cc7fa 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,27 @@ $ python3 pyheap_dump -h ``` for additional options. +#### Running in a Docker Container + +The dumper can also be run in a Docker container. + +If the target process is also running in a Docker container, it's possible to attach the dumper container directly to it: + +```bash +docker run \ + --rm \ + --pid=container:<container_name_or_id> \ + --cap-add=SYS_PTRACE \ + --volume $(pwd):/heap-dumps \ + ivanyu/pyheap-dumper:latest \ + --pid 1 \ + --file /heap-dumps/heap.pyheap +``` + +You can replace `latest` with a release version. + +If you need to run it against a process on the host, use `--pid=host` instead. + ### Containers and Namespaces PyHeap can attach to targets that are running in Linux namespaces. Docker containers is the most common example of this situation.
diff --git a/integration_tests/Makefile b/integration_tests/Makefile index 30e05a0..7c94664 100644 --- a/integration_tests/Makefile +++ b/integration_tests/Makefile @@ -16,7 +16,7 @@ .PHONY: clean clean: - rm e2e_docker/inferior-simple.py + rm -f e2e_docker/inferior-simple.py define build_image docker build e2e_docker \ diff --git a/integration_tests/manual_test_e2e.py b/integration_tests/manual_test_e2e.py index dbe5004..493171c 100644 --- a/integration_tests/manual_test_e2e.py +++ b/integration_tests/manual_test_e2e.py @@ -19,32 +19,41 @@ import sys import time from contextlib import contextmanager, closing -from typing import Iterator, Union, Optional +from pathlib import Path +from typing import Iterator, List import pytest from _pytest.tmpdir import TempPathFactory from pyheap_ui.heap_reader import HeapReader -@pytest.mark.parametrize("docker_base", ["alpine", "debian", "ubuntu", "fedora", None]) -def test_e2e(docker_base: Optional[str], test_heap_path: str) -> None: - is_docker = docker_base is not None - with _inferior_process(docker_base) as ip_pid_or_container, _dumper_process( - test_heap_path, ip_pid_or_container, is_docker - ) as dp: - print(f"Inferior process/container {ip_pid_or_container}") - print(f"Dumper process {dp.pid}") - dp.wait(10) - assert dp.returncode == 0 +def test_e2e_target_host_dumper_host(test_heap_path: str) -> None: + with _target_process_host() as pid: + _dumper_on_host_for_host(test_heap_path, pid) + _check_heap_file(test_heap_path) - assert os.path.exists(test_heap_path) - with open(test_heap_path, "rb") as f: - mm = mmap.mmap(f.fileno(), length=0, access=mmap.ACCESS_READ) - with closing(mm): - reader = HeapReader(mm) - reader.read() - # Check that we have read everything. 
- assert reader._offset == mm.size() +@pytest.mark.parametrize("target_docker_base", ["alpine", "debian", "ubuntu", "fedora"]) +def test_e2e_target_docker_dumper_host( + target_docker_base: str, test_heap_path: str +) -> None: + with _target_process_docker(target_docker_base) as container_id: + _dumper_on_host_for_docker(test_heap_path, container_id) + _check_heap_file(test_heap_path) + + +def test_e2e_target_host_dumper_docker(test_heap_path: str) -> None: + with _target_process_host() as pid: + _dumper_on_docker_for_host(test_heap_path, pid) + _check_heap_file(test_heap_path) + + +@pytest.mark.parametrize("target_docker_base", ["alpine", "debian", "ubuntu", "fedora"]) +def test_e2e_target_docker_dumper_docker( + target_docker_base: str, test_heap_path: str +) -> None: + with _target_process_docker(target_docker_base) as container_id: + _dumper_on_docker_for_docker(test_heap_path, container_id) + _check_heap_file(test_heap_path) @pytest.fixture(scope="function") @@ -56,8 +65,19 @@ def test_heap_path(tmp_path_factory: TempPathFactory) -> str: os.remove(r) +def _check_heap_file(test_heap_path: str) -> None: + assert os.path.exists(test_heap_path) + with open(test_heap_path, "rb") as f: + mm = mmap.mmap(f.fileno(), length=0, access=mmap.ACCESS_READ) + with closing(mm): + reader = HeapReader(mm) + reader.read() + # Check that we have read everything. 
+ assert reader._offset == mm.size() + + @contextmanager -def _inferior_process_plain() -> Iterator[int]: +def _target_process_host() -> Iterator[int]: inferior_proc = subprocess.Popen( [sys.executable, "inferior-simple.py"], stdout=subprocess.PIPE, @@ -72,7 +92,7 @@ def _inferior_process_plain() -> Iterator[int]: @contextmanager -def _inferior_process_docker(docker_base: str) -> Iterator[str]: +def _target_process_docker(docker_base: str) -> Iterator[str]: python_version = f"{sys.version_info.major}.{sys.version_info.minor}" docker_proc = subprocess.run( [ @@ -84,69 +104,99 @@ def _inferior_process_docker(docker_base: str) -> Iterator[str]: ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, + text=True, + encoding="utf-8", ) if docker_proc.returncode != 0: - print(docker_proc.stdout.decode("utf-8")) - print(docker_proc.stderr.decode("utf-8")) + print(docker_proc.stdout) + print(docker_proc.stderr) assert docker_proc.returncode == 0 - container_id = docker_proc.stdout.decode("utf-8").strip() + container_id = docker_proc.stdout.strip() try: yield container_id finally: - subprocess.run( - ["docker", "kill", container_id], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) + subprocess.check_call(["docker", "kill", container_id]) @contextmanager -def _inferior_process(docker_base: Optional[str]) -> Iterator[Union[int, str]]: - if docker_base is not None: - with _inferior_process_docker(docker_base) as r: - yield r - else: - with _inferior_process_plain() as r: - yield r +def _dumper_on_host_for_host(test_heap_path: str, pid: int) -> None: + cmd = [sys.executable, "../pyheap/dist/pyheap_dump"] + cmd += ["--pid", str(pid)] + cmd += ["--file", test_heap_path] + _run_dumper(cmd, False, test_heap_path) @contextmanager -def _dumper_process( - test_heap_path: str, pid_or_container: Union[int, str], docker: bool -) -> Iterator[subprocess.Popen]: - sudo_required = docker - cmd = [] - if sudo_required: - cmd = ["sudo"] - cmd += [sys.executable, "dist/pyheap_dump"] - - if 
docker: - cmd += ["--docker-container", str(pid_or_container)] - else: - cmd += ["--pid", str(pid_or_container)] +def _dumper_on_host_for_docker(test_heap_path: str, container_id: str) -> None: + cmd = ["sudo"] + cmd += [sys.executable, "../pyheap/dist/pyheap_dump"] + cmd += ["--docker-container", container_id] cmd += ["--file", test_heap_path] + _run_dumper(cmd, True, test_heap_path) + +@contextmanager +def _dumper_on_docker_for_host(test_heap_path: str, pid: int) -> None: + test_heap_path_dir = Path(test_heap_path).parent + cmd = [ + "docker", + "run", + "--rm", + "--pid=host", + "--cap-add=SYS_PTRACE", + "--volume", + f"{test_heap_path_dir}:/heap-dir", + "ivanyu/pyheap-dumper", + "--pid", + str(pid), + "--file", + "/heap-dir/heap.pyheap", + ] + _run_dumper(cmd, True, test_heap_path) + + +@contextmanager +def _dumper_on_docker_for_docker(test_heap_path: str, container_id: str) -> None: + test_heap_path_dir = Path(test_heap_path).parent + cmd = ["docker", "run", "--rm"] + cmd += [ + f"--pid=container:{container_id}", + "--cap-add=SYS_PTRACE", + "--volume", + f"{test_heap_path_dir}:/heap-dir", + "ivanyu/pyheap-dumper", + "--pid", + "1", + "--file", + "/heap-dir/heap.pyheap", + ] + _run_dumper(cmd, True, test_heap_path) + + +def _run_dumper(cmd: List[str], chown: bool, test_heap_path: str) -> None: + print(cmd) dumper_proc = subprocess.Popen( cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, - cwd="../pyheap", + text=True, + encoding="utf-8", ) + try: - yield dumper_proc - finally: + dumper_proc.wait(10) + except subprocess.TimeoutExpired as e: dumper_proc.kill() - out, err = dumper_proc.communicate(timeout=5) - print(out.decode("utf-8")) - print(err.decode("utf-8")) + raise e + + if dumper_proc.returncode != 0: + print(dumper_proc.stdout.read()) + print(dumper_proc.stderr.read()) + assert dumper_proc.returncode == 0 - if sudo_required: - chown_proc = subprocess.run( + if chown: + subprocess.check_call( ["sudo", "chown", f"{os.getuid()}:{os.getgid()}", 
test_heap_path] ) - if chown_proc.returncode != 0: - print(chown_proc.stdout.decode("utf-8")) - print(chown_proc.stderr.decode("utf-8")) - assert chown_proc.returncode == 0 diff --git a/pyheap/Dockerfile b/pyheap/Dockerfile new file mode 100644 index 0000000..64cdb1d --- /dev/null +++ b/pyheap/Dockerfile @@ -0,0 +1,28 @@ +# +# Copyright 2022 Ivan Yurchenko +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +FROM python:3.10.8-alpine3.16 + +RUN apk add --no-cache gdb + +# Prevent Python 3.10 from being discoverable by GDB. +RUN mkdir /pyheap_python \ + && mv /usr/bin/python3 /pyheap_python \ + && mv /usr/bin/python3.10 /pyheap_python + +ADD /dist/pyheap_dump /pyheap_dump +RUN /pyheap_python/python3 /pyheap_dump >/dev/null 2>&1 || true # unzip (warm up cache) + +ENTRYPOINT ["/pyheap_python/python3", "/pyheap_dump"] diff --git a/pyheap/Makefile b/pyheap/Makefile index 6e84332..235e3e2 100644 --- a/pyheap/Makefile +++ b/pyheap/Makefile @@ -38,6 +38,10 @@ build-src: src/*.py mkdir -p build/src cp $^ build/src +.PHONY: docker-image +docker-image: dist/pyheap_dump + docker build . 
-f Dockerfile -t ivanyu/pyheap-dumper + clean: rm -rf build rm -rf dist diff --git a/pyheap/src/gdb.py b/pyheap/src/gdb.py index 0101438..f458801 100644 --- a/pyheap/src/gdb.py +++ b/pyheap/src/gdb.py @@ -63,7 +63,7 @@ def bind_gdb_exe(gdb_exe: str, temp_dir: str) -> Iterator[str]: mount.umount(mounted) -def shadow_target_exe_dir_for_gdb(target_pid: int, temp_dir: str) -> None: +def shadow_target_exe_dir_for_gdb(target_pid: int, temp_dir: str, force: bool) -> None: """Shadows the target executable directory for GDB. There may be a situation, where ``self/pid/exe`` points to an executable inside @@ -86,6 +86,8 @@ def shadow_target_exe_dir_for_gdb(target_pid: int, temp_dir: str) -> None: print( f"Target exe link resolves to {target_exe}, which exists in our namespace" ) + elif force: + print("Shadowing is forced") else: return dir_to_shadow = str(Path(target_exe).parent) diff --git a/pyheap/src/pyheap_dump.py b/pyheap/src/pyheap_dump.py index 79e592c..8a6ed8c 100644 --- a/pyheap/src/pyheap_dump.py +++ b/pyheap/src/pyheap_dump.py @@ -89,7 +89,7 @@ def dump_heap(args: argparse.Namespace) -> int: injector_code = _load_code("injector.py") dumper_code = _prepare_dumper_code() - if nsenter_needed: + if nsenter_needed or args.force_shadow: nsenter_to_pid_ns_with_fork(target_pid) unshare_and_mount_proc() @@ -97,13 +97,13 @@ def dump_heap(args: argparse.Namespace) -> int: dumper_temp_dir = stack.enter_context(TemporaryDirectory(prefix="pyheap-")) gdb_exe = os.path.realpath(shutil.which("gdb")) - if nsenter_needed: + if nsenter_needed or args.force_shadow: gdb_exe = stack.enter_context( cast(ContextManager[str], bind_gdb_exe(gdb_exe, dumper_temp_dir)) ) - - if nsenter_needed: - shadow_target_exe_dir_for_gdb(target_pid_in_ns, dumper_temp_dir) + shadow_target_exe_dir_for_gdb( + target_pid_in_ns, dumper_temp_dir, force=args.force_shadow is True + ) target_temp_dir = stack.enter_context( closing(TargetTemporaryDirectory(target_pid_in_ns)) @@ -316,6 +316,13 @@ def main() -> None: 
help="ignore various compatibility checks for the target process", ) + parser.add_argument( + "--force-shadow", + action="store_true", + default=False, + help="force shadowing of the Python executable directory (e.g. /usr/bin); nsenter + unshare will also be forced", + ) + parser.set_defaults(func=dump_heap) args = parser.parse_args()