From f68ef1cd760cad19e1f3c291a87c88f1ed07ba9d Mon Sep 17 00:00:00 2001 From: Evan Sultanik Date: Tue, 21 Jul 2020 15:22:41 -0400 Subject: [PATCH 01/19] Moved all Python code into the `polytracker` package --- CMakeLists.txt | 3 --- Dockerfile | 15 +++++---------- polybuild/polybuild++.py | 1 - polyprocess/__init__.py | 1 - polytracker/__init__.py | 0 polytracker/polybuild/__init__.py | 0 .../polybuild/__main__.py | 0 polytracker/polyprocess/__init__.py | 1 + .../polyprocess}/__main__.py | 0 polytracker/polyprocess/mimid | 1 + .../polyprocess}/polyprocess.py | 0 setup.py | 11 ++++++++--- tests/test_polyprocess.py | 2 +- tests/test_polytracker.py | 2 +- 14 files changed, 17 insertions(+), 20 deletions(-) delete mode 120000 polybuild/polybuild++.py delete mode 100644 polyprocess/__init__.py create mode 100644 polytracker/__init__.py create mode 100644 polytracker/polybuild/__init__.py rename polybuild/polybuild.py => polytracker/polybuild/__main__.py (100%) create mode 100644 polytracker/polyprocess/__init__.py rename {polyprocess => polytracker/polyprocess}/__main__.py (100%) create mode 120000 polytracker/polyprocess/mimid rename {polyprocess => polytracker/polyprocess}/polyprocess.py (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 77cdd011..4dbe6ce5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,6 +17,3 @@ set(POLYTRACK_PASS_DIR "${CMAKE_INSTALL_PREFIX}/polytracker/pass") set(POLYTRACK_CXX_DIR "${CMAKE_INSTALL_PREFIX}/polytracker/") add_subdirectory(polytracker) -install(DIRECTORY "./polybuild" DESTINATION ${POLYTRACK_CXX_DIR}) - - diff --git a/Dockerfile b/Dockerfile index c335a5af..4a99f6a0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,7 +26,8 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get -y update \ libgraphviz-dev \ graphviz -RUN python3.7 -m pip install pip +RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.7 10 +RUN python3 -m pip install pip RUN go get github.com/SRI-CSL/gllvm/cmd/... @@ -36,12 +37,7 @@ COPY . /polytracker WORKDIR /polytracker -RUN python3.7 -m pip install pytest - -RUN python3.7 -m pip install . - -RUN rm /usr/bin/python3 -RUN cp /usr/bin/python3.7 /usr/bin/python3 +RUN pip3 install pytest . RUN rm -rf build && mkdir -p build @@ -50,10 +46,9 @@ WORKDIR /polytracker/build ENV PATH="/usr/lib/llvm-7/bin:${PATH}" RUN cmake -G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_VERBOSE_MAKEFILE=TRUE .. && ninja install -ENV CC=/polytracker/build/bin/polytracker/polybuild/polybuild.py -ENV CXX=/polytracker/build/bin/polytracker/polybuild/polybuild++.py +ENV CC=polybuild +ENV CXX=polybuild++ ENV LLVM_COMPILER=clang -RUN chmod +x ${CC} RUN mkdir -p "/build_artifacts" # Set the BC store path to the /cxx_libs/bitcode/bitcode_store} diff --git a/polybuild/polybuild++.py b/polybuild/polybuild++.py deleted file mode 120000 index 6f30471b..00000000 --- a/polybuild/polybuild++.py +++ /dev/null @@ -1 +0,0 @@ -polybuild.py \ No newline at end of file diff --git a/polyprocess/__init__.py b/polyprocess/__init__.py deleted file mode 100644 index 38ab3e4f..00000000 --- a/polyprocess/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from polyprocess.polyprocess import PolyProcess diff --git a/polytracker/__init__.py b/polytracker/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/polytracker/polybuild/__init__.py b/polytracker/polybuild/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/polybuild/polybuild.py b/polytracker/polybuild/__main__.py similarity index 100% rename from polybuild/polybuild.py rename to polytracker/polybuild/__main__.py diff --git a/polytracker/polyprocess/__init__.py b/polytracker/polyprocess/__init__.py new file mode 100644 index 00000000..f09f967b --- /dev/null +++ b/polytracker/polyprocess/__init__.py @@ -0,0 +1 @@ +from .polyprocess import * diff --git a/polyprocess/__main__.py b/polytracker/polyprocess/__main__.py similarity index 100% rename from polyprocess/__main__.py rename to polytracker/polyprocess/__main__.py diff --git a/polytracker/polyprocess/mimid b/polytracker/polyprocess/mimid new file mode 120000 index 00000000..500b4302 --- /dev/null +++ b/polytracker/polyprocess/mimid @@ -0,0 +1 @@ +../third_party/mimid/mimid \ No newline at end of file diff --git a/polyprocess/polyprocess.py b/polytracker/polyprocess/polyprocess.py similarity index 100% rename from polyprocess/polyprocess.py rename to polytracker/polyprocess/polyprocess.py diff --git a/setup.py b/setup.py index 80b16ef0..dd5d8128 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,8 @@ from setuptools import setup, find_packages setup( - name='polyprocess', - description='A library and utility for processing and analyzing PolyTracker output', + name='polytracker', + description='API and Library for operating and interacting with PolyTracker', url='https://github.com/trailofbits/polytracker', author='Trail of Bits', version="0.1.0", @@ -16,9 +16,14 @@ 'pydot', 'typing_extensions' ], + extras_require={ + "dev": ["black", "mypy", "pytest"] + }, entry_points={ 'console_scripts': [ - 'polyprocess = polyprocess.__main__:main' + 'polyprocess = polytracker.polyprocess.__main__:main', + 'polybuild = polytracker.polybuild.__main__:main', + 'polybuild++ = polytracker.polybuild.__main__:main' ] }, classifiers=[ diff --git a/tests/test_polyprocess.py b/tests/test_polyprocess.py index d1d92b0b..ba8af655 100644 --- a/tests/test_polyprocess.py +++ b/tests/test_polyprocess.py @@ -1,4 +1,4 @@ -from polyprocess import PolyProcess +from polytracker.polyprocess import PolyProcess import pytest import logging from typing import List diff --git a/tests/test_polytracker.py b/tests/test_polytracker.py index 7c4f86c8..19cdd14e 100644 --- a/tests/test_polytracker.py +++ b/tests/test_polytracker.py @@ -1,6 +1,6 @@ import pytest import os -from polyprocess import PolyProcess +from polytracker.polyprocess import PolyProcess import subprocess TEST_DIR = os.path.realpath(os.path.dirname(__file__)) From 2c0b52102f6f69e79de53c0d6af6d703628a5a7e Mon Sep 17 00:00:00 2001 From: Evan Sultanik Date: Tue, 21 Jul 2020 15:25:22 -0400 Subject: [PATCH 02/19] Publish to PyPI on release --- .github/workflows/pythonpublish.yml | 31 +++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 .github/workflows/pythonpublish.yml diff --git a/.github/workflows/pythonpublish.yml b/.github/workflows/pythonpublish.yml new file mode 100644 index 00000000..b8b4fadd --- /dev/null +++ b/.github/workflows/pythonpublish.yml @@ -0,0 +1,31 @@ +# This workflows will upload a Python Package using Twine when a release is created +# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries + +name: Upload Python Package + +on: + release: + types: [published] + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v1 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine + - name: Build and publish + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + run: | + python setup.py sdist bdist_wheel + twine upload dist/* From cdb2ef7b385a4ed66e4f36bc1ac93649deb22960 Mon Sep 17 00:00:00 2001 From: Evan Sultanik Date: Tue, 21 Jul 2020 15:47:02 -0400 Subject: [PATCH 03/19] Fix Python linting --- .github/workflows/dockerimage.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/dockerimage.yml b/.github/workflows/dockerimage.yml index 67444856..d60e12b2 100644 --- a/.github/workflows/dockerimage.yml +++ b/.github/workflows/dockerimage.yml @@ -37,9 +37,9 @@ jobs: --exclude 'polytracker/src/dfsan_rt/interception/*' - name: Python lint/typecheck run: | - black --check polybuild polyprocess tests --line-length=127 - mypy --ignore-missing-imports polybuild polyprocess tests - - name: Build the base image + black --check polytracker tests --exclude '/(polytracker/src|polytracker/scripts)/' --line-length=127 + mypy --ignore-missing-imports polytracker tests + - name: Build the base image run: docker build . --file Dockerfile --tag trailofbits/polytracker --no-cache - name: Poly* tests run: | From 358340954e4a625f491f75fc4b55c301357483a4 Mon Sep 17 00:00:00 2001 From: Evan Sultanik Date: Tue, 21 Jul 2020 17:41:58 -0400 Subject: [PATCH 04/19] Cleaning up the CMake install. polybuild is likely broken now though --- CMakeLists.txt | 16 +++---- Dockerfile | 1 + format.sh | 4 +- polytracker/CMakeLists.txt | 26 ++++++++---- .../__main__.py => scripts/polybuild.py} | 42 +++++++++---------- setup.py | 4 +- 6 files changed, 52 insertions(+), 41 deletions(-) rename polytracker/{polybuild/__main__.py => scripts/polybuild.py} (91%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4dbe6ce5..1b25d147 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,15 +5,15 @@ project(TAPP) # If there is no explicit -DCMAKE_INSTALL_PREFIX=DIR setting given, # then install underneath the build directory if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) - set (CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR}/bin CACHE PATH "default install path" FORCE) + set (CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR} CACHE PATH "default install path" FORCE) endif() -set(POLYTRACK_BIN_DIR "${CMAKE_INSTALL_PREFIX}/polytracker") -set(POLYTRACK_LIB_DIR "${CMAKE_INSTALL_PREFIX}/polytracker/lib") -set(POLYTRACK_TRACK_LIB_DIR "${CMAKE_INSTALL_PREFIX}/polytracker/track") -set(POLYTRACK_RULE_DIR "${CMAKE_INSTALL_PREFIX}/polytracker/abi_lists") -set(POLYTRACK_TESTS_DIR "${CMAKE_INSTALL_PREFIX}/polytracker/tests") -set(POLYTRACK_PASS_DIR "${CMAKE_INSTALL_PREFIX}/polytracker/pass") -set(POLYTRACK_CXX_DIR "${CMAKE_INSTALL_PREFIX}/polytracker/") +set(POLYTRACK_BIN_DIR "${CMAKE_INSTALL_PREFIX}/share/polytracker/bin") +set(POLYTRACK_LIB_DIR "${CMAKE_INSTALL_PREFIX}/share/polytracker/lib") +set(POLYTRACK_TRACK_LIB_DIR "${CMAKE_INSTALL_PREFIX}/share/polytracker/track") +set(POLYTRACK_RULE_DIR "${CMAKE_INSTALL_PREFIX}/share/polytracker/abi_lists") +set(POLYTRACK_TESTS_DIR "${CMAKE_INSTALL_PREFIX}/share/polytracker/tests") +set(POLYTRACK_PASS_DIR "${CMAKE_INSTALL_PREFIX}/share/polytracker/pass") +set(POLYTRACK_CXX_DIR "${CMAKE_INSTALL_PREFIX}/share/polytracker") add_subdirectory(polytracker) diff --git a/Dockerfile b/Dockerfile index 4a99f6a0..b3a1e563 100644 --- a/Dockerfile +++ b/Dockerfile @@ -46,6 +46,7 @@ WORKDIR /polytracker/build ENV PATH="/usr/lib/llvm-7/bin:${PATH}" RUN cmake -G Ninja -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_VERBOSE_MAKEFILE=TRUE .. && ninja install +ENV PATH="/polytracker/build/bin/:${PATH}" ENV CC=polybuild ENV CXX=polybuild++ ENV LLVM_COMPILER=clang diff --git a/format.sh b/format.sh index 08a02709..7c12c15e 100755 --- a/format.sh +++ b/format.sh @@ -10,5 +10,5 @@ clang-format -i polytracker/include/polyclang/*.h clang-format -i polytracker/include/dfsan/*.h # Black to auto format code, mypy for type checking -black polybuild polyprocess tests --line-length=127 -mypy --ignore-missing-imports polybuild polyprocess tests +black polytracker tests --exclude '/(polytracker/src|polytracker/scripts)/' --line-length=127 +mypy --ignore-missing-imports polytracker tests diff --git a/polytracker/CMakeLists.txt b/polytracker/CMakeLists.txt index e7bebc65..13c9e4df 100644 --- a/polytracker/CMakeLists.txt +++ b/polytracker/CMakeLists.txt @@ -2,15 +2,27 @@ project(polytracker LANGUAGES C CXX ASM) include_directories(include) -set(POLYTRACK_BIN_DIR "${CMAKE_INSTALL_PREFIX}/polytracker") -set(POLYTRACK_LIB_DIR "${CMAKE_INSTALL_PREFIX}/polytracker/lib") -set(POLYTRACK_TRACK_LIB_DIR "${CMAKE_INSTALL_PREFIX}/polytracker/track") -set(POLYTRACK_RULE_DIR "${CMAKE_INSTALL_PREFIX}/polytracker/abi_lists") -set(POLYTRACK_TESTS_DIR "${CMAKE_INSTALL_PREFIX}/polytracker/tests") -set(POLYTRACK_PASS_DIR "${CMAKE_INSTALL_PREFIX}/polytracker/pass") -set(POLYTRACK_CXX_DIR "${CMAKE_INSTALL_PREFIX}/polytracker/") +set(POLYTRACK_BIN_DIR "${CMAKE_INSTALL_PREFIX}/share/polytracker/bin") +set(POLYTRACK_LIB_DIR "${CMAKE_INSTALL_PREFIX}/share/polytracker/lib") +set(POLYTRACK_TRACK_LIB_DIR "${CMAKE_INSTALL_PREFIX}/share/polytracker/track") +set(POLYTRACK_RULE_DIR "${CMAKE_INSTALL_PREFIX}/share/polytracker/abi_lists") +set(POLYTRACK_TESTS_DIR "${CMAKE_INSTALL_PREFIX}/share/polytracker/tests") +set(POLYTRACK_PASS_DIR "${CMAKE_INSTALL_PREFIX}/share/polytracker/pass") +set(POLYTRACK_CXX_DIR "${CMAKE_INSTALL_PREFIX}/share/polytracker/") add_subdirectory(src) #add_subdirectory(custom_abi) install(DIRECTORY "./cxx_libs" DESTINATION ${POLYTRACK_CXX_DIR}) install(DIRECTORY "./abi_lists" DESTINATION ${POLYTRACK_CXX_DIR}) + +install(PROGRAMS "scripts/polybuild.py" DESTINATION ${POLYTRACK_BIN_DIR} RENAME "polybuild") +install(PROGRAMS "scripts/polybuild.py" DESTINATION ${POLYTRACK_BIN_DIR} RENAME "polybuild++") + +macro(install_symlink filepath sympath) + install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink ${filepath} ${sympath})") + install(CODE "message(\"-- Created symlink: ${sympath} -> ${filepath}\")") +endmacro(install_symlink) + +install(DIRECTORY DESTINATION "${CMAKE_INSTALL_PREFIX}/bin") +install_symlink(../share/polytracker/bin/polybuild ${CMAKE_INSTALL_PREFIX}/bin/polybuild) +install_symlink(../share/polytracker/bin/polybuild ${CMAKE_INSTALL_PREFIX}/bin/polybuild++) diff --git a/polytracker/polybuild/__main__.py b/polytracker/scripts/polybuild.py similarity index 91% rename from polytracker/polybuild/__main__.py rename to polytracker/scripts/polybuild.py index ec73d852..016fe78a 100644 --- a/polytracker/polybuild/__main__.py +++ b/polytracker/scripts/polybuild.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3.7 +#!/usr/bin/env python3 """ This code is inspired by Angora's angora-clang @@ -29,6 +29,15 @@ from dataclasses import dataclass +SCRIPT_DIR: str = os.path.dirname(os.path.realpath(__file__)) +COMPILER_DIR: str = os.path.realpath(os.path.join(SCRIPT_DIR, "..")) + + +if not os.path.isdir(COMPILER_DIR): + sys.stderr.write(f"Error: did not find polytracker directory at {COMPILER_DIR}\n\n") + sys.exit(1) + + @dataclass class CompilerMeta: is_cxx: bool @@ -37,7 +46,7 @@ class CompilerMeta: class PolyBuilder: def __init__(self, is_cxx): - self.meta = CompilerMeta(is_cxx, self.poly_find_dir(os.path.realpath(__file__)) + "/") + self.meta = CompilerMeta(is_cxx, COMPILER_DIR) def poly_check_cxx(self, compiler: str) -> bool: """ @@ -47,16 +56,6 @@ def poly_check_cxx(self, compiler: str) -> bool: return True return False - def poly_find_dir(self, compiler_path: str) -> str: - """ - Discover compiler install directory - Checks to see if the path is local directory, if not gives the entire path - """ - last_slash: int = compiler_path.rfind("/") - if last_slash == -1: - return "." - return compiler_path[0:last_slash] - def poly_is_linking(self, argv) -> bool: nonlinking_options = ["-E", "-fsyntax-only", "-S", "-c"] for option in argv: @@ -68,7 +67,7 @@ def poly_add_inst_lists(self, directory: str) -> Optional[List[str]]: """ Adds a directory of lists to the instrumentation """ - dir_path = self.meta.compiler_dir + "../abi_lists/" + directory + "/" + dir_path = os.path.join(self.meta.compiler_dir, "abi_lists", directory) file_list = [] if not os.path.exists(dir_path): print(f"Error! {dir_path} not found!") @@ -76,7 +75,7 @@ def poly_add_inst_lists(self, directory: str) -> Optional[List[str]]: dir_ents = os.listdir(dir_path) for file in dir_ents: if file != "." and file != "..": - file_list.append(dir_path + file) + file_list.append(os.path.join(dir_path, file)) return file_list def poly_compile(self, bitcode_path: str, output_path: str, libs: List[str]) -> bool: @@ -84,8 +83,8 @@ def poly_compile(self, bitcode_path: str, output_path: str, libs: List[str]) -> This function builds the compile command to instrument the whole program bitcode """ compile_command = [] - source_dir = self.meta.compiler_dir + "../lib/libTaintSources.a" - rt_dir = self.meta.compiler_dir + "../lib/libdfsan_rt-x86_64.a" + source_dir = os.path.join(self.meta.compiler_dir, "lib", "libTaintSources.a") + rt_dir = os.path.join(self.meta.compiler_dir, "lib", "libdfsan_rt-x86_64.a") if self.meta.is_cxx: compile_command.append("clang++") else: @@ -112,7 +111,8 @@ def poly_compile(self, bitcode_path: str, output_path: str, libs: List[str]) -> return True def poly_opt(self, input_file: str, bitcode_file: str) -> bool: - opt_command = ["opt", "-O0", "-load", self.meta.compiler_dir + "../pass/libDataFlowSanitizerPass.so"] + opt_command = ["opt", "-O0", "-load", + os.path.join(self.meta.compiler_dir, "pass", "libDataFlowSanitizerPass.so")] ignore_list_files: Optional[List[str]] = self.poly_add_inst_lists("ignore_lists") if ignore_list_files is None: print("Error! Failed to add ignore lists") @@ -157,8 +157,8 @@ def poly_build(self, argv) -> bool: if self.meta.is_cxx: compile_command.append("-stdlib=libc++") compile_command.append("-nostdinc++") - compile_command.append("-I" + self.meta.compiler_dir + "/../cxx_libs/include/c++/v1/") - compile_command.append("-L" + self.meta.compiler_dir + "/../cxx_libs/lib/") + compile_command.append("-I" + os.path.join(self.meta.compiler_dir, "cxx_libs", "include", "c++", "v1")) + compile_command.append("-L" + os.path.join(self.meta.compiler_dir, "cxx_libs", "lib")) for arg in argv[1:]: if arg == "-Wall" or arg == "-Wextra" or arg == "-Wno-unused-parameter" or arg == "-Werror": continue @@ -241,7 +241,7 @@ def main(): ) # TODO add verbosity flag poly_build = PolyBuilder("++" in sys.argv[0]) - if sys.argv[1] == "--instrument-bitcode": + if len(sys.argv) > 1 and sys.argv[1] == "--instrument-bitcode": args = parser.parse_args(sys.argv[1:]) if not os.path.exists(args.input_file): print("Error! Input file could not be found!") @@ -257,7 +257,7 @@ def main(): sys.exit(1) # do Build and opt/Compile for simple C/C++ program with no libs, just ease of use - elif sys.argv[1] == "--instrument-target": + elif len(sys.argv) > 1 and sys.argv[1] == "--instrument-target": # Find the output file output_file = "" for i, arg in enumerate(sys.argv): diff --git a/setup.py b/setup.py index dd5d8128..f8e0dc50 100644 --- a/setup.py +++ b/setup.py @@ -21,9 +21,7 @@ }, entry_points={ 'console_scripts': [ - 'polyprocess = polytracker.polyprocess.__main__:main', - 'polybuild = polytracker.polybuild.__main__:main', - 'polybuild++ = polytracker.polybuild.__main__:main' + 'polyprocess = polytracker.polyprocess.__main__:main' ] }, classifiers=[ From bb9857f23fc3d8a2d51c39a94ddcb231899ac776 Mon Sep 17 00:00:00 2001 From: Evan Sultanik Date: Tue, 21 Jul 2020 21:35:32 -0400 Subject: [PATCH 05/19] Update `POLYROOT`, even though I am not sure if it is used anymore --- polytracker/scripts/gllvm_build_libcxx.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/polytracker/scripts/gllvm_build_libcxx.sh b/polytracker/scripts/gllvm_build_libcxx.sh index ebde9dbc..f87f2611 100755 --- a/polytracker/scripts/gllvm_build_libcxx.sh +++ b/polytracker/scripts/gllvm_build_libcxx.sh @@ -1,7 +1,12 @@ +#!/usr/bin/env bash + mkdir build; cd build; -POLYROOT="/polytracker/build/bin/polytracker/" +SCRIPT=$(realpath "$0") +SCRIPTPATH=$(dirname "$SCRIPT") + +POLYROOT="$SCRIPTPATH/../../build/share/polytracker/" export CC="gclang -Xclang -disable-O0-optnone" echo $CC From 6c3d60099bf7237cd012f044ae2b76f37cd3e64e Mon Sep 17 00:00:00 2001 From: Evan Sultanik Date: Tue, 21 Jul 2020 21:37:36 -0400 Subject: [PATCH 06/19] Update `WLLVM_BC_STORE` to the new location --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index b3a1e563..d7ea717f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -53,6 +53,6 @@ ENV LLVM_COMPILER=clang RUN mkdir -p "/build_artifacts" # Set the BC store path to the /cxx_libs/bitcode/bitcode_store} -ENV WLLVM_BC_STORE="/polytracker/build/bin/polytracker/cxx_libs/bitcode/bitcode_store" +ENV WLLVM_BC_STORE="/polytracker/build/share/polytracker/cxx_libs/bitcode/bitcode_store" ENV WLLVM_ARTIFACT_STORE="/build_artifacts" WORKDIR /polytracker From 929c0f8c5484e553cd5f1f3ab742e4d5902e8a87 Mon Sep 17 00:00:00 2001 From: Evan Sultanik Date: Tue, 21 Jul 2020 21:55:18 -0400 Subject: [PATCH 07/19] Python packaging and ported the CFG module from polymerge --- polytracker/__init__.py | 1 + polytracker/cfg.py | 120 +++++++++++++++++++++ polytracker/src/polytracker.py | 189 +++++++++++++++++++++++++++++++++ setup.py | 5 +- 4 files changed, 313 insertions(+), 2 deletions(-) create mode 100644 polytracker/cfg.py create mode 100644 polytracker/src/polytracker.py diff --git a/polytracker/__init__.py b/polytracker/__init__.py index e69de29b..f5e1a75a 100644 --- a/polytracker/__init__.py +++ b/polytracker/__init__.py @@ -0,0 +1 @@ +from .polytracker import * diff --git a/polytracker/cfg.py b/polytracker/cfg.py new file mode 100644 index 00000000..2b9c2cc2 --- /dev/null +++ b/polytracker/cfg.py @@ -0,0 +1,120 @@ +import math + +from typing import Any, Callable, Optional + +import graphviz +import networkx as nx + + +def roots(graph): + return (n for n, d in graph.in_degree() if d == 0) + + +class DiGraph(nx.DiGraph): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._dominator_forest: Optional[DiGraph] = None + self._roots = None + self._path_lengths = None + + def path_length(self, from_node, to_node): + if self._path_lengths is None: + self._path_lengths = dict(nx.all_pairs_shortest_path_length(self, cutoff=None)) + if from_node not in self._path_lengths or to_node not in self._path_lengths[from_node]: + return math.inf + else: + return self._path_lengths[from_node][to_node] + + def set_roots(self, roots): + self._roots = roots + + @property + def roots(self): + if self._roots is None: + self._roots = tuple(roots(self)) + return self._roots + + def depth(self, node): + return min(self.path_length(root, node) for root in self.roots) + + def ancestors(self, node) -> set: + return nx.ancestors(self, node) + + def descendants(self, node) -> frozenset: + return frozenset(nx.dfs_successors(self, node).keys()) + + @property + def dominator_forest(self): + if self._dominator_forest is not None: + return self._dominator_forest + self._dominator_forest = DAG() + for root in self.roots: + for node, dominated_by in nx.immediate_dominators(self, root).items(): + if node != dominated_by: + self._dominator_forest.add_edge(dominated_by, node) + return self._dominator_forest + + def to_dot(self, comment: str = None, labeler=Callable[[Any], str], node_filter=None) -> graphviz.Digraph: + if comment is not None: + dot = graphviz.Digraph(comment=comment) + else: + dot = graphviz.Digraph() + node_ids = {node: i for i, node in enumerate(self.nodes)} + for node in self.nodes: + if node_filter is None or node_filter(node): + dot.node(f'func{node_ids[node]}', label=labeler(node)) + for caller, callee in self.edges: + if node_filter is None or (node_filter(caller) and node_filter(callee)): + dot.edge(f'func{node_ids[caller]}', f'func{node_ids[callee]}') + return dot + + +class DAG(DiGraph): + def vertex_induced_subgraph(self, vertices): + vertices = frozenset(vertices) + subgraph = self.copy() + to_remove = set(self.nodes) - vertices + for v in vertices: + node = v + parent = None + while True: + parents = tuple(subgraph.predecessors(node)) + if not parents: + if parent is not None: + subgraph.remove_edge(parent, v) + subgraph.add_edge(node, v) + break + assert len(parents) == 1 + ancestor = parents[0] + if parent is None: + parent = ancestor + if ancestor in vertices: + to_remove.add(v) + break + node = ancestor + subgraph.remove_nodes_from(to_remove) + return subgraph + + +class CFG(DiGraph): + def __init__(self): + super().__init__() + + def to_dot(self, + comment='PolyTracker Program Trace', + merged_json_obj=None, + only_labeled_functions=False, + labeler=None, + **kwargs + ) -> graphviz.Digraph: + function_labels = {} + + def func_labeler(f): + if labeler is not None: + return labeler(f) + elif f.name in function_labels: + return f"{f.name} ({function_labels[f.name]})" + else: + return f.name + + return super().to_dot(comment, labeler=func_labeler, **kwargs) diff --git a/polytracker/src/polytracker.py b/polytracker/src/polytracker.py new file mode 100644 index 00000000..8dd1f723 --- /dev/null +++ b/polytracker/src/polytracker.py @@ -0,0 +1,189 @@ +from typing import Dict, Iterable, List, Set, Tuple + +from polyfile import logger + +from .cfg import CFG + +log = logger.getStatusLogger('PolyTracker') + + +class FunctionInfo: + def __init__(self, name: str, cmp_bytes: Dict[str, List[int]], input_bytes: Dict[str, List[int]] = None, called_from: Iterable[str] = ()): + self.name = name + self.called_from = frozenset(called_from) + self.cmp_bytes = cmp_bytes + if input_bytes is None: + self.input_bytes = cmp_bytes + else: + self.input_bytes = input_bytes + + @property + def taint_sources(self) -> Set[str]: + return self.input_bytes.keys() + + def __getitem__(self, input_source_name): + return self.input_bytes[input_source_name] + + def __iter__(self): + return self.taint_sources + + def items(self): + return self.input_bytes.items() + + def __hash__(self): + return hash(self.name) + + def __str__(self): + return self.name + + def __repr__(self): + return f"{self.__class__.__name__}(name={self.name!r}, cmp_bytes={self.cmp_bytes!r}, input_bytes={self.input_bytes!r}, called_from={self.called_from!r})" + + +class ProgramTrace: + def __init__(self, polytracker_version: tuple, function_data: Iterable[FunctionInfo]): + self.polytracker_version = polytracker_version + self.functions: Dict[str, FunctionInfo] = {f.name: f for f in function_data} + self._cfg = None + + @property + def cfg(self) -> CFG: + if self._cfg is not None: + return self._cfg + self._cfg = CFG(self) + self._cfg.add_nodes_from(self.functions.values()) + for f in list(self.functions.values()): + for caller in f.called_from: + if caller not in self.functions: + info = FunctionInfo(caller, {}) + self.functions[caller] = info + self._cfg.add_node(info) + self._cfg.add_edge(info, f) + else: + self._cfg.add_edge(self.functions[caller], f) + return self._cfg + + def __repr__(self): + return f"{self.__class__.__name__}(polytracker_version={self.polytracker_version!r}, function_data={list(self.functions.values())!r})" + + +POLYTRACKER_JSON_FORMATS = [] + + +def normalize_version(*version) -> Tuple[str]: + version = tuple(str(v) for v in version) + version = tuple(version) + ('0',) * (3 - len(version)) + version = tuple(version) + ('',) * (4 - len(version)) + return version + + +def polytracker_version(*version): + def wrapper(func): + POLYTRACKER_JSON_FORMATS.append((normalize_version(*version), func)) + POLYTRACKER_JSON_FORMATS.sort(reverse=True) + return func + return wrapper + + +def parse(polytracker_json_obj: dict) -> ProgramTrace: + if 'version' in polytracker_json_obj: + version = normalize_version(*polytracker_json_obj['version'].split('.')) + if len(version) > 4: + log.warn(f"Unexpectedly long PolyTracker version: {polytracker_json_obj['version']!r}") + for i, (known_version, parser) in enumerate(POLYTRACKER_JSON_FORMATS): + # POLYTRACKER_JSON_FORMATS is auto-sorted in decreasing order + if version >= known_version: + if i == 0 and version > known_version: + log.warn(f"PolyTracker version {polytracker_json_obj['version']!r} is newer than the latest supported by PolyMerge ({'.'.join(known_version)})") + return parser(polytracker_json_obj) + raise ValueError(f"Unsupported PolyTracker version {polytracker_json_obj['version']!r}") + for function_name, function_data in polytracker_json_obj.items(): + if isinstance(function_data, dict) and 'called_from' in function_data: + # this is the second version of the output format + return parse_format_v2(polytracker_json_obj) + else: + return parse_format_v1(polytracker_json_obj) + + +@polytracker_version(0, 0, 1, '') +def parse_format_v1(polytracker_json_obj: dict) -> ProgramTrace: + return ProgramTrace( + polytracker_version=(0, 0, 1), + function_data=[FunctionInfo( + function_name, + {None: taint_bytes} + ) for function_name, taint_bytes in polytracker_json_obj.items() + ] + ) + + +@polytracker_version(0, 0, 1, 'alpha2.1') +def parse_format_v2(polytracker_json_obj: dict) -> ProgramTrace: + function_data = [] + for function_name, data in polytracker_json_obj.items(): + if 'input_bytes' not in data: + if 'cmp_bytes' in data: + input_bytes = data['cmp_bytes'] + else: + input_bytes = {} + else: + input_bytes = data['input_bytes'] + if 'cmp_bytes' in data: + cmp_bytes = data['cmp_bytes'] + else: + cmp_bytes = input_bytes + if 'called_from' in data: + called_from = data['called_from'] + else: + called_from = () + function_data.append(FunctionInfo( + name=function_name, + cmp_bytes=cmp_bytes, + input_bytes=input_bytes, + called_from=called_from + )) + return ProgramTrace( + polytracker_version=(0, 0, 1, 'alpha2.1'), + function_data=function_data + ) + + +@polytracker_version(1, 0, 1) +def parse_format_v3(polytracker_json_obj: dict) -> ProgramTrace: + version = polytracker_json_obj['version'].split('.') + function_data = [] + tainted_functions = set() + for function_name, data in polytracker_json_obj['tainted_functions'].items(): + if 'input_bytes' not in data: + if 'cmp_bytes' in data: + input_bytes = data['cmp_bytes'] + else: + input_bytes = {} + else: + input_bytes = data['input_bytes'] + if 'cmp_bytes' in data: + cmp_bytes = data['cmp_bytes'] + else: + cmp_bytes = input_bytes + if function_name in polytracker_json_obj['runtime_cfg']: + called_from = frozenset(polytracker_json_obj['runtime_cfg'][function_name]) + else: + called_from = frozenset() + function_data.append(FunctionInfo( + name=function_name, + cmp_bytes=cmp_bytes, + input_bytes=input_bytes, + called_from=called_from + )) + tainted_functions.add(function_name) + # Add any additional functions from the CFG that didn't operate on tainted bytes + for function_name in polytracker_json_obj['runtime_cfg'].keys() - tainted_functions: + function_data.append(FunctionInfo( + name=function_name, + cmp_bytes={}, + called_from=polytracker_json_obj['runtime_cfg'][function_name] + )) + return ProgramTrace( + polytracker_version=version, + function_data=function_data + ) diff --git a/setup.py b/setup.py index f8e0dc50..a0fbf1eb 100644 --- a/setup.py +++ b/setup.py @@ -9,11 +9,12 @@ packages=find_packages(), python_requires='>=3.7', install_requires=[ + 'graphviz', 'matplotlib', - 'networkX', - 'tqdm', + 'networkx', 'pygraphviz', 'pydot', + 'tqdm', 'typing_extensions' ], extras_require={ From 49003a4bcb09afee478f760a3b65dd80e5130bca Mon Sep 17 00:00:00 2001 From: Evan Sultanik Date: Tue, 21 Jul 2020 21:59:14 -0400 Subject: [PATCH 08/19] Had the polymerge polytracker module in the wrong place --- polytracker/{src => }/polytracker.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename polytracker/{src => }/polytracker.py (100%) diff --git a/polytracker/src/polytracker.py b/polytracker/polytracker.py similarity index 100% rename from polytracker/src/polytracker.py rename to polytracker/polytracker.py From c499f99b0b88ee6724250c9f1b795e9fd6406c59 Mon Sep 17 00:00:00 2001 From: Evan Sultanik Date: Tue, 21 Jul 2020 22:01:12 -0400 Subject: [PATCH 09/19] Move `polyclang/polytracker.h` -> `polytracker/polytracker.h` --- polytracker/include/dfsan/dfsan_log_mgmt.h | 2 +- polytracker/include/{polyclang => polytracker}/polytracker.h | 0 polytracker/src/CMakeLists.txt | 1 - polytracker/src/dfsan_rt/dfsan/dfsan.cpp | 2 +- 4 files changed, 2 insertions(+), 3 deletions(-) rename polytracker/include/{polyclang => polytracker}/polytracker.h (100%) diff --git a/polytracker/include/dfsan/dfsan_log_mgmt.h b/polytracker/include/dfsan/dfsan_log_mgmt.h index bcd3f0c9..989bfbf8 100644 --- a/polytracker/include/dfsan/dfsan_log_mgmt.h +++ b/polytracker/include/dfsan/dfsan_log_mgmt.h @@ -3,7 +3,7 @@ #include "dfsan/dfsan.h" #include "json.hpp" -#include "polyclang/polytracker.h" +#include "polytracker/polytracker.h" #include #include #include diff --git a/polytracker/include/polyclang/polytracker.h b/polytracker/include/polytracker/polytracker.h similarity index 100% rename from polytracker/include/polyclang/polytracker.h rename to polytracker/include/polytracker/polytracker.h diff --git a/polytracker/src/CMakeLists.txt b/polytracker/src/CMakeLists.txt index f669e67d..a2a63c36 100644 --- a/polytracker/src/CMakeLists.txt +++ b/polytracker/src/CMakeLists.txt @@ -1,4 +1,3 @@ -#add_subdirectory(polyclang) add_subdirectory(dfsan_rt) add_subdirectory(dfsan_pass) add_subdirectory(dfsan_sources) diff --git a/polytracker/src/dfsan_rt/dfsan/dfsan.cpp b/polytracker/src/dfsan_rt/dfsan/dfsan.cpp index 523b6189..7400e03b 100644 --- a/polytracker/src/dfsan_rt/dfsan/dfsan.cpp +++ b/polytracker/src/dfsan_rt/dfsan/dfsan.cpp @@ -21,7 +21,7 @@ #include "dfsan/dfsan.h" #include "dfsan/dfsan_log_mgmt.h" -#include "polyclang/polytracker.h" +#include "polytracker/polytracker.h" #include "sanitizer_common/sanitizer_atomic.h" #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_file.h" From 4517ae51d560f978e2c73fde394ae8bd6a55e080 Mon Sep 17 00:00:00 2001 From: Evan Sultanik Date: Tue, 21 Jul 2020 22:29:28 -0400 Subject: [PATCH 10/19] Automatically update the Python package version from the C++ header --- polytracker/include/polytracker/polytracker.h | 5 +- setup.py | 53 ++++++++++++++++++- 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/polytracker/include/polytracker/polytracker.h b/polytracker/include/polytracker/polytracker.h index b4acb0a5..1e7c7989 100644 --- a/polytracker/include/polytracker/polytracker.h +++ b/polytracker/include/polytracker/polytracker.h @@ -5,8 +5,9 @@ #define POLYTRACKER_VERSION_MINOR 0 #define POLYTRACKER_VERSION_REVISION 0 -// Set the version note to an empty string if there is no note -#define POLYTRACKER_SUFFIX "alpha2.2" +// If there is a suffix, it should always start with a hypen, like "-alpha2.2". +// If there is no suffix, set POLYTRACKER_VERSION_SUFFIX to an empty string. +#define POLYTRACKER_VERSION_SUFFIX "-alpha2.2" /**********************************************************************************/ diff --git a/setup.py b/setup.py index a0fbf1eb..b19b1500 100644 --- a/setup.py +++ b/setup.py @@ -1,11 +1,62 @@ +import os +import re +import sys from setuptools import setup, find_packages +from typing import Optional, Tuple + +SETUP_DIR = os.path.dirname(os.path.realpath(__file__)) +POLYTRACKER_HEADER = os.path.join(SETUP_DIR, 'polytracker', 'include', 'polytracker', 'polytracker.h') + +if not os.path.exists(POLYTRACKER_HEADER): + sys.stderr.write(f"Error loading polytracker.h!\nIt was expected to be here:\n{POLYTRACKER_HEADER}\n\n") + exit(1) + + +def polytracker_version() -> Tuple[int, int, int, Optional[str]]: + version_parts = {} + with open(POLYTRACKER_HEADER, 'r') as f: + for i, line in enumerate(f): + m = re.match(r"\s*#define\s+POLYTRACKER_VERSION_([A-Za-z_0-9]+)\s+([^\s]+)\s*$", line) + if m: + if m[1] not in ('MAJOR', 'MINOR', 'REVISION', 'SUFFIX'): + sys.stderr.write(f"Warning: Ignoring unexpected #define for \"POLYTRACKER_VERSION_{m[1]}\" on line " + f"{i + 1} of {POLYTRACKER_HEADER}\n") + else: + version_parts[m[1]] = m[2] + for required_part in ('MAJOR', 'MINOR', 'REVISION'): + if required_part not in version_parts: + sys.stderr.write( + f"Error: #define POLYTRACKER_VERSION_{required_part} not found in {POLYTRACKER_HEADER}\n\n") + sys.exit(1) + try: + version_parts[required_part] = int(version_parts[required_part]) + except ValueError: + sys.stderr.write( + f"Error: POLYTRACKER_VERSION_{required_part} in {POLYTRACKER_HEADER} is not an integer!\n\n") + sys.exit(1) + suffix = version_parts.get('SUFFIX', None) + if suffix is not None: + suffix = suffix.strip() + if suffix.startswith('"') and suffix.endswith('"'): + suffix = suffix[1:-1] + return version_parts['MAJOR'], version_parts['MINOR'], version_parts['REVISION'], suffix + + +def polytracker_version_string() -> str: + *primary, suffix = polytracker_version() + primary = map(str, primary) + if suffix is None: + return '.'.join(primary) + else: + return f"{'.'.join(primary)}{suffix}" + setup( name='polytracker', description='API and Library for operating and interacting with PolyTracker', url='https://github.com/trailofbits/polytracker', author='Trail of Bits', - version="0.1.0", + version=polytracker_version_string(), packages=find_packages(), python_requires='>=3.7', install_requires=[ From 09a9051c42600ec1fbd920278086cf28c4e426ac Mon Sep 17 00:00:00 2001 From: Evan Sultanik Date: Tue, 21 Jul 2020 22:30:50 -0400 Subject: [PATCH 11/19] Add the suffix to the end of the Polytracker version string --- polytracker/include/polytracker/polytracker.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polytracker/include/polytracker/polytracker.h b/polytracker/include/polytracker/polytracker.h index 1e7c7989..fdbee2dd 100644 --- a/polytracker/include/polytracker/polytracker.h +++ b/polytracker/include/polytracker/polytracker.h @@ -17,6 +17,6 @@ #define POLYTRACKER_VERSION \ PF_MAKE_STR(POLYTRACKER_VERSION_MAJOR) \ "." PF_MAKE_STR(POLYTRACKER_VERSION_MINOR) "." PF_MAKE_STR( \ - POLYTRACKER_VERSION_REVISION) + POLYTRACKER_VERSION_REVISION) POLYTRACKER_VERSION_SUFFIX #endif From 7e4a56de747d0d5492350977363e46bccdf87f19 Mon Sep 17 00:00:00 2001 From: Evan Sultanik Date: Tue, 21 Jul 2020 22:31:13 -0400 Subject: [PATCH 12/19] Up the version number --- polytracker/include/polytracker/polytracker.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/polytracker/include/polytracker/polytracker.h b/polytracker/include/polytracker/polytracker.h index fdbee2dd..fa2b7e2b 100644 --- a/polytracker/include/polytracker/polytracker.h +++ b/polytracker/include/polytracker/polytracker.h @@ -2,12 +2,12 @@ #define __POLYTRACKER_H__ #define POLYTRACKER_VERSION_MAJOR 2 -#define POLYTRACKER_VERSION_MINOR 0 +#define POLYTRACKER_VERSION_MINOR 1 #define POLYTRACKER_VERSION_REVISION 0 // If there is a suffix, it should always start with a hypen, like "-alpha2.2". // If there is no suffix, set POLYTRACKER_VERSION_SUFFIX to an empty string. -#define POLYTRACKER_VERSION_SUFFIX "-alpha2.2" +#define POLYTRACKER_VERSION_SUFFIX "" /**********************************************************************************/ From c3ac13854788d2f97c34e3e3e4269f9442d30acb Mon Sep 17 00:00:00 2001 From: Evan Sultanik Date: Tue, 21 Jul 2020 22:44:43 -0400 Subject: [PATCH 13/19] The polybuild module is no longer included in the polytracker package --- polytracker/polybuild/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 polytracker/polybuild/__init__.py diff --git a/polytracker/polybuild/__init__.py b/polytracker/polybuild/__init__.py deleted file mode 100644 index e69de29b..00000000 From a0ce75140aac7fa661943f9b63eeab020908b4df Mon Sep 17 00:00:00 2001 From: Evan Sultanik Date: Tue, 21 Jul 2020 22:45:02 -0400 Subject: [PATCH 14/19] Better support for polytracker JSON parsing and versioning --- polytracker/include/polytracker/polytracker.h | 2 ++ polytracker/polytracker.py | 16 ++++++++++------ .../src/dfsan_rt/dfsan/dfsan_log_mgmt.cpp | 3 +++ 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/polytracker/include/polytracker/polytracker.h b/polytracker/include/polytracker/polytracker.h index fa2b7e2b..f3fef72c 100644 --- a/polytracker/include/polytracker/polytracker.h +++ b/polytracker/include/polytracker/polytracker.h @@ -1,6 +1,8 @@ #ifndef __POLYTRACKER_H__ #define __POLYTRACKER_H__ +// NOTE: Whenever the version is updated, make sure to add support to the JSON parsing code in polytracker.py! + #define POLYTRACKER_VERSION_MAJOR 2 #define POLYTRACKER_VERSION_MINOR 1 #define POLYTRACKER_VERSION_REVISION 0 diff --git a/polytracker/polytracker.py b/polytracker/polytracker.py index 8dd1f723..2a6abdc1 100644 --- a/polytracker/polytracker.py +++ b/polytracker/polytracker.py @@ -1,10 +1,10 @@ +import logging from typing import Dict, Iterable, List, Set, Tuple -from polyfile import logger - from .cfg import CFG -log = logger.getStatusLogger('PolyTracker') + +log = logging.getLogger('PolyTracker') class FunctionInfo: @@ -50,7 +50,7 @@ def __init__(self, polytracker_version: tuple, function_data: Iterable[FunctionI def cfg(self) -> CFG: if self._cfg is not None: return self._cfg - self._cfg = CFG(self) + self._cfg = CFG() self._cfg.add_nodes_from(self.functions.values()) for f in list(self.functions.values()): for caller in f.called_from: @@ -89,12 +89,14 @@ def parse(polytracker_json_obj: dict) -> ProgramTrace: if 'version' in polytracker_json_obj: version = normalize_version(*polytracker_json_obj['version'].split('.')) if len(version) > 4: - log.warn(f"Unexpectedly long PolyTracker version: {polytracker_json_obj['version']!r}") + log.warning(f"Unexpectedly long PolyTracker version: {polytracker_json_obj['version']!r}") for i, (known_version, parser) in enumerate(POLYTRACKER_JSON_FORMATS): # POLYTRACKER_JSON_FORMATS is auto-sorted in decreasing order if version >= known_version: if i == 0 and version > known_version: - log.warn(f"PolyTracker version {polytracker_json_obj['version']!r} is newer than the latest supported by PolyMerge ({'.'.join(known_version)})") + log.warning(f"PolyTracker version {polytracker_json_obj['version']!r} " + "is newer than the latest supported by the polytracker Python module " + f"({'.'.join(known_version)})") return parser(polytracker_json_obj) raise ValueError(f"Unsupported PolyTracker version {polytracker_json_obj['version']!r}") for function_name, function_data in polytracker_json_obj.items(): @@ -148,6 +150,8 @@ def parse_format_v2(polytracker_json_obj: dict) -> ProgramTrace: ) +@polytracker_version(2, 0, 1) +@polytracker_version(2, 0, 0) @polytracker_version(1, 0, 1) def parse_format_v3(polytracker_json_obj: dict) -> ProgramTrace: version = polytracker_json_obj['version'].split('.') diff --git a/polytracker/src/dfsan_rt/dfsan/dfsan_log_mgmt.cpp b/polytracker/src/dfsan_rt/dfsan/dfsan_log_mgmt.cpp index 454b355a..b5dcac24 100644 --- a/polytracker/src/dfsan_rt/dfsan/dfsan_log_mgmt.cpp +++ b/polytracker/src/dfsan_rt/dfsan/dfsan_log_mgmt.cpp @@ -177,6 +177,9 @@ void taintManager::addTaintedBlocks() { void taintManager::outputRawTaintSets() { string_node_map::iterator it; + // NOTE: Whenever the output JSON format changes, make sure to: + // (1) Up the version number in /polytracker/include/polytracker/polytracker.h; and + // (2) Add support for parsing the changes in /polytracker/polytracker.py addJsonVersion(); addJsonRuntimeCFG(); addTaintSources(); From 0484dd1571fb3dcc99c8e6133b9302fb525f6b3f Mon Sep 17 00:00:00 2001 From: Evan Sultanik Date: Tue, 21 Jul 2020 22:47:51 -0400 Subject: [PATCH 15/19] Carson why u gotta lint so hard --- polytracker/cfg.py | 14 ++--- polytracker/polytracker.py | 115 +++++++++++++++++-------------------- 2 files changed, 59 insertions(+), 70 deletions(-) diff --git a/polytracker/cfg.py b/polytracker/cfg.py index 2b9c2cc2..4950d965 100644 --- a/polytracker/cfg.py +++ b/polytracker/cfg.py @@ -62,10 +62,10 @@ def to_dot(self, comment: str = None, labeler=Callable[[Any], str], node_filter= node_ids = {node: i for i, node in enumerate(self.nodes)} for node in self.nodes: if node_filter is None or node_filter(node): - dot.node(f'func{node_ids[node]}', label=labeler(node)) + dot.node(f"func{node_ids[node]}", label=labeler(node)) for caller, callee in self.edges: if node_filter is None or (node_filter(caller) and node_filter(callee)): - dot.edge(f'func{node_ids[caller]}', f'func{node_ids[callee]}') + dot.edge(f"func{node_ids[caller]}", f"func{node_ids[callee]}") return dot @@ -100,13 +100,9 @@ class CFG(DiGraph): def __init__(self): super().__init__() - def to_dot(self, - comment='PolyTracker Program Trace', - merged_json_obj=None, - only_labeled_functions=False, - labeler=None, - **kwargs - ) -> graphviz.Digraph: + def to_dot( + self, comment="PolyTracker Program Trace", merged_json_obj=None, only_labeled_functions=False, labeler=None, **kwargs + ) -> graphviz.Digraph: function_labels = {} def func_labeler(f): diff --git a/polytracker/polytracker.py b/polytracker/polytracker.py index 2a6abdc1..ecbb3621 100644 --- a/polytracker/polytracker.py +++ b/polytracker/polytracker.py @@ -4,11 +4,17 @@ from .cfg import CFG -log = logging.getLogger('PolyTracker') +log = logging.getLogger("PolyTracker") class FunctionInfo: - def __init__(self, name: str, cmp_bytes: Dict[str, List[int]], input_bytes: Dict[str, List[int]] = None, called_from: Iterable[str] = ()): + def __init__( + self, + name: str, + cmp_bytes: Dict[str, List[int]], + input_bytes: Dict[str, List[int]] = None, + called_from: Iterable[str] = (), + ): self.name = name self.called_from = frozenset(called_from) self.cmp_bytes = cmp_bytes @@ -72,8 +78,8 @@ def __repr__(self): def normalize_version(*version) -> Tuple[str]: version = tuple(str(v) for v in version) - version = tuple(version) + ('0',) * (3 - len(version)) - version = tuple(version) + ('',) * (4 - len(version)) + version = tuple(version) + ("0",) * (3 - len(version)) + version = tuple(version) + ("",) * (4 - len(version)) return version @@ -82,112 +88,99 @@ def wrapper(func): POLYTRACKER_JSON_FORMATS.append((normalize_version(*version), func)) POLYTRACKER_JSON_FORMATS.sort(reverse=True) return func + return wrapper def parse(polytracker_json_obj: dict) -> ProgramTrace: - if 'version' in polytracker_json_obj: - version = normalize_version(*polytracker_json_obj['version'].split('.')) + if "version" in polytracker_json_obj: + version = normalize_version(*polytracker_json_obj["version"].split(".")) if len(version) > 4: log.warning(f"Unexpectedly long PolyTracker version: {polytracker_json_obj['version']!r}") for i, (known_version, parser) in enumerate(POLYTRACKER_JSON_FORMATS): # POLYTRACKER_JSON_FORMATS is auto-sorted in decreasing order if version >= known_version: if i == 0 and version > known_version: - log.warning(f"PolyTracker version {polytracker_json_obj['version']!r} " - "is newer than the latest supported by the polytracker Python module " - f"({'.'.join(known_version)})") + log.warning( + f"PolyTracker version {polytracker_json_obj['version']!r} " + "is newer than the latest supported by the polytracker Python module " + f"({'.'.join(known_version)})" + ) return parser(polytracker_json_obj) raise ValueError(f"Unsupported PolyTracker version {polytracker_json_obj['version']!r}") for function_name, function_data in polytracker_json_obj.items(): - if isinstance(function_data, dict) and 'called_from' in function_data: + if isinstance(function_data, dict) and "called_from" in function_data: # this is the second version of the output format return parse_format_v2(polytracker_json_obj) else: return parse_format_v1(polytracker_json_obj) -@polytracker_version(0, 0, 1, '') +@polytracker_version(0, 0, 1, "") def parse_format_v1(polytracker_json_obj: dict) -> ProgramTrace: return ProgramTrace( polytracker_version=(0, 0, 1), - function_data=[FunctionInfo( - function_name, - {None: taint_bytes} - ) for function_name, taint_bytes in polytracker_json_obj.items() - ] + function_data=[ + FunctionInfo(function_name, {None: taint_bytes}) for function_name, taint_bytes in polytracker_json_obj.items() + ], ) -@polytracker_version(0, 0, 1, 'alpha2.1') +@polytracker_version(0, 0, 1, "alpha2.1") def parse_format_v2(polytracker_json_obj: dict) -> ProgramTrace: function_data = [] for function_name, data in polytracker_json_obj.items(): - if 'input_bytes' not in data: - if 'cmp_bytes' in data: - input_bytes = data['cmp_bytes'] + if "input_bytes" not in data: + if "cmp_bytes" in data: + input_bytes = data["cmp_bytes"] else: input_bytes = {} else: - input_bytes = data['input_bytes'] - if 'cmp_bytes' in data: - cmp_bytes = data['cmp_bytes'] + input_bytes = data["input_bytes"] + if "cmp_bytes" in data: + cmp_bytes = data["cmp_bytes"] else: cmp_bytes = input_bytes - if 'called_from' in data: - called_from = data['called_from'] + if "called_from" in data: + called_from = data["called_from"] else: called_from = () - function_data.append(FunctionInfo( - name=function_name, - cmp_bytes=cmp_bytes, - input_bytes=input_bytes, - called_from=called_from - )) - return ProgramTrace( - polytracker_version=(0, 0, 1, 'alpha2.1'), - function_data=function_data - ) + function_data.append( + FunctionInfo(name=function_name, cmp_bytes=cmp_bytes, input_bytes=input_bytes, called_from=called_from) + ) + return ProgramTrace(polytracker_version=(0, 0, 1, "alpha2.1"), function_data=function_data) @polytracker_version(2, 0, 1) @polytracker_version(2, 0, 0) @polytracker_version(1, 0, 1) def parse_format_v3(polytracker_json_obj: dict) -> ProgramTrace: - version = polytracker_json_obj['version'].split('.') + version = polytracker_json_obj["version"].split(".") function_data = [] tainted_functions = set() - for function_name, data in polytracker_json_obj['tainted_functions'].items(): - if 'input_bytes' not in data: - if 'cmp_bytes' in data: - input_bytes = data['cmp_bytes'] + for function_name, data in polytracker_json_obj["tainted_functions"].items(): + if "input_bytes" not in data: + if "cmp_bytes" in data: + input_bytes = data["cmp_bytes"] else: input_bytes = {} else: - input_bytes = data['input_bytes'] - if 'cmp_bytes' in data: - cmp_bytes = data['cmp_bytes'] + input_bytes = data["input_bytes"] + if "cmp_bytes" in data: + cmp_bytes = data["cmp_bytes"] else: cmp_bytes = input_bytes - if function_name in polytracker_json_obj['runtime_cfg']: - called_from = frozenset(polytracker_json_obj['runtime_cfg'][function_name]) + if function_name in polytracker_json_obj["runtime_cfg"]: + called_from = frozenset(polytracker_json_obj["runtime_cfg"][function_name]) else: called_from = frozenset() - function_data.append(FunctionInfo( - name=function_name, - cmp_bytes=cmp_bytes, - input_bytes=input_bytes, - called_from=called_from - )) + function_data.append( + FunctionInfo(name=function_name, cmp_bytes=cmp_bytes, input_bytes=input_bytes, called_from=called_from) + ) tainted_functions.add(function_name) # Add any additional functions from the CFG that didn't operate on tainted bytes - for function_name in polytracker_json_obj['runtime_cfg'].keys() - tainted_functions: - function_data.append(FunctionInfo( - name=function_name, - cmp_bytes={}, - called_from=polytracker_json_obj['runtime_cfg'][function_name] - )) - return ProgramTrace( - polytracker_version=version, - function_data=function_data - ) + for function_name in polytracker_json_obj["runtime_cfg"].keys() - tainted_functions: + function_data.append( + FunctionInfo(name=function_name, cmp_bytes={}, called_from=polytracker_json_obj["runtime_cfg"][function_name]) + ) + return ProgramTrace(polytracker_version=version, function_data=function_data) From e0af394394ea7c22c3e7b637230c3c7436c0529c Mon Sep 17 00:00:00 2001 From: Evan Sultanik Date: Tue, 21 Jul 2020 23:39:18 -0400 Subject: [PATCH 16/19] Lots of type hints and fixes --- polytracker/cfg.py | 103 +++++++++++++++++++++++++++++-------- polytracker/polytracker.py | 57 ++++---------------- 2 files changed, 92 insertions(+), 68 deletions(-) diff --git a/polytracker/cfg.py b/polytracker/cfg.py index 4950d965..2f18b895 100644 --- a/polytracker/cfg.py +++ b/polytracker/cfg.py @@ -1,20 +1,32 @@ import math -from typing import Any, Callable, Optional +from typing import ( + Any, + Callable, + Collection, + Dict, + FrozenSet, + Generic, + ItemsView, + Iterable, + KeysView, + List, + Optional, + Set, + TypeVar, +) import graphviz import networkx as nx +N = TypeVar("N") -def roots(graph): - return (n for n, d in graph.in_degree() if d == 0) - -class DiGraph(nx.DiGraph): +class DiGraph(nx.DiGraph, Generic[N]): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self._dominator_forest: Optional[DiGraph] = None - self._roots = None + self._dominator_forest: Optional[DiGraph[N]] = None + self._roots: Optional[Collection[N]] = None self._path_lengths = None def path_length(self, from_node, to_node): @@ -25,26 +37,29 @@ def path_length(self, from_node, to_node): else: return self._path_lengths[from_node][to_node] - def set_roots(self, roots): + def set_roots(self, roots: Collection[N]): self._roots = roots + def _find_roots(self) -> Iterable[N]: + return (n for n, d in self.in_degree() if d == 0) + @property - def roots(self): + def roots(self) -> Collection[N]: if self._roots is None: - self._roots = tuple(roots(self)) + self._roots = tuple(self._find_roots()) return self._roots - def depth(self, node): + def depth(self, node) -> int: return min(self.path_length(root, node) for root in self.roots) - def ancestors(self, node) -> set: + def ancestors(self, node) -> Set[N]: return nx.ancestors(self, node) - def descendants(self, node) -> frozenset: + def descendants(self, node) -> FrozenSet[N]: return frozenset(nx.dfs_successors(self, node).keys()) @property - def dominator_forest(self): + def dominator_forest(self) -> "DAG[N]": if self._dominator_forest is not None: return self._dominator_forest self._dominator_forest = DAG() @@ -54,11 +69,15 @@ def dominator_forest(self): self._dominator_forest.add_edge(dominated_by, node) return self._dominator_forest - def to_dot(self, comment: str = None, labeler=Callable[[Any], str], node_filter=None) -> graphviz.Digraph: + def to_dot( + self, comment: Optional[str] = None, labeler: Optional[Callable[[N], str]] = None, node_filter=None + ) -> graphviz.Digraph: if comment is not None: dot = graphviz.Digraph(comment=comment) else: dot = graphviz.Digraph() + if labeler is None: + labeler = str node_ids = {node: i for i, node in enumerate(self.nodes)} for node in self.nodes: if node_filter is None or node_filter(node): @@ -69,8 +88,8 @@ def to_dot(self, comment: str = None, labeler=Callable[[Any], str], node_filter= return dot -class DAG(DiGraph): - def vertex_induced_subgraph(self, vertices): +class DAG(DiGraph[N], Generic[N]): + def vertex_induced_subgraph(self, vertices: Iterable[N]) -> "DAG[N]": vertices = frozenset(vertices) subgraph = self.copy() to_remove = set(self.nodes) - vertices @@ -96,14 +115,56 @@ def vertex_induced_subgraph(self, vertices): return subgraph -class CFG(DiGraph): +class FunctionInfo: + def __init__( + self, + name: str, + cmp_bytes: Dict[str, List[int]], + input_bytes: Dict[str, List[int]] = None, + called_from: Iterable[str] = (), + ): + self.name: str = name + self.called_from: FrozenSet[str] = frozenset(called_from) + self.cmp_bytes: Dict[str, List[int]] = cmp_bytes + if input_bytes is None: + self.input_bytes: Dict[str, List[int]] = cmp_bytes + else: + self.input_bytes = input_bytes + + @property + def taint_sources(self) -> KeysView[str]: + return self.input_bytes.keys() + + def __getitem__(self, input_source_name: str) -> List[int]: + return self.input_bytes[input_source_name] + + def __iter__(self) -> Iterable[str]: + return self.taint_sources + + def items(self) -> ItemsView[str, List[int]]: + return self.input_bytes.items() + + def __hash__(self): + return hash(self.name) + + def __str__(self): + return self.name + + def __repr__(self): + return f"{self.__class__.__name__}(name={self.name!r}, cmp_bytes={self.cmp_bytes!r}, input_bytes={self.input_bytes!r}, called_from={self.called_from!r})" + + +class CFG(DiGraph[FunctionInfo]): def __init__(self): super().__init__() def to_dot( - self, comment="PolyTracker Program Trace", merged_json_obj=None, only_labeled_functions=False, labeler=None, **kwargs + self, + comment: Optional[str] = "PolyTracker Program Trace", + labeler: Optional[Callable[[FunctionInfo], str]] = None, + node_filter=None, ) -> graphviz.Digraph: - function_labels = {} + function_labels: Dict[str, str] = {} def func_labeler(f): if labeler is not None: @@ -113,4 +174,4 @@ def func_labeler(f): else: return f.name - return super().to_dot(comment, labeler=func_labeler, **kwargs) + return super().to_dot(comment, labeler=func_labeler, node_filter=node_filter) diff --git a/polytracker/polytracker.py b/polytracker/polytracker.py index ecbb3621..6c2d64df 100644 --- a/polytracker/polytracker.py +++ b/polytracker/polytracker.py @@ -1,56 +1,18 @@ import logging -from typing import Dict, Iterable, List, Set, Tuple - -from .cfg import CFG +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union +from .cfg import CFG, FunctionInfo log = logging.getLogger("PolyTracker") - -class FunctionInfo: - def __init__( - self, - name: str, - cmp_bytes: Dict[str, List[int]], - input_bytes: Dict[str, List[int]] = None, - called_from: Iterable[str] = (), - ): - self.name = name - self.called_from = frozenset(called_from) - self.cmp_bytes = cmp_bytes - if input_bytes is None: - self.input_bytes = cmp_bytes - else: - self.input_bytes = input_bytes - - @property - def taint_sources(self) -> Set[str]: - return self.input_bytes.keys() - - def __getitem__(self, input_source_name): - return self.input_bytes[input_source_name] - - def __iter__(self): - return self.taint_sources - - def items(self): - return self.input_bytes.items() - - def __hash__(self): - return hash(self.name) - - def __str__(self): - return self.name - - def __repr__(self): - return f"{self.__class__.__name__}(name={self.name!r}, cmp_bytes={self.cmp_bytes!r}, input_bytes={self.input_bytes!r}, called_from={self.called_from!r})" +VersionElement = Union[int, str] class ProgramTrace: - def __init__(self, polytracker_version: tuple, function_data: Iterable[FunctionInfo]): - self.polytracker_version = polytracker_version + def __init__(self, polytracker_version: Tuple[VersionElement, ...], function_data: Iterable[FunctionInfo]): + self.polytracker_version: Tuple[VersionElement, ...] = polytracker_version self.functions: Dict[str, FunctionInfo] = {f.name: f for f in function_data} - self._cfg = None + self._cfg: Optional[CFG] = None @property def cfg(self) -> CFG: @@ -73,10 +35,10 @@ def __repr__(self): return f"{self.__class__.__name__}(polytracker_version={self.polytracker_version!r}, function_data={list(self.functions.values())!r})" -POLYTRACKER_JSON_FORMATS = [] +POLYTRACKER_JSON_FORMATS: List[Tuple[Tuple[str, ...], Callable[[dict], ProgramTrace]]] = [] -def normalize_version(*version) -> Tuple[str]: +def normalize_version(*version: Iterable[VersionElement]) -> Tuple[Any, ...]: version = tuple(str(v) for v in version) version = tuple(version) + ("0",) * (3 - len(version)) version = tuple(version) + ("",) * (4 - len(version)) @@ -114,6 +76,7 @@ def parse(polytracker_json_obj: dict) -> ProgramTrace: return parse_format_v2(polytracker_json_obj) else: return parse_format_v1(polytracker_json_obj) + return parse_format_v1(polytracker_json_obj) @polytracker_version(0, 0, 1, "") @@ -121,7 +84,7 @@ def parse_format_v1(polytracker_json_obj: dict) -> ProgramTrace: return ProgramTrace( polytracker_version=(0, 0, 1), function_data=[ - FunctionInfo(function_name, {None: taint_bytes}) for function_name, taint_bytes in polytracker_json_obj.items() + FunctionInfo(function_name, {"": taint_bytes}) for function_name, taint_bytes in polytracker_json_obj.items() ], ) From 1c300f2d1a11069ef52cce6bc386358e93c62bce Mon Sep 17 00:00:00 2001 From: Evan Sultanik Date: Thu, 23 Jul 2020 13:37:22 -0400 Subject: [PATCH 17/19] Adds missing type hints --- polytracker/cfg.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/polytracker/cfg.py b/polytracker/cfg.py index 2f18b895..556a4616 100644 --- a/polytracker/cfg.py +++ b/polytracker/cfg.py @@ -1,7 +1,6 @@ import math from typing import ( - Any, Callable, Collection, Dict, @@ -14,6 +13,7 @@ Optional, Set, TypeVar, + Union ) import graphviz @@ -27,9 +27,9 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self._dominator_forest: Optional[DiGraph[N]] = None self._roots: Optional[Collection[N]] = None - self._path_lengths = None + self._path_lengths: Optional[Dict[N, Dict[N, int]]] = None - def path_length(self, from_node, to_node): + def path_length(self, from_node: N, to_node: N) -> Union[int, float]: if self._path_lengths is None: self._path_lengths = dict(nx.all_pairs_shortest_path_length(self, cutoff=None)) if from_node not in self._path_lengths or to_node not in self._path_lengths[from_node]: @@ -49,13 +49,13 @@ def roots(self) -> Collection[N]: self._roots = tuple(self._find_roots()) return self._roots - def depth(self, node) -> int: + def depth(self, node: N) -> int: return min(self.path_length(root, node) for root in self.roots) - def ancestors(self, node) -> Set[N]: + def ancestors(self, node: N) -> Set[N]: return nx.ancestors(self, node) - def descendants(self, node) -> FrozenSet[N]: + def descendants(self, node: N) -> FrozenSet[N]: return frozenset(nx.dfs_successors(self, node).keys()) @property From 4408212472314f1c3bd8ae681a141661493166ea Mon Sep 17 00:00:00 2001 From: Evan Sultanik Date: Thu, 23 Jul 2020 14:03:34 -0400 Subject: [PATCH 18/19] adds a single comma to appease black --- polytracker/cfg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polytracker/cfg.py b/polytracker/cfg.py index 556a4616..6bfb591a 100644 --- a/polytracker/cfg.py +++ b/polytracker/cfg.py @@ -13,7 +13,7 @@ Optional, Set, TypeVar, - Union + Union, ) import graphviz From 91b6a6e67cf09ed8d67ed5caee161f91fe01259e Mon Sep 17 00:00:00 2001 From: Evan Sultanik Date: Thu, 23 Jul 2020 14:04:59 -0400 Subject: [PATCH 19/19] Update a type hint to account for an error case --- polytracker/cfg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polytracker/cfg.py b/polytracker/cfg.py index 6bfb591a..edb815c9 100644 --- a/polytracker/cfg.py +++ b/polytracker/cfg.py @@ -49,7 +49,7 @@ def roots(self) -> Collection[N]: self._roots = tuple(self._find_roots()) return self._roots - def depth(self, node: N) -> int: + def depth(self, node: N) -> Union[int, float]: return min(self.path_length(root, node) for root in self.roots) def ancestors(self, node: N) -> Set[N]: