From f45009e4c346492f89874788f90f5a293bec7fa9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Wed, 12 Feb 2020 12:57:42 +0100 Subject: [PATCH 01/46] uniform input seeds and output directory handling --- bin/deepstate/core/base.py | 9 ++++---- bin/deepstate/core/fuzz.py | 25 +++++++++++++++++++- bin/deepstate/executors/fuzz/afl.py | 28 ++++++----------------- bin/deepstate/executors/fuzz/angora.py | 22 ++++++------------ bin/deepstate/executors/fuzz/eclipser.py | 15 ------------ bin/deepstate/executors/fuzz/honggfuzz.py | 20 ++++++---------- bin/deepstate/executors/fuzz/libfuzzer.py | 14 ------------ 7 files changed, 50 insertions(+), 83 deletions(-) diff --git a/bin/deepstate/core/base.py b/bin/deepstate/core/base.py index 900db753..e98d30b1 100644 --- a/bin/deepstate/core/base.py +++ b/bin/deepstate/core/base.py @@ -75,7 +75,7 @@ def __init__(self): # parsed argument attributes self.binary: str = None - self.output_test_dir: str = f"{self}_out" + self.output_test_dir: Optional[str] = None self.timeout: int = 0 self.num_workers: int = 1 self.mem_limit: int = 50 @@ -124,7 +124,8 @@ def parse_args(cls) -> Optional[argparse.Namespace]: help="Linker flags (space seperated) to include for external libraries.") compile_group.add_argument("--out_test_name", type=str, - help="Set name of generated instrumented binary.") + help=("Set name of generated instrumented binary. Default is `out`. " + "Automatically add `.frontend_name_lowercase` suffix.")) compile_group.add_argument("--no_exit_compile", action="store_true", help="Continue execution after compiling a harness (set as default if `--config` is set).") @@ -135,8 +136,8 @@ def parse_args(cls) -> Optional[argparse.Namespace]: # Analysis-related configurations parser.add_argument( - "-o", "--output_test_dir", type=str, default="out", - help="Output directory where tests will be saved (default is `out`).") + "-o", "--output_test_dir", type=str, + help="Output directory where tests will be saved. 
Must be empty. Required.") parser.add_argument( "-c", "--config", type=str, diff --git a/bin/deepstate/core/fuzz.py b/bin/deepstate/core/fuzz.py index 3f759cfa..9672dc7b 100644 --- a/bin/deepstate/core/fuzz.py +++ b/bin/deepstate/core/fuzz.py @@ -384,11 +384,34 @@ def pre_exec(self): raise FuzzFrontendError(f"Binary {self.binary} doesn't exists.") L.debug("Target binary: %s", self.binary) - # no sanity check, since some fuzzers require optional input seeds + # if input_seeds is provided run checks if self.input_seeds: L.debug("Input seeds directory: %s", self.input_seeds) + # AFL uses "-" to tell it to resume fuzzing, don't treat as a real seed dir + if self.input_seeds != "-": + if not os.path.exists(self.input_seeds): + raise FuzzFrontendError(f"Input seeds dir (`{self.input_seeds}`) doesn't exist.") + + if not os.path.isdir(self.input_seeds): + raise FuzzFrontendError(f"Input seeds dir (`{self.input_seeds}`) is not a directory.") + + if len(os.listdir(self.input_seeds)) == 0: + raise FuzzFrontendError(f"No seeds present in directory `{self.input_seeds}`.") + + # require empty output directory L.debug("Output directory: %s", self.output_test_dir) + if not self.output_test_dir: + raise FuzzFrontendError("Must provide -o/--output_test_dir.") + + if not os.path.exists(self.output_test_dir): + raise FuzzFrontendError(f"Output test dir (`{self.output_test_dir}`) doesn't exist.") + + if not os.path.isdir(self.output_test_dir): + raise FuzzFrontendError(f"Output test dir (`{self.output_test_dir}`) is not a directory.") + + if len(os.listdir(self.output_test_dir)) != 0: + raise FuzzFrontendError(f"output_test_dir `{self.output_test_dir}` must be empty.") # check if we enabled seed synchronization, and initialize directory if self.enable_sync: diff --git a/bin/deepstate/executors/fuzz/afl.py b/bin/deepstate/executors/fuzz/afl.py index adb72b7f..9b906d78 100644 --- a/bin/deepstate/executors/fuzz/afl.py +++ b/bin/deepstate/executors/fuzz/afl.py @@ -18,6 +18,7 @@ import 
argparse import shutil +from tempfile import mkdtemp from typing import List, Dict, Optional from deepstate.core import FuzzerFrontend, FuzzFrontendError @@ -73,33 +74,18 @@ def pre_exec(self): if f_min.read() != f_max.read(): raise FuzzFrontendError("Suboptimal CPU scaling governor. Execute 'echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor'") - # require output directory - if not self.output_test_dir: - raise FuzzFrontendError("Must provide -o/--output_test_dir.") - - if not os.path.exists(self.output_test_dir): - raise FuzzFrontendError(f"Output test dir (`{self.output_test_dir}`) doesn't exist.") - - if not os.path.isdir(self.output_test_dir): - raise FuzzFrontendError(f"Output test dir (`{self.output_test_dir}`) is not a directory.") - # check for afl-qemu if in QEMU mode if 'Q' in self.fuzzer_args or self.blackbox == True: if not shutil.which('afl-qemu-trace'): raise FuzzFrontendError("Must provide `afl-qemu-trace` executable in PATH") # require input seeds if we aren't in dumb mode, or we are using crash mode - if 'n' not in self.fuzzer_args or 'C' in self.fuzzer_args: - if self.input_seeds is None: - raise FuzzFrontendError(f"Must provide -i/--input_seeds option for {self.name}.") - - # AFL uses "-" to tell it to resume fuzzing, don't treat as a real seed dir - if self.input_seeds != "-": - if not os.path.exists(self.input_seeds): - raise FuzzFrontendError(f"Input seeds dir (`{self.input_seeds}`) doesn't exist.") - - if len(os.listdir(self.input_seeds)) == 0: - raise FuzzFrontendError(f"No seeds present in directory `{self.input_seeds}`.") + if self.input_seeds is None: + if 'n' not in self.fuzzer_args or 'C' in self.fuzzer_args: + self.input_seeds = mkdtemp() + with open(os.path.join(self.input_seeds, "fake_seed"), 'wb') as f: + f.write(b'X') + L.info("Creating fake input seeds directory: %s", self.input_seeds) @property diff --git a/bin/deepstate/executors/fuzz/angora.py b/bin/deepstate/executors/fuzz/angora.py index 
a61c0383..a94ff666 100644 --- a/bin/deepstate/executors/fuzz/angora.py +++ b/bin/deepstate/executors/fuzz/angora.py @@ -19,6 +19,7 @@ import argparse import subprocess +from tempfile import mkdtemp from typing import List, Dict, Optional, Any from deepstate.core import FuzzerFrontend, FuzzFrontendError @@ -134,14 +135,12 @@ def pre_exec(self): if not os.path.exists(self.taint_binary): raise FuzzFrontendError("Taint binary doesn't exist") - if not self.input_seeds: - raise FuzzFrontendError(f"Must provide -i/--input_seeds option for {self.name}.") - - if not os.path.exists(self.input_seeds): - raise FuzzFrontendError(f"Input seeds dir (`{self.input_seeds}`) doesn't exist.") - - if len(os.listdir(self.input_seeds)) == 0: - raise FuzzFrontendError(f"No seeds present in directory `{self.input_seeds}`.") + # require input seeds + if self.input_seeds is None: + self.input_seeds = mkdtemp() + with open(os.path.join(self.input_seeds, "fake_seed"), 'wb') as f: + f.write(b'X') + L.info("Creating fake input seeds directory: %s", self.input_seeds) if self.blackbox is True: raise FuzzFrontendError(f"Blackbox fuzzing is not supported by {self.name}.") @@ -149,13 +148,6 @@ def pre_exec(self): if self.dictionary: L.error("%s can't use dictionaries.", self.name) - # require output directory - if not self.output_test_dir: - raise FuzzFrontendError("Must provide -o/--output_test_dir.") - - if os.path.exists(self.output_test_dir): - raise FuzzFrontendError(f"Remove previous output directory (`{self.output_test_dir}`) before running {self.name}.") - @property def cmd(self): diff --git a/bin/deepstate/executors/fuzz/eclipser.py b/bin/deepstate/executors/fuzz/eclipser.py index 202bbd44..f4a4706b 100644 --- a/bin/deepstate/executors/fuzz/eclipser.py +++ b/bin/deepstate/executors/fuzz/eclipser.py @@ -68,21 +68,6 @@ def pre_exec(self) -> None: if self.dictionary: L.error("Angora can't use dictionaries.") - - # require output directory - if not self.output_test_dir: - raise 
FuzzFrontendError("Must provide -o/--output_test_dir.") - - if os.path.exists(self.output_test_dir): - if not os.path.isdir(self.output_test_dir): - raise FuzzFrontendError(f"Output test dir (`{self.output_test_dir}`) is not a directory.") - - if self.input_seeds: - if not os.path.exists(self.input_seeds): - raise FuzzFrontendError(f"Input seeds dir (`{self.input_seeds}`) doesn't exist.") - - if len(os.listdir(self.input_seeds)) == 0: - raise FuzzFrontendError(f"No seeds present in directory `{self.input_seeds}`.") @property diff --git a/bin/deepstate/executors/fuzz/honggfuzz.py b/bin/deepstate/executors/fuzz/honggfuzz.py index b4599fe2..fd3ca949 100644 --- a/bin/deepstate/executors/fuzz/honggfuzz.py +++ b/bin/deepstate/executors/fuzz/honggfuzz.py @@ -16,6 +16,7 @@ import logging import argparse +from tempfile import mkdtemp from typing import List, Dict, Optional from deepstate.core import FuzzerFrontend, FuzzFrontendError @@ -58,19 +59,12 @@ def compile(self) -> None: # type: ignore def pre_exec(self): super().pre_exec() - # require output directory - if not self.output_test_dir: - raise FuzzFrontendError("Must provide -o/--output_test_dir.") - - if os.path.exists(self.output_test_dir): - if not os.path.isdir(self.output_test_dir): - raise FuzzFrontendError(f"Output test dir (`{self.output_test_dir}`) is not a directory.") - - if not self.input_seeds: - raise FuzzFrontendError(f"Must provide -i/--input_seeds option for {self.name}.") - - if not os.path.exists(self.input_seeds): - raise FuzzFrontendError(f"Input seeds dir (`{self.input_seeds}`) doesn't exist.") + # require input seeds + if self.input_seeds is None: + self.input_seeds = mkdtemp() + with open(os.path.join(self.input_seeds, "fake_seed"), 'wb') as f: + f.write(b'X') + L.info("Creating fake input seeds directory: %s", self.input_seeds) @property diff --git a/bin/deepstate/executors/fuzz/libfuzzer.py b/bin/deepstate/executors/fuzz/libfuzzer.py index b5355eff..827a2f7f 100644 --- 
a/bin/deepstate/executors/fuzz/libfuzzer.py +++ b/bin/deepstate/executors/fuzz/libfuzzer.py @@ -63,23 +63,9 @@ def pre_exec(self) -> None: self.binary = os.path.abspath(self.binary) self.fuzzer_exe = self.binary # type: ignore - # require output directory - if not self.output_test_dir: - raise FuzzFrontendError("Must provide -o/--output_test_dir.") - - if not os.path.exists(self.output_test_dir): - raise FuzzFrontendError(f"Output test dir (`{self.output_test_dir}`) doesn't exist.") - - if not os.path.isdir(self.output_test_dir): - raise FuzzFrontendError(f"Output test dir (`{self.output_test_dir}`) is not a directory.") - if self.blackbox is True: raise FuzzFrontendError("Blackbox fuzzing is not supported by libFuzzer.") - if self.input_seeds: - if not os.path.exists(self.input_seeds): - raise FuzzFrontendError(f"Input seeds dir (`{self.input_seeds}`) doesn't exist.") - @property def cmd(self): From 2f8822a53132e26b376b340c6b7635f0974e6c50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Wed, 12 Feb 2020 15:53:27 +0100 Subject: [PATCH 02/46] implement session resuming in afl/hfuzz/libfuzz and base for syncing --- bin/deepstate/core/base.py | 2 +- bin/deepstate/core/fuzz.py | 29 +++++++++------- bin/deepstate/executors/fuzz/afl.py | 42 ++++++++++++++++++----- bin/deepstate/executors/fuzz/honggfuzz.py | 34 ++++++++++++++---- bin/deepstate/executors/fuzz/libfuzzer.py | 28 ++++++++++++--- 5 files changed, 103 insertions(+), 32 deletions(-) diff --git a/bin/deepstate/core/base.py b/bin/deepstate/core/base.py index e98d30b1..e9daa61f 100644 --- a/bin/deepstate/core/base.py +++ b/bin/deepstate/core/base.py @@ -137,7 +137,7 @@ def parse_args(cls) -> Optional[argparse.Namespace]: # Analysis-related configurations parser.add_argument( "-o", "--output_test_dir", type=str, - help="Output directory where tests will be saved. Must be empty. Required.") + help="Output directory where tests will be saved. Required. 
If not empty, will try to resume.") parser.add_argument( "-c", "--config", type=str, diff --git a/bin/deepstate/core/fuzz.py b/bin/deepstate/core/fuzz.py index 9672dc7b..65dbfaef 100644 --- a/bin/deepstate/core/fuzz.py +++ b/bin/deepstate/core/fuzz.py @@ -87,7 +87,10 @@ def __init__(self, envvar: str) -> None: self.enable_sync: bool = False self.sync_cycle: int = 5 self.sync_out: bool = True - self.sync_dir: str = "out_sync" + + self.push_dir: str = '' # push testcases from external sources here + self.pull_dir: str = '' # pull new testcases from this dir + self.crash_dir: str = '' # crashes will be in this dir self.post_stats: bool = False self.home_path: Optional[str] = None @@ -343,6 +346,13 @@ def compile(self, lib_path: str, flags: List[str], _out_bin: str, env = os.envir self.binary = out_bin + def create_fake_seeds(self): + self.input_seeds = mkdtemp() + with open(os.path.join(self.input_seeds, "fake_seed"), 'wb') as f: + f.write(b'X') + L.info("Creating fake input seeds directory: %s", self.input_seeds) + + def pre_exec(self): """ Called before fuzzer execution in order to perform sanity checks. 
Base method contains @@ -388,16 +398,14 @@ def pre_exec(self): if self.input_seeds: L.debug("Input seeds directory: %s", self.input_seeds) - # AFL uses "-" to tell it to resume fuzzing, don't treat as a real seed dir - if self.input_seeds != "-": - if not os.path.exists(self.input_seeds): - raise FuzzFrontendError(f"Input seeds dir (`{self.input_seeds}`) doesn't exist.") + if not os.path.exists(self.input_seeds): + raise FuzzFrontendError(f"Input seeds dir (`{self.input_seeds}`) doesn't exist.") - if not os.path.isdir(self.input_seeds): - raise FuzzFrontendError(f"Input seeds dir (`{self.input_seeds}`) is not a directory.") + if not os.path.isdir(self.input_seeds): + raise FuzzFrontendError(f"Input seeds dir (`{self.input_seeds}`) is not a directory.") - if len(os.listdir(self.input_seeds)) == 0: - raise FuzzFrontendError(f"No seeds present in directory `{self.input_seeds}`.") + if len(os.listdir(self.input_seeds)) == 0: + raise FuzzFrontendError(f"No seeds present in directory `{self.input_seeds}`.") # require empty output directory L.debug("Output directory: %s", self.output_test_dir) @@ -410,9 +418,6 @@ def pre_exec(self): if not os.path.isdir(self.output_test_dir): raise FuzzFrontendError(f"Output test dir (`{self.output_test_dir}`) is not a directory.") - if len(os.listdir(self.output_test_dir)) != 0: - raise FuzzFrontendError(f"output_test_dir `{self.output_test_dir}` must be empty.") - # check if we enabled seed synchronization, and initialize directory if self.enable_sync: if not os.path.isdir(self.sync_dir): diff --git a/bin/deepstate/executors/fuzz/afl.py b/bin/deepstate/executors/fuzz/afl.py index 9b906d78..44a9668f 100644 --- a/bin/deepstate/executors/fuzz/afl.py +++ b/bin/deepstate/executors/fuzz/afl.py @@ -79,13 +79,36 @@ def pre_exec(self): if not shutil.which('afl-qemu-trace'): raise FuzzFrontendError("Must provide `afl-qemu-trace` executable in PATH") - # require input seeds if we aren't in dumb mode, or we are using crash mode - if 
self.input_seeds is None: - if 'n' not in self.fuzzer_args or 'C' in self.fuzzer_args: - self.input_seeds = mkdtemp() - with open(os.path.join(self.input_seeds, "fake_seed"), 'wb') as f: - f.write(b'X') - L.info("Creating fake input seeds directory: %s", self.input_seeds) + sync_dir = os.path.join(self.output_test_dir, "sync_dir") + main_dir = os.path.join(self.output_test_dir, "the_afl") + self.push_dir = os.path.join(sync_dir, "queue") + self.pull_dir = os.path.join(main_dir, "queue") + self.crash_dir = os.path.join(main_dir, "crashes") + + # resuming fuzzing + if len(os.listdir(self.output_test_dir)) > 1: + if not os.path.isdir(sync_dir): + raise FuzzFrontendError(f"Can't resume with output directory `{self.output_test_dir}`. " + "No `sync_dir` directory inside.") + if not os.path.isdir(self.push_dir): + raise FuzzFrontendError(f"Can't resume with output directory `{self.output_test_dir}`. " + "No `sync_dir/queue` directory inside.") + if not os.path.isdir(main_dir): + raise FuzzFrontendError(f"Can't resume with output directory `{self.output_test_dir}`. 
" + "No `the_afl` directory inside.") + + self.input_seeds = '-' + L.info(f"Resuming fuzzing using seeds from {self.output_test_dir}/the_afl/queue " + "(skipping --input_seeds option).") + + else: + os.mkdir(sync_dir) + os.mkdir(self.push_dir) + + # create fake input seeds if we aren't in dumb mode, or we are using crash mode + if self.input_seeds is None: + if 'n' not in self.fuzzer_args or 'C' in self.fuzzer_args: + self.create_fake_seeds() @property @@ -93,7 +116,10 @@ def cmd(self): cmd_list: List[str] = list() # guaranteed arguments - cmd_list.extend(["-o", self.output_test_dir]) # auto-create, reusable + cmd_list.extend([ + "-o", self.output_test_dir, # auto-create, reusable + "-S", "the_afl" + ]) if self.mem_limit == 0: cmd_list.extend(["-m", "1099511627776"]) # use 1TiB as unlimited diff --git a/bin/deepstate/executors/fuzz/honggfuzz.py b/bin/deepstate/executors/fuzz/honggfuzz.py index fd3ca949..27cac13b 100644 --- a/bin/deepstate/executors/fuzz/honggfuzz.py +++ b/bin/deepstate/executors/fuzz/honggfuzz.py @@ -59,12 +59,29 @@ def compile(self) -> None: # type: ignore def pre_exec(self): super().pre_exec() - # require input seeds - if self.input_seeds is None: - self.input_seeds = mkdtemp() - with open(os.path.join(self.input_seeds, "fake_seed"), 'wb') as f: - f.write(b'X') - L.info("Creating fake input seeds directory: %s", self.input_seeds) + self.push_dir = os.path.join(self.output_test_dir, "sync_dir") + self.pull_dir = self.push_dir + self.crash_dir = os.path.join(self.output_test_dir, "crashes") + + # resuming fuzzing + if len(os.listdir(self.output_test_dir)) > 1: + if not os.path.isdir(self.push_dir): + raise FuzzFrontendError(f"Can't resume with output directory `{self.output_test_dir}`. " + "No `sync_dir` directory inside.") + if not os.path.isdir(self.crash_dir): + raise FuzzFrontendError(f"Can't resume with output directory `{self.output_test_dir}`. 
" + "No `crashes` directory inside.") + + self.input_seeds = self.push_dir + L.info(f"Resuming fuzzing using seeds from {self.input_seeds} (skipping --input_seeds option).") + + else: + os.mkdir(self.push_dir) + os.mkdir(self.crash_dir) + + # create fake input seeds + if self.input_seeds is None: + self.create_fake_seeds() @property @@ -73,8 +90,11 @@ def cmd(self): # guaranteed arguments cmd_list.extend([ - "--output", self.output_test_dir, # auto-create, reusable "--workspace", self.output_test_dir, + "--output", self.push_dir, # auto-create, reusable + "--crashdir", self.crash_dir, + # "--logfile", os.path.join(self.output_test_dir, "hfuzz_log.txt"), + "--verbose", "--rlimit_rss", str(self.mem_limit), ]) diff --git a/bin/deepstate/executors/fuzz/libfuzzer.py b/bin/deepstate/executors/fuzz/libfuzzer.py index 827a2f7f..472171b8 100644 --- a/bin/deepstate/executors/fuzz/libfuzzer.py +++ b/bin/deepstate/executors/fuzz/libfuzzer.py @@ -66,6 +66,25 @@ def pre_exec(self) -> None: if self.blackbox is True: raise FuzzFrontendError("Blackbox fuzzing is not supported by libFuzzer.") + self.push_dir = os.path.join(self.output_test_dir, "sync_dir") + self.pull_dir = self.push_dir + self.crash_dir = os.path.join(self.output_test_dir, "crashes") + + # resuming fuzzing + if len(os.listdir(self.output_test_dir)) > 0: + if not os.path.isdir(self.push_dir): + raise FuzzFrontendError(f"Can't resume with output directory `{self.output_test_dir}`. " + "No `sync_dir` directory inside.") + if not os.path.isdir(self.crash_dir): + raise FuzzFrontendError(f"Can't resume with output directory `{self.output_test_dir}`. 
" + "No `crashes` directory inside.") + + self.input_seeds = None + L.info(f"Resuming fuzzing using seeds from {self.push_dir} (skipping --input_seeds option).") + else: + os.mkdir(self.push_dir) + os.mkdir(self.crash_dir) + @property def cmd(self): @@ -78,7 +97,10 @@ def cmd(self): # guaranteed arguments cmd_list.extend([ "-rss_limit_mb={}".format(self.mem_limit), - "-max_len={}".format(self.max_input_size) + "-max_len={}".format(self.max_input_size), + "-artifact_prefix={}".format(self.crash_dir + "/"), + # "-jobs={}".format(2), # crashes deepstate ;/ + "-workers={}".format(1) ]) for key, val in self.fuzzer_args: @@ -97,10 +119,8 @@ def cmd(self): if self.post_stats: cmd_list.append("-print_final_stats={}".format(1)) - cmd_list.append("-artifact_prefix={}".format("deepstate_")) - # must be here, this are positional args - cmd_list.append(self.output_test_dir) # no auto-create, reusable + cmd_list.append(self.push_dir) # no auto-create, reusable # not required, if provided: not auto-create and not require any files inside if self.input_seeds: From dc1c8f19f8ce229dd093ad1ab9252ff481622ca6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Wed, 12 Feb 2020 16:21:50 +0100 Subject: [PATCH 03/46] fix angora compilation --- bin/deepstate/core/fuzz.py | 8 ++-- bin/deepstate/executors/fuzz/angora.py | 51 ++++++++++++++------------ 2 files changed, 30 insertions(+), 29 deletions(-) diff --git a/bin/deepstate/core/fuzz.py b/bin/deepstate/core/fuzz.py index 65dbfaef..10aded6e 100644 --- a/bin/deepstate/core/fuzz.py +++ b/bin/deepstate/core/fuzz.py @@ -23,6 +23,7 @@ import shutil import multiprocessing as mp +from tempfile import mkdtemp from multiprocessing.pool import ApplyResult from typing import Optional, Dict, List, Any, Tuple @@ -334,11 +335,8 @@ def compile(self, lib_path: str, flags: List[str], _out_bin: str, env = os.envir L.debug("Compilation command: %s", compile_cmd) # call compiler, and deal with exceptions accordingly - L.info("Compiling 
test harness `%s` with %s", self.compile_test, self.compiler_exe) - try: - subprocess.Popen(compile_cmd, env=env).communicate() - except BaseException as e: - raise FuzzFrontendError(f"{self.compiler_exe} interrupted due to exception:", e) + L.info("Compiling test harness `%s`", compile_cmd) + subprocess.Popen(compile_cmd, env=env).communicate() # extra check if target binary was successfully compiled, and set that as target binary out_bin = os.path.join(os.getcwd(), _out_bin) diff --git a/bin/deepstate/executors/fuzz/angora.py b/bin/deepstate/executors/fuzz/angora.py index a94ff666..7e0e51b7 100644 --- a/bin/deepstate/executors/fuzz/angora.py +++ b/bin/deepstate/executors/fuzz/angora.py @@ -19,7 +19,6 @@ import argparse import subprocess -from tempfile import mkdtemp from typing import List, Dict, Optional, Any from deepstate.core import FuzzerFrontend, FuzzFrontendError @@ -32,7 +31,7 @@ class Angora(FuzzerFrontend): # these classvars are set under the assumption that $ANGORA_PATH is set to the built source NAME = "Angora" - SEARCH_DIRS = ["bin", "clang+llvm", "tools"] + SEARCH_DIRS = ["bin", "tools"] EXECUTABLES = {"FUZZER": "angora_fuzzer", "COMPILER": "angora-clang++", "GEN_LIB_ABILIST": "gen_library_abilist.sh" @@ -87,7 +86,6 @@ def compile(self) -> None: # type: ignore out: bytes = subprocess.check_output(cmd) ignore_bufs += [out] - # write all to final out_file with open(out_file, "wb") as f: for buf in ignore_bufs: @@ -96,8 +94,26 @@ def compile(self) -> None: # type: ignore # set envvar for fuzzer compilers env["ANGORA_TAINT_RULE_LIST"] = os.path.abspath(out_file) + # make a binary with taint tracking information + # env["USE_PIN"] = "1" # TODO, add pin support + env["USE_TRACK"] = "1" + + taint_path: str = "/usr/local/lib/libdeepstate_taint.a" + L.debug("Static library path: %s", taint_path) + + taint_flags: List[str] = ["-ldeepstate_taint"] + if self.compiler_args: + taint_flags += [arg for arg in self.compiler_args.split(' ')] + L.info("Compiling %s 
for %s with taint tracking", self.compile_test, self.name) + super().compile(taint_path, taint_flags, self.out_test_name + ".taint", env=env) + + self.taint_binary = self.binary + self.binary = None + env.pop("USE_TRACK") # make a binary with light instrumentation + env["USE_FAST"] = "1" + fast_path: str = "/usr/local/lib/libdeepstate_fast.a" L.debug("Static library path: %s", fast_path) @@ -107,24 +123,13 @@ def compile(self) -> None: # type: ignore L.info("Compiling %s for %s with light instrumentation.", self.compile_test, self.name) super().compile(fast_path, fast_flags, self.out_test_name + ".fast", env=env) - # initialize envvar for instrumentation framework - if self.mode == "pin": # type: ignore - env["USE_PIN"] = "1" - else: - env["USE_TRACK"] = "1" - - # make a binary with taint tracking information - taint_path: str = "/usr/local/lib/libdeepstate_taint.a" - L.debug("Static library path: %s", taint_path) - - taint_flags: List[str] = ["-ldeepstate_taint"] - if self.compiler_args: - taint_flags += [arg for arg in self.compiler_args.split(' ')] - L.info("Compiling %s for %s with taint tracking", self.compile_test, self.name) - super().compile(taint_path, taint_flags, self.out_test_name + ".taint", env=env) - def pre_exec(self): + # correct version of clang is required + if self.env: + os.environ["PATH"] = ":".join((self.env, os.environ.get("PATH", ""))) + L.info(f"Adding `{self.env}` to $PATH.") + super().pre_exec() # since base method checks for self.binary by default @@ -137,10 +142,7 @@ def pre_exec(self): # require input seeds if self.input_seeds is None: - self.input_seeds = mkdtemp() - with open(os.path.join(self.input_seeds, "fake_seed"), 'wb') as f: - f.write(b'X') - L.info("Creating fake input seeds directory: %s", self.input_seeds) + self.create_fake_seeds() if self.blackbox is True: raise FuzzFrontendError(f"Blackbox fuzzing is not supported by {self.name}.") @@ -158,7 +160,8 @@ def cmd(self): "--mode", "llvm", # TODO, add pin support "--track", 
os.path.abspath(self.taint_binary), "--memory_limit", str(self.mem_limit), - "--output", self.output_test_dir # auto-create, not reusable + "--output", self.output_test_dir, # auto-create, not reusable + "--sync_afl" ]) for key, val in self.fuzzer_args: From d6ba18aef2a611c342ffb5d8cf87aa7f1a39ebc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Wed, 12 Feb 2020 17:30:10 +0100 Subject: [PATCH 04/46] fix eclipser compilator --- bin/deepstate/core/fuzz.py | 12 ++++++------ bin/deepstate/executors/fuzz/eclipser.py | 11 ++++++----- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/bin/deepstate/core/fuzz.py b/bin/deepstate/core/fuzz.py index 10aded6e..29915e34 100644 --- a/bin/deepstate/core/fuzz.py +++ b/bin/deepstate/core/fuzz.py @@ -247,14 +247,14 @@ def _search_for_executable(self, exe_name): if self.env: for one_env_path in self.env.split(":"): for search_dir in [""] + self.search_dirs: - exe_path: Optional[str] = shutil.which(exe_name, path=os.path.join(one_env_path, search_dir)) + exe_path: Optional[str] = shutil.which(exe_name, mode=os.F_OK, path=os.path.join(one_env_path, search_dir)) if exe_path is not None: return exe_path # search in current dir and $PATH where_to_search = ['.', None] for search_env in where_to_search: - exe_path: Optional[str] = shutil.which(exe_name, path=search_env) + exe_path: Optional[str] = shutil.which(exe_name, mode=os.F_OK, path=search_env) if exe_path is not None: return exe_path @@ -361,13 +361,13 @@ def pre_exec(self): if self.parser is None: raise FuzzFrontendError("No arguments parsed yet. 
Call parse_args() before pre_exec().") + # search for executables and set proper variables + self._set_executables() + if self.fuzzer_help: self.print_help() sys.exit(0) - # search for executables and set proper variables - self._set_executables() - # if compile_test is set, call compile for user if self.compile_test: self.compile() @@ -405,7 +405,7 @@ def pre_exec(self): if len(os.listdir(self.input_seeds)) == 0: raise FuzzFrontendError(f"No seeds present in directory `{self.input_seeds}`.") - # require empty output directory + # require output directory L.debug("Output directory: %s", self.output_test_dir) if not self.output_test_dir: raise FuzzFrontendError("Must provide -o/--output_test_dir.") diff --git a/bin/deepstate/executors/fuzz/eclipser.py b/bin/deepstate/executors/fuzz/eclipser.py index f4a4706b..a7f83eeb 100644 --- a/bin/deepstate/executors/fuzz/eclipser.py +++ b/bin/deepstate/executors/fuzz/eclipser.py @@ -36,12 +36,13 @@ class Eclipser(FuzzerFrontend): NAME = "Eclipser" SEARCH_DIRS = ["build"] EXECUTABLES = {"FUZZER": "Eclipser.dll", - "COMPILER": "clang++" # for regular compilation + "COMPILER": "clang++", # for regular compilation + "RUNNER": "dotnet" } def print_help(self): - subprocess.call(["dotnet", self.fuzzer_exe, "fuzz", "--help"]) + subprocess.call([self.EXECUTABLES["RUNNER"], self.fuzzer_exe, "fuzz", "--help"]) def compile(self) -> None: # type: ignore @@ -137,8 +138,8 @@ def post_exec(self) -> None: out: str = self.output_test_dir L.info("Performing post-processing decoding on testcases and crashes") - subprocess.call(["dotnet", self.fuzzer_exe, "decode", "-i", out + "/testcase", "-o", out + "/decoded"]) - subprocess.call(["dotnet", self.fuzzer_exe, "decode", "-i", out + "/crash", "-o", out + "/decoded"]) + subprocess.call([self.EXECUTABLES["RUNNER"], self.fuzzer_exe, "decode", "-i", out + "/testcase", "-o", out + "/decoded"]) + subprocess.call([self.EXECUTABLES["RUNNER"], self.fuzzer_exe, "decode", "-i", out + "/crash", "-o", out + 
"/decoded"]) for f in glob.glob(out + "/decoded/decoded_files/*"): shutil.copy(f, out) shutil.rmtree(out + "/decoded") @@ -160,7 +161,7 @@ def main(): try: fuzzer = Eclipser(envvar="ECLIPSER_HOME") fuzzer.parse_args() - fuzzer.run(compiler="dotnet") + fuzzer.run(compiler=fuzzer.EXECUTABLES["RUNNER"]) return 0 except FuzzFrontendError as e: L.error(e) From c056289afc817508df5623cb6c136be83d61925f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Wed, 12 Feb 2020 20:12:26 +0100 Subject: [PATCH 05/46] implement session resuming and base for syncing --- bin/deepstate/core/fuzz.py | 33 +++++++++++++++-- bin/deepstate/executors/fuzz/afl.py | 43 ++++++++--------------- bin/deepstate/executors/fuzz/angora.py | 30 +++++++++++----- bin/deepstate/executors/fuzz/eclipser.py | 15 +++++++- bin/deepstate/executors/fuzz/honggfuzz.py | 29 ++++++--------- bin/deepstate/executors/fuzz/libfuzzer.py | 24 ++++++------- 6 files changed, 104 insertions(+), 70 deletions(-) diff --git a/bin/deepstate/core/fuzz.py b/bin/deepstate/core/fuzz.py index 29915e34..adbf0552 100644 --- a/bin/deepstate/core/fuzz.py +++ b/bin/deepstate/core/fuzz.py @@ -24,6 +24,7 @@ import multiprocessing as mp from tempfile import mkdtemp +from pathlib import Path from multiprocessing.pool import ApplyResult from typing import Optional, Dict, List, Any, Tuple @@ -51,6 +52,7 @@ def __init__(self, envvar: str) -> None: - fuzzer_exe (fuzzer executable file) - env (environment variable name) - search_dirs (directories inside fuzzer home dir where to look for executables) + - require_seeds Inherits: - name (name for pretty printing) @@ -77,6 +79,8 @@ def __init__(self, envvar: str) -> None: # flag to ensure fuzzer processes do not persist self._on: bool = False + self.require_seeds: bool = False + # parsed argument attributes self.input_seeds: Optional[str] = None self.max_input_size: int = 8192 @@ -345,10 +349,27 @@ def compile(self, lib_path: str, flags: List[str], _out_bin: str, env = os.envir 
def create_fake_seeds(self): - self.input_seeds = mkdtemp() + if not self.input_seeds: + self.input_seeds = mkdtemp() with open(os.path.join(self.input_seeds, "fake_seed"), 'wb') as f: f.write(b'X') - L.info("Creating fake input seeds directory: %s", self.input_seeds) + L.info("Creating fake input seed file in directory `%s`", self.input_seeds) + + + def check_required_directories(self, required_dirs): + for required_dir in required_dirs: + if not os.path.isdir(required_dir): + raise FuzzFrontendError(f"Can't resume with output directory `{self.output_test_dir}`. " + f"No `{required_dir}` directory inside.") + + + def setup_new_session(self, dirs_to_create=[]): + for dir_to_create in dirs_to_create: + Path(dir_to_create).mkdir(parents=True, exist_ok=True) + L.debug(f"Creating directory {dir_to_create}.") + + if self.require_seeds is True and not self.input_seeds: + self.create_fake_seeds() def pre_exec(self): @@ -356,6 +377,14 @@ def pre_exec(self): Called before fuzzer execution in order to perform sanity checks. Base method contains default argument checks. Users should implement inherited method for any other environment checks or initializations before execution. + + Do: + - search for executables (update self.EXECUTABLES) + - may print fuzzer help (and exit) + - may compile + - check for targets (self.binary) + - may check for input_seeds + - check for output directory """ if self.parser is None: diff --git a/bin/deepstate/executors/fuzz/afl.py b/bin/deepstate/executors/fuzz/afl.py index 44a9668f..bc09c126 100644 --- a/bin/deepstate/executors/fuzz/afl.py +++ b/bin/deepstate/executors/fuzz/afl.py @@ -18,7 +18,6 @@ import argparse import shutil -from tempfile import mkdtemp from typing import List, Dict, Optional from deepstate.core import FuzzerFrontend, FuzzFrontendError @@ -59,6 +58,10 @@ def pre_exec(self): """ Perform argparse and environment-related sanity checks. 
""" + # check for afl-qemu-trace if in QEMU mode + if 'Q' in self.fuzzer_args or self.blackbox == True: + self.EXECUTABLES["AFL-QEMU-TRACE"] = "afl-qemu-trace" + super().pre_exec() # check if core dump pattern is set as `core` @@ -74,41 +77,24 @@ def pre_exec(self): if f_min.read() != f_max.read(): raise FuzzFrontendError("Suboptimal CPU scaling governor. Execute 'echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor'") - # check for afl-qemu if in QEMU mode - if 'Q' in self.fuzzer_args or self.blackbox == True: - if not shutil.which('afl-qemu-trace'): - raise FuzzFrontendError("Must provide `afl-qemu-trace` executable in PATH") + # set input/output variables + # if we aren't in dumb mode, or we are using crash mode + if 'n' not in self.fuzzer_args or 'C' in self.fuzzer_args: + self.require_seeds = True sync_dir = os.path.join(self.output_test_dir, "sync_dir") - main_dir = os.path.join(self.output_test_dir, "the_afl") + main_dir = os.path.join(self.output_test_dir, "the_fuzzer") self.push_dir = os.path.join(sync_dir, "queue") self.pull_dir = os.path.join(main_dir, "queue") self.crash_dir = os.path.join(main_dir, "crashes") - # resuming fuzzing + # resume fuzzing if len(os.listdir(self.output_test_dir)) > 1: - if not os.path.isdir(sync_dir): - raise FuzzFrontendError(f"Can't resume with output directory `{self.output_test_dir}`. " - "No `sync_dir` directory inside.") - if not os.path.isdir(self.push_dir): - raise FuzzFrontendError(f"Can't resume with output directory `{self.output_test_dir}`. " - "No `sync_dir/queue` directory inside.") - if not os.path.isdir(main_dir): - raise FuzzFrontendError(f"Can't resume with output directory `{self.output_test_dir}`. 
" - "No `the_afl` directory inside.") - + self.check_required_directories([self.push_dir, self.pull_dir, self.crash_dir]) self.input_seeds = '-' - L.info(f"Resuming fuzzing using seeds from {self.output_test_dir}/the_afl/queue " - "(skipping --input_seeds option).") - + L.info(f"Resuming fuzzing using seeds from {self.pull_dir} (skipping --input_seeds option).") else: - os.mkdir(sync_dir) - os.mkdir(self.push_dir) - - # create fake input seeds if we aren't in dumb mode, or we are using crash mode - if self.input_seeds is None: - if 'n' not in self.fuzzer_args or 'C' in self.fuzzer_args: - self.create_fake_seeds() + self.setup_new_session([self.push_dir]) @property @@ -118,7 +104,7 @@ def cmd(self): # guaranteed arguments cmd_list.extend([ "-o", self.output_test_dir, # auto-create, reusable - "-S", "the_afl" + "-M", "the_fuzzer" # TODO, detect when to use -S ]) if self.mem_limit == 0: @@ -220,6 +206,7 @@ def post_exec(self) -> None: and (TODO) performs crash triaging with seeds from both sync_dir and local queue. 
""" + # TODO: merge output_test_dir/the_fuzzer/crashes* into one dir if self.post_stats: print(f"\n{self.name} RUN STATS:\n") for stat, val in self.stats.items(): diff --git a/bin/deepstate/executors/fuzz/angora.py b/bin/deepstate/executors/fuzz/angora.py index 7e0e51b7..345817b9 100644 --- a/bin/deepstate/executors/fuzz/angora.py +++ b/bin/deepstate/executors/fuzz/angora.py @@ -31,10 +31,11 @@ class Angora(FuzzerFrontend): # these classvars are set under the assumption that $ANGORA_PATH is set to the built source NAME = "Angora" - SEARCH_DIRS = ["bin", "tools"] + SEARCH_DIRS = ["clang+llvm/bin", "bin", "tools"] EXECUTABLES = {"FUZZER": "angora_fuzzer", "COMPILER": "angora-clang++", - "GEN_LIB_ABILIST": "gen_library_abilist.sh" + "GEN_LIB_ABILIST": "gen_library_abilist.sh", + "CLANG_COMPILER": "clang++" } @@ -126,9 +127,10 @@ def compile(self) -> None: # type: ignore def pre_exec(self): # correct version of clang is required - if self.env: - os.environ["PATH"] = ":".join((self.env, os.environ.get("PATH", ""))) - L.info(f"Adding `{self.env}` to $PATH.") + self._set_executables() + clang_for_angora_path = os.path.dirname(self.EXECUTABLES["CLANG_COMPILER"]) + os.environ["PATH"] = ":".join((clang_for_angora_path, os.environ.get("PATH", ""))) + L.info(f"Adding `{clang_for_angora_path}` to $PATH.") super().pre_exec() @@ -140,9 +142,21 @@ def pre_exec(self): if not os.path.exists(self.taint_binary): raise FuzzFrontendError("Taint binary doesn't exist") - # require input seeds - if self.input_seeds is None: - self.create_fake_seeds() + # set input/output variables + self.require_seeds = True + sync_dir = os.path.join(self.output_test_dir, "sync_dir") + main_dir = os.path.join(self.output_test_dir, "angora") + self.push_dir = os.path.join(sync_dir, "queue") + self.pull_dir = os.path.join(main_dir, "queue") + self.crash_dir = os.path.join(main_dir, "crashes") + + # resume fuzzing + if len(os.listdir(self.output_test_dir)) > 1: + 
self.check_required_directories([self.push_dir, self.pull_dir, self.crash_dir]) + self.input_seeds = '-' + L.info(f"Resuming fuzzing using seeds from {self.pull_dir} (skipping --input_seeds option).") + else: + self.setup_new_session([self.push_dir]) if self.blackbox is True: raise FuzzFrontendError(f"Blackbox fuzzing is not supported by {self.name}.") diff --git a/bin/deepstate/executors/fuzz/eclipser.py b/bin/deepstate/executors/fuzz/eclipser.py index a7f83eeb..afc6678c 100644 --- a/bin/deepstate/executors/fuzz/eclipser.py +++ b/bin/deepstate/executors/fuzz/eclipser.py @@ -64,6 +64,19 @@ def pre_exec(self) -> None: # TODO handle that somehow L.warning("Eclipser doesn't limit child processes memory.") + sync_dir = os.path.join(self.output_test_dir, "sync_dir") + main_dir = os.path.join(self.output_test_dir, "the_fuzzer") + self.push_dir = os.path.join(sync_dir, "queue") + self.pull_dir = os.path.join(main_dir, "testcase") + self.crash_dir = os.path.join(main_dir, "crash") + + # resume fuzzing + if len(os.listdir(self.output_test_dir)) > 1: + self.check_required_directories([self.push_dir, self.pull_dir, self.crash_dir]) + L.info(f"Resuming fuzzing using seeds from {self.pull_dir} (skipping --input_seeds option).") + else: + self.setup_new_session([main_dir, self.push_dir]) + if self.blackbox == True: L.info("Blackbox option is redundant. 
Eclipser works on non-instrumented binaries using QEMU by default.") @@ -88,7 +101,7 @@ def cmd(self): "--src", "file", "--fixfilepath", "eclipser.input", "--initarg", " ".join(deepstate_args), - "--outputdir", self.output_test_dir, # auto-create, reusable + "--outputdir", os.path.join(self.output_test_dir, "the_fuzzer"), # auto-create, reusable ]) if self.max_input_size == 0: diff --git a/bin/deepstate/executors/fuzz/honggfuzz.py b/bin/deepstate/executors/fuzz/honggfuzz.py index 27cac13b..4d5237eb 100644 --- a/bin/deepstate/executors/fuzz/honggfuzz.py +++ b/bin/deepstate/executors/fuzz/honggfuzz.py @@ -16,7 +16,6 @@ import logging import argparse -from tempfile import mkdtemp from typing import List, Dict, Optional from deepstate.core import FuzzerFrontend, FuzzFrontendError @@ -57,31 +56,23 @@ def compile(self) -> None: # type: ignore def pre_exec(self): + self.require_seeds = True + super().pre_exec() - self.push_dir = os.path.join(self.output_test_dir, "sync_dir") + sync_dir = os.path.join(self.output_test_dir, "sync_dir") + main_dir = os.path.join(self.output_test_dir, "the_fuzzer") + self.push_dir = os.path.join(sync_dir, "queue") self.pull_dir = self.push_dir - self.crash_dir = os.path.join(self.output_test_dir, "crashes") + self.crash_dir = os.path.join(main_dir, "crashes") - # resuming fuzzing + # resume fuzzing if len(os.listdir(self.output_test_dir)) > 1: - if not os.path.isdir(self.push_dir): - raise FuzzFrontendError(f"Can't resume with output directory `{self.output_test_dir}`. " - "No `sync_dir` directory inside.") - if not os.path.isdir(self.crash_dir): - raise FuzzFrontendError(f"Can't resume with output directory `{self.output_test_dir}`. 
" - "No `crashes` directory inside.") - + self.check_required_directories([self.push_dir, self.pull_dir, self.crash_dir]) self.input_seeds = self.push_dir - L.info(f"Resuming fuzzing using seeds from {self.input_seeds} (skipping --input_seeds option).") - + L.info(f"Resuming fuzzing using seeds from {self.push_dir} (skipping --input_seeds option).") else: - os.mkdir(self.push_dir) - os.mkdir(self.crash_dir) - - # create fake input seeds - if self.input_seeds is None: - self.create_fake_seeds() + self.setup_new_session([self.pull_dir, self.crash_dir]) @property diff --git a/bin/deepstate/executors/fuzz/libfuzzer.py b/bin/deepstate/executors/fuzz/libfuzzer.py index 472171b8..ed2b248f 100644 --- a/bin/deepstate/executors/fuzz/libfuzzer.py +++ b/bin/deepstate/executors/fuzz/libfuzzer.py @@ -66,24 +66,19 @@ def pre_exec(self) -> None: if self.blackbox is True: raise FuzzFrontendError("Blackbox fuzzing is not supported by libFuzzer.") - self.push_dir = os.path.join(self.output_test_dir, "sync_dir") + sync_dir = os.path.join(self.output_test_dir, "sync_dir") + main_dir = os.path.join(self.output_test_dir, "the_fuzzer") + self.push_dir = os.path.join(sync_dir, "queue") self.pull_dir = self.push_dir - self.crash_dir = os.path.join(self.output_test_dir, "crashes") + self.crash_dir = os.path.join(main_dir, "crashes") # resuming fuzzing if len(os.listdir(self.output_test_dir)) > 0: - if not os.path.isdir(self.push_dir): - raise FuzzFrontendError(f"Can't resume with output directory `{self.output_test_dir}`. " - "No `sync_dir` directory inside.") - if not os.path.isdir(self.crash_dir): - raise FuzzFrontendError(f"Can't resume with output directory `{self.output_test_dir}`. 
" - "No `crashes` directory inside.") - + self.check_required_directories([self.push_dir, self.pull_dir, self.crash_dir]) self.input_seeds = None L.info(f"Resuming fuzzing using seeds from {self.push_dir} (skipping --input_seeds option).") else: - os.mkdir(self.push_dir) - os.mkdir(self.crash_dir) + self.setup_new_session([self.pull_dir, self.crash_dir]) @property @@ -100,7 +95,8 @@ def cmd(self): "-max_len={}".format(self.max_input_size), "-artifact_prefix={}".format(self.crash_dir + "/"), # "-jobs={}".format(2), # crashes deepstate ;/ - "-workers={}".format(1) + "-workers={}".format(1), + "-reload" ]) for key, val in self.fuzzer_args: @@ -129,6 +125,10 @@ def cmd(self): return cmd_list + def post_exec(self): + pass + + def main(): fuzzer = LibFuzzer(envvar="LIBFUZZER_HOME") return fuzzer.main() From 29bcbf24dfcbec8ee2d7edf5c8fd9048c389ea5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Wed, 12 Feb 2020 21:10:53 +0100 Subject: [PATCH 06/46] rename compiler (like "dotnet") to runner --- bin/deepstate/core/fuzz.py | 10 +++++----- bin/deepstate/executors/fuzz/eclipser.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/bin/deepstate/core/fuzz.py b/bin/deepstate/core/fuzz.py index adbf0552..ae779602 100644 --- a/bin/deepstate/core/fuzz.py +++ b/bin/deepstate/core/fuzz.py @@ -522,12 +522,12 @@ def main(self): # Fuzzer process execution methods ############################################## - def run(self, compiler: Optional[str] = None, no_exec: bool = False): + def run(self, runner: Optional[str] = None, no_exec: bool = False): """ Interface for spawning and executing fuzzer jobs. Uses the configured `num_workers` in order to create a multiprocessing pool to parallelize fuzzers for execution in self._run. 
- :param compiler: if necessary, a compiler that is invoked before fuzzer executable (ie `dotnet`) + :param runner: if necessary, a runner that is invoked before fuzzer executable (ie `dotnet`) :param no_exec: skips pre- and post-processing steps during execution """ @@ -543,9 +543,9 @@ def run(self, compiler: Optional[str] = None, no_exec: bool = False): # initialize cmd from property command = [self.fuzzer_exe] + self.cmd # type: ignore - # prepend compiler that invokes fuzzer - if compiler: - command.insert(0, compiler) + # prepend runner that invokes fuzzer + if runner: + command.insert(0, runner) results: List[ApplyResult[int]] results_outputs: List[int] diff --git a/bin/deepstate/executors/fuzz/eclipser.py b/bin/deepstate/executors/fuzz/eclipser.py index afc6678c..a3fec6ae 100644 --- a/bin/deepstate/executors/fuzz/eclipser.py +++ b/bin/deepstate/executors/fuzz/eclipser.py @@ -174,7 +174,7 @@ def main(): try: fuzzer = Eclipser(envvar="ECLIPSER_HOME") fuzzer.parse_args() - fuzzer.run(compiler=fuzzer.EXECUTABLES["RUNNER"]) + fuzzer.run(runner=fuzzer.EXECUTABLES["RUNNER"]) return 0 except FuzzFrontendError as e: L.error(e) From e30bcf98c355e9e524e7133c5934c9fb89b35e55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Wed, 12 Feb 2020 21:42:57 +0100 Subject: [PATCH 07/46] compile output extension from NAME --- bin/deepstate/core/base.py | 2 +- bin/deepstate/core/fuzz.py | 2 ++ bin/deepstate/executors/fuzz/afl.py | 2 +- bin/deepstate/executors/fuzz/eclipser.py | 2 +- bin/deepstate/executors/fuzz/honggfuzz.py | 2 +- bin/deepstate/executors/fuzz/libfuzzer.py | 2 +- 6 files changed, 7 insertions(+), 5 deletions(-) diff --git a/bin/deepstate/core/base.py b/bin/deepstate/core/base.py index e9daa61f..4b1c1a61 100644 --- a/bin/deepstate/core/base.py +++ b/bin/deepstate/core/base.py @@ -125,7 +125,7 @@ def parse_args(cls) -> Optional[argparse.Namespace]: compile_group.add_argument("--out_test_name", type=str, help=("Set name of generated 
instrumented binary. Default is `out`. " - "Automatically add `.frontend_name_lowercase` suffix.")) + "Automatically adds `.frontend_name_lowercase` suffix.")) compile_group.add_argument("--no_exit_compile", action="store_true", help="Continue execution after compiling a harness (set as default if `--config` is set).") diff --git a/bin/deepstate/core/fuzz.py b/bin/deepstate/core/fuzz.py index ae779602..e7cf34f0 100644 --- a/bin/deepstate/core/fuzz.py +++ b/bin/deepstate/core/fuzz.py @@ -318,6 +318,8 @@ def compile(self, lib_path: str, flags: List[str], _out_bin: str, env = os.envir :param env: optional envvars to set during compilation """ + _out_bin += f".{self.NAME.lower()}" + if self.compiler_exe is None: raise FuzzFrontendError(f"No compiler specified for compile-time instrumentation.") diff --git a/bin/deepstate/executors/fuzz/afl.py b/bin/deepstate/executors/fuzz/afl.py index bc09c126..ede9fada 100644 --- a/bin/deepstate/executors/fuzz/afl.py +++ b/bin/deepstate/executors/fuzz/afl.py @@ -51,7 +51,7 @@ def compile(self) -> None: # type: ignore flags += [arg for arg in self.compiler_args.split(" ")] flags.append("-ldeepstate_AFL") - super().compile(lib_path, flags, self.out_test_name + ".afl") + super().compile(lib_path, flags, self.out_test_name) def pre_exec(self): diff --git a/bin/deepstate/executors/fuzz/eclipser.py b/bin/deepstate/executors/fuzz/eclipser.py index a3fec6ae..d3d5bc2a 100644 --- a/bin/deepstate/executors/fuzz/eclipser.py +++ b/bin/deepstate/executors/fuzz/eclipser.py @@ -55,7 +55,7 @@ def compile(self) -> None: # type: ignore flags: List[str] = ["-ldeepstate"] if self.compiler_args: flags += [arg for arg in self.compiler_args.split(" ")] - super().compile(lib_path, flags, self.out_test_name + ".eclipser") + super().compile(lib_path, flags, self.out_test_name) def pre_exec(self) -> None: diff --git a/bin/deepstate/executors/fuzz/honggfuzz.py b/bin/deepstate/executors/fuzz/honggfuzz.py index 4d5237eb..cc30abf7 100644 --- 
a/bin/deepstate/executors/fuzz/honggfuzz.py +++ b/bin/deepstate/executors/fuzz/honggfuzz.py @@ -52,7 +52,7 @@ def compile(self) -> None: # type: ignore if self.compiler_args: flags += [arg for arg in self.compiler_args.split(" ")] - super().compile(lib_path, flags, self.out_test_name + ".hfuzz") + super().compile(lib_path, flags, self.out_test_name) def pre_exec(self): diff --git a/bin/deepstate/executors/fuzz/libfuzzer.py b/bin/deepstate/executors/fuzz/libfuzzer.py index ed2b248f..c9a6bea5 100644 --- a/bin/deepstate/executors/fuzz/libfuzzer.py +++ b/bin/deepstate/executors/fuzz/libfuzzer.py @@ -45,7 +45,7 @@ def compile(self) -> None: # type: ignore flags: List[str] = ["-ldeepstate_LF", "-fsanitize=fuzzer,undefined"] if self.compiler_args: flags += [arg for arg in self.compiler_args.split(" ")] - super().compile(lib_path, flags, self.out_test_name + ".lfuzz") + super().compile(lib_path, flags, self.out_test_name) def pre_exec(self) -> None: From bb5cb945d6b08b35149237df3018eb2083458ab7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Wed, 12 Feb 2020 23:46:10 +0100 Subject: [PATCH 08/46] better statistic init --- bin/deepstate/core/base.py | 2 +- bin/deepstate/core/fuzz.py | 64 +++++++++++++++++++------- bin/deepstate/executors/fuzz/afl.py | 50 ++++---------------- bin/deepstate/executors/fuzz/angora.py | 62 +++++++++++++++++++------ 4 files changed, 104 insertions(+), 74 deletions(-) diff --git a/bin/deepstate/core/base.py b/bin/deepstate/core/base.py index 4b1c1a61..c39405cc 100644 --- a/bin/deepstate/core/base.py +++ b/bin/deepstate/core/base.py @@ -75,7 +75,7 @@ def __init__(self): # parsed argument attributes self.binary: str = None - self.output_test_dir: Optional[str] = None + self.output_test_dir: str self.timeout: int = 0 self.num_workers: int = 1 self.mem_limit: int = 50 diff --git a/bin/deepstate/core/fuzz.py b/bin/deepstate/core/fuzz.py index e7cf34f0..8661543d 100644 --- a/bin/deepstate/core/fuzz.py +++ 
b/bin/deepstate/core/fuzz.py @@ -53,6 +53,9 @@ def __init__(self, envvar: str) -> None: - env (environment variable name) - search_dirs (directories inside fuzzer home dir where to look for executables) - require_seeds + - stats (dict that frontend should populate in populate_stats method) + - stats_file (file where to put stats from fuzzer in common format) + - proc (handler to fuzzer process) Inherits: - name (name for pretty printing) @@ -79,7 +82,39 @@ def __init__(self, envvar: str) -> None: # flag to ensure fuzzer processes do not persist self._on: bool = False + self.proc: subprocess.Popen[bytes] self.require_seeds: bool = False + self.stats_file: str = 'deepstate-stats.txt' + + # same as AFL's (https://github.com/google/AFL/blob/master/docs/status_screen.txt) + self.stats: Dict[str, Optional[str]] = { + "last_update": None, + "start_time": None, + "fuzzer_pid": None, + "cycles_done": None, + "execs_done": None, + "execs_per_sec": None, + "paths_total": None, + "paths_favored": None, + "paths_found": None, + "paths_imported": None, + "max_depth": None, + "cur_path": None, + "pending_favs": None, + "pending_total": None, + "variable_paths": None, + "stability": None, + "bitmap_cvg": None, + "unique_crashes": None, + "unique_hangs": None, + "last_path": None, + "last_crash": None, + "last_hang": None, + "execs_since_crash": None, + "slowest_exec_ms": None, + "peak_rss_mb": None + "command_line": None, + } # parsed argument attributes self.input_seeds: Optional[str] = None @@ -387,6 +422,7 @@ def pre_exec(self): - check for targets (self.binary) - may check for input_seeds - check for output directory + - update stats_file path """ if self.parser is None: @@ -447,6 +483,9 @@ def pre_exec(self): if not os.path.isdir(self.output_test_dir): raise FuzzFrontendError(f"Output test dir (`{self.output_test_dir}`) is not a directory.") + # update stats file + self.stats_file = os.path.join(self.output_test_dir, self.stats_file) + # check if we enabled seed 
synchronization, and initialize directory if self.enable_sync: if not os.path.isdir(self.sync_dir): @@ -526,8 +565,7 @@ def main(self): def run(self, runner: Optional[str] = None, no_exec: bool = False): """ - Interface for spawning and executing fuzzer jobs. Uses the configured `num_workers` in order to - create a multiprocessing pool to parallelize fuzzers for execution in self._run. + Interface for spawning and executing fuzzer job. :param runner: if necessary, a runner that is invoked before fuzzer executable (ie `dotnet`) :param no_exec: skips pre- and post-processing steps during execution @@ -549,22 +587,14 @@ def run(self, runner: Optional[str] = None, no_exec: bool = False): if runner: command.insert(0, runner) - results: List[ApplyResult[int]] - results_outputs: List[int] - mp.set_start_method('fork') - with mp.Pool(processes=self.num_workers) as pool: - results = [pool.apply_async(self._run, args=(command,)) for _ in range(self.num_workers)] - results_outputs = [result.get() for result in results] - - L.debug(results_outputs) - + result = self._run(command) # TODO: check results for failures + L.debug(result) # do post-fuzz operations if not no_exec: - if callable(getattr(self, "post_exec")): - L.info("Calling post-exec for fuzzer post-processing") - self.post_exec() + L.info("Calling post-exec for fuzzer post-processing") + self.post_exec() def _run(self, command: List[str]) -> int: @@ -578,7 +608,8 @@ def _run(self, command: List[str]) -> int: L.info("Executing command `%s`", command) self._on = True - self._start_time = int(time.time()) + self._start_time: int = int(time.time()) + self._command: str = ' '.join(command) try: @@ -708,8 +739,7 @@ def reporter(self): return NotImplementedError("Must implement in frontend subclass.") - @property - def stats(self): + def populate_stats(self): """ Parses out stats generated by fuzzer output. Should be implemented by user, and can return custom feedback. 
diff --git a/bin/deepstate/executors/fuzz/afl.py b/bin/deepstate/executors/fuzz/afl.py index ede9fada..9b40ac04 100644 --- a/bin/deepstate/executors/fuzz/afl.py +++ b/bin/deepstate/executors/fuzz/afl.py @@ -138,56 +138,24 @@ def cmd(self): return self.build_cmd(cmd_list) - @property - def stats(self) -> Dict[str, Optional[str]]: + def populate_stats(self): """ Retrieves and parses the stats file produced by AFL """ - stat_file: str = self.output_test_dir + "/fuzzer_stats" - with open(stat_file, "r") as sf: - lines = sf.readlines() - - stats: Dict[str, Optional[str]] = { - "last_update": None, - "start_time": None, - "fuzzer_pid": None, - "cycles_done": None, - "execs_done": None, - "execs_per_sec": None, - "paths_total": None, - "paths_favored": None, - "paths_found": None, - "paths_imported": None, - "max_depth": None, - "cur_path": None, - "pending_favs": None, - "pending_total": None, - "variable_paths": None, - "stability": None, - "bitmap_cvg": None, - "unique_crashes": None, - "unique_hangs": None, - "last_path": None, - "last_crash": None, - "last_hang": None, - "execs_since_crash": None, - "exec_timeout": None, - "afl_banner": None, - "afl_version": None, - "command_line": None - } - - for l in lines: - for k in stats.keys(): - if k in l: - stats[k] = l[19:].strip(": %\r\n") - return stats + stat_file_path: str = os.path.join(self.output_test_dir, "the_fuzzer", "fuzzer_stats") + with open(stat_file_path, "r") as stat_file: + for line in stat_file: + key = line.split(":", 1)[0].strip() + value = line.split(":", 1)[1].strip() + if key in self.stats: + self.stats[key] = value def reporter(self) -> Dict[str, Optional[str]]: """ Report a summarized version of statistics, ideal for ensembler output. 
""" + self.populate_stats() return dict({ "Execs Done": self.stats["execs_done"], "Cycle Completed": self.stats["cycles_done"], diff --git a/bin/deepstate/executors/fuzz/angora.py b/bin/deepstate/executors/fuzz/angora.py index 345817b9..68113216 100644 --- a/bin/deepstate/executors/fuzz/angora.py +++ b/bin/deepstate/executors/fuzz/angora.py @@ -200,26 +200,58 @@ def cmd(self): return self.build_cmd(cmd_list) - @property - def stats(self) -> Optional[Dict[str, str]]: + def populate_stats(self): """ Parses Angora output JSON config to dict for reporting. """ - stat_file: str = self.output_test_dir + "/chart_stat.json" - - if not hasattr(self, "prev_stats"): - self.prev_stats: Optional[Dict[str, str]] = None + stat_file_path: str = os.path.join(self.output_test_dir, "angora", "fuzzer_stats") + with open(stat_file_path, "r") as stat_file: + self.stats["fuzzer_pid"] = stat_file.read().split(":", 1)[1].strip() + stat_file_path = os.path.join(self.output_test_dir, "angora", "chart_stat.json") + new_stats: Dict[str, str] = {} try: - with open(stat_file, "r") as handle: - stats: Optional[Dict[str, str]] = json.loads(handle.read()) - self.prev_stats = stats - - # fallback on initially parsed stats if failed to decode - except json.decoder.JSONDecodeError: - stats = self.prev_stats - - return stats + with open(stat_file_path, "r") as stat_file: + new_stats = json.loads(stat_file.read()) + except json.decoder.JSONDecodeError as e: + L.error(f"Error parsing {stat_file_path}: {e}.") + + previous_stats = self.stats.copy() + + if new_stats.get("init_time"): + self.stats["start_time"] = str(time() - int(new_stats.get("init_time"))) + elif self.proc: + self.stats["start_time"] = str(self._start_time) + + self.stats["last_update"] = os.path.getmtime(stat_file_path) + + self.stats["execs_done"] = new_stats.get("num_exec", None) + self.stats["execs_per_sec"] = new_stats.get("speed", None) + self.stats["paths_total"] = new_stats.get("num_inputs", None) + + self.stats["unique_crashes"] = 
new_stats.get("num_crashes", None) + self.stats["unique_hangs"] = new_stats.get("num_hangs", None) + + all_fuzz = [] + for one_fuzz in new_stats.get("fuzz", []): + time_key = one_fuzz.pop("time", {}) + s = time_key.get("secs", 0) + ns = time_key.get("nanos", 0) + t = float('{}.{:09d}'.format(s, ns)) + all_fuzz.append((t, one_fuzz)) + all_fuzz = sorted(all_fuzz, reversed=True) + + if len(all_fuzz) >= 2: + last_crash_execs = 0 + for one_fuzz in all_fuzz: + if one_fuzz.get("num_crashes") < self.stats["unique_crashes"]: + last_crash_execs = one_fuzz["num_exec"] + self.stats["execs_since_crash"] = self.stats["execs_done"] - last_crash_execs + + self.stats["command_line"] = self._command + self.stats["unique_hangs"] = new_stats.get("num_hangs", None) + self.stats["unique_hangs"] = new_stats.get("num_hangs", None) + self.stats["unique_hangs"] = new_stats.get("num_hangs", None) def reporter(self) -> Optional[Dict[str, Any]]: From 824bdff0daa7939ff7804598c1089a3e52b37574 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Thu, 13 Feb 2020 12:41:22 +0100 Subject: [PATCH 09/46] better run method --- bin/deepstate/core/fuzz.py | 163 +++++++++++++------------ bin/deepstate/executors/fuzz/angora.py | 4 + 2 files changed, 90 insertions(+), 77 deletions(-) diff --git a/bin/deepstate/core/fuzz.py b/bin/deepstate/core/fuzz.py index 8661543d..3a536aa0 100644 --- a/bin/deepstate/core/fuzz.py +++ b/bin/deepstate/core/fuzz.py @@ -19,10 +19,12 @@ import time import sys import subprocess +import psutil import argparse import shutil import multiprocessing as mp +from contextlib import contextmanager from tempfile import mkdtemp from pathlib import Path from multiprocessing.pool import ApplyResult @@ -112,7 +114,7 @@ def __init__(self, envvar: str) -> None: "last_hang": None, "execs_since_crash": None, "slowest_exec_ms": None, - "peak_rss_mb": None + "peak_rss_mb": None, "command_line": None, } @@ -563,6 +565,18 @@ def main(self): # Fuzzer process execution methods 
############################################## + + @contextmanager + def process(*args, **kwargs): + proc = subprocess.Popen(*args, **kwargs) + try: + yield proc + finally: + for child in psutil.Process(proc.pid).children(recursive=True): + child.kill() + proc.kill() + + def run(self, runner: Optional[str] = None, no_exec: bool = False): """ Interface for spawning and executing fuzzer job. @@ -587,9 +601,43 @@ def run(self, runner: Optional[str] = None, no_exec: bool = False): if runner: command.insert(0, runner) - result = self._run(command) - # TODO: check results for failures - L.debug(result) + L.info("Executing command `%s`", command) + self.start_time: int = int(time.time()) + self.command: str = ' '.join(command) + + # TODO: will need to restart the fuzzer in some cases + # like libFuzzer (stops on first crash found) or hoggfuzz (for synchronization) + while True: + try: + result = self._run(command) + + # any OS-specific errors encountered + except OSError as e: + raise FuzzFrontendError(f"{self.name} run interrupted due to OSError: {e}.") + + # SIGINT stops fuzzer, but continues frontend execution + except KeyboardInterrupt: + print(f"Stopped the {self.name} fuzzer.") + + # our exception, handle it somewhere ruther + except AnalysisBackendError as e: + raise e + + # bad things happed, inform user and proceed + except Exception: + import traceback + L.error("Exception catched during fuzzer run:") + L.error(traceback.format_exc()) + + # TODO: check results for failures + L.debug(result) + + # TODO: resume if needed + break + + # calculate total execution time + exec_time: float = round(time.time() - self.start_time, 2) + L.info("Fuzzer exec time: %ss", exec_time) # do post-fuzz operations if not no_exec: @@ -599,95 +647,52 @@ def run(self, runner: Optional[str] = None, no_exec: bool = False): def _run(self, command: List[str]) -> int: """ - Spawns a singular fuzzer process for execution with proper error-handling and foreground STDOUT output. 
- Also supports rsync-style seed synchronization if configured to share seeds between a global queue. + Spawns a singular fuzzer process for execution. :param command: list of arguments representing fuzzer command to execute. """ + self._start_time_one: int = int(time.time()) - L.info("Executing command `%s`", command) - - self._on = True - self._start_time: int = int(time.time()) - self._command: str = ' '.join(command) - - try: - - # if we are syncing seeds, we background the process and all of the output generated - if self.enable_sync or self.num_workers > 1: - self.proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - L.info("Starting fuzzer with seed synchronization with PID `%d`", self.proc.pid) - else: - self.proc = subprocess.Popen(command) - L.info("Starting fuzzer with PID `%d`", self.proc.pid) - - L.info("Fuzzer start time: %s", self._start_time) + with process(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) as self.proc: + L.info("Starting fuzzer with PID `%d`", self.proc.pid) + L.info("Fuzzer one session start time: %s", self._start_time_one) # while fuzzers may configure timeout, subprocess can ensure exit and is useful when parallelizing # processes or doing ensemble-based testing. stdout, stderr = self.proc.communicate(timeout=self.timeout if self.timeout != 0 else None) - if self.proc.returncode != 0: - self._kill() - if self.enable_sync: - err = stdout if stderr is None else stderr - raise FuzzFrontendError(f"{self.name} run interrupted with non-zero return status. Message: {err.decode('utf-8')}") - else: - raise FuzzFrontendError(f"{self.name} run interrupted with non-zero return status. 
Error code: {self.proc.returncode}") - - # invoke ensemble if seed synchronization option is set - if self.enable_sync: - - # do not ensemble as fuzzer initializes - time.sleep(5) - self.sync_count = 0 - - # ensemble "event" loop - while self._is_alive(): - - L.debug("%s - Performing sync cycle %s", self.name, self.sync_count) - - # sleep for execution cycle - time.sleep(self.sync_cycle) - - # call ensemble to perform seed synchronization - self.ensemble() - # if sync_out argument set, output individual fuzzer statistics - # rather than have our ensembler report global stats - if self.sync_out: - print(f"\n{self.name} Fuzzer Stats\n") - for head, stat in self.reporter().items(): - print(f"{head}\t:\t{stat}") + # calculate total execution time + exec_time: float = round(time.time() - self._start_time_one, 2) + L.info("Fuzzer one session exec time: %ss", exec_time) - self.sync_count += 1 + return stdout, stderr + # invoke ensemble if seed synchronization option is set + # if self.enable_sync: - # any OS-specific errors encountered - except OSError as e: - self._kill() - raise FuzzFrontendError(f"{self.name} run interrupted due to exception {e}.") + # # do not ensemble as fuzzer initializes + # time.sleep(5) + # self.sync_count = 0 - # SIGINT stops fuzzer, but continues execution - except KeyboardInterrupt: - print(f"Killing fuzzer {self.name} with PID {self.proc.pid}") - self._kill() - return 1 + # # ensemble "event" loop + # while self._is_alive(): - except AnalysisBackendError as e: - raise e + # L.debug("%s - Performing sync cycle %s", self.name, self.sync_count) - except Exception: - import traceback - L.error(traceback.format_exc()) + # # sleep for execution cycle + # time.sleep(self.sync_cycle) - finally: - self._kill() + # # call ensemble to perform seed synchronization + # self.ensemble() - # calculate total execution time - exec_time: float = round(time.time() - self._start_time, 2) - L.info("Fuzzer exec time: %ss", exec_time) + # # if sync_out argument set, 
output individual fuzzer statistics + # # rather than have our ensembler report global stats + # if self.sync_out: + # print(f"\n{self.name} Fuzzer Stats\n") + # for head, stat in self.reporter().items(): + # print(f"{head}\t:\t{stat}") - return 0 + # self.sync_count += 1 def _is_alive(self) -> bool: @@ -715,12 +720,16 @@ def _kill(self) -> None: if not hasattr(self, "proc"): raise FuzzFrontendError("Attempted to kill non-running PID.") + if not self.proc or self._on is False: + return + self.proc.terminate() try: - self.proc.wait(timeout=0.5) + self.proc.communicate(timeout=1) L.info("Fuzzer subprocess exited with `%d`", self.proc.returncode) except subprocess.TimeoutExpired: - raise FuzzFrontendError("Subprocess could not terminate in time") + L.warning("Subprocess could not terminate in time, killing.") + self.proc.terminate() self._on = False diff --git a/bin/deepstate/executors/fuzz/angora.py b/bin/deepstate/executors/fuzz/angora.py index 68113216..dfc733e3 100644 --- a/bin/deepstate/executors/fuzz/angora.py +++ b/bin/deepstate/executors/fuzz/angora.py @@ -267,6 +267,10 @@ def reporter(self) -> Optional[Dict[str, Any]]: }) + def post_exec(self): + pass + + def main(): fuzzer = Angora(envvar="ANGORA_HOME") return fuzzer.main() From 20a06aac9110071968eb0fda436dce109fc97ebf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Thu, 13 Feb 2020 16:39:25 +0100 Subject: [PATCH 10/46] better run method - working --- bin/deepstate/core/base.py | 2 +- bin/deepstate/core/fuzz.py | 287 ++++++++++++---------- bin/deepstate/executors/fuzz/afl.py | 1 - bin/deepstate/executors/fuzz/angora.py | 43 ++-- bin/deepstate/executors/fuzz/eclipser.py | 9 +- bin/deepstate/executors/fuzz/honggfuzz.py | 28 +-- bin/deepstate/executors/fuzz/libfuzzer.py | 9 +- 7 files changed, 209 insertions(+), 170 deletions(-) diff --git a/bin/deepstate/core/base.py b/bin/deepstate/core/base.py index c39405cc..f70b9850 100644 --- a/bin/deepstate/core/base.py +++ 
b/bin/deepstate/core/base.py @@ -74,7 +74,7 @@ def __init__(self): AnalysisBackend.compiler_exe = self.EXECUTABLES.pop("COMPILER", None) # parsed argument attributes - self.binary: str = None + self.binary: Optional[str] = None self.output_test_dir: str self.timeout: int = 0 self.num_workers: int = 1 diff --git a/bin/deepstate/core/fuzz.py b/bin/deepstate/core/fuzz.py index 3a536aa0..b3292bc8 100644 --- a/bin/deepstate/core/fuzz.py +++ b/bin/deepstate/core/fuzz.py @@ -19,15 +19,13 @@ import time import sys import subprocess -import psutil +import psutil # type: ignore import argparse import shutil -import multiprocessing as mp +import traceback -from contextlib import contextmanager from tempfile import mkdtemp from pathlib import Path -from multiprocessing.pool import ApplyResult from typing import Optional, Dict, List, Any, Tuple from deepstate.core.base import AnalysisBackend, AnalysisBackendError @@ -115,7 +113,7 @@ def __init__(self, envvar: str) -> None: "execs_since_crash": None, "slowest_exec_ms": None, "peak_rss_mb": None, - "command_line": None, + "command_line": None } # parsed argument attributes @@ -125,10 +123,12 @@ def __init__(self, envvar: str) -> None: self.exec_timeout: Optional[int] = None self.blackbox: Optional[bool] = None self.fuzzer_args: List[Any] = [] + self.fuzzer_out: bool = False self.enable_sync: bool = False self.sync_cycle: int = 5 self.sync_out: bool = True + self.sync_dir: Optional[str] = None self.push_dir: str = '' # push testcases from external sources here self.pull_dir: str = '' # pull new testcases from this dir @@ -203,6 +203,10 @@ def parse_args(cls) -> Optional[argparse.Namespace]: "--blackbox", action="store_true", help="Black-box fuzzing without compile-time instrumentation.") + parser.add_argument( + "--fuzzer_out", action="store_true", + help="Show fuzzer-specific output (graphical interface) instead of deepstate one.") + parser.add_argument( "--fuzzer_args", default=[], nargs='*', help="Flags to pass to the fuzzer. 
Format: `a arg1=val` -> `-a --arg val`.") @@ -566,15 +570,49 @@ def main(self): ############################################## - @contextmanager - def process(*args, **kwargs): - proc = subprocess.Popen(*args, **kwargs) + def manage(self): + # print and save statistics + if not self.fuzzer_out: + self.populate_stats() + self.print_stats() + + # invoke ensemble if seed synchronization option is set + if self.enable_sync: + L.debug("%s - Performing sync cycle %s", self.name, self.sync_count) + + # call ensemble to perform seed synchronization + self.ensemble() + + # if sync_out argument set, output individual fuzzer statistics + # rather than have our ensembler report global stats + if self.sync_out: + print(f"\n{self.name} Fuzzer Stats\n") + for head, stat in self.reporter().items(): + print(f"{head}\t:\t{stat}") + + self.sync_count += 1 + + + def cleanup(self): + if not self.proc: + return + + L.info(f"Killing process {self.proc.pid} and childs.") + + # terminate + for some_proc in psutil.Process(self.proc.pid).children(recursive=True) + [self.proc]: + some_proc.terminate() + + # hard kill + for some_proc in psutil.Process(self.proc.pid).children(recursive=True) + [self.proc]: try: - yield proc - finally: - for child in psutil.Process(proc.pid).children(recursive=True): - child.kill() - proc.kill() + some_proc.communicate(timeout=1) + L.info("Fuzzer subprocess (PID %d) exited with `%d`", some_proc.pid, some_proc.returncode) + except subprocess.TimeoutExpired: + L.warning("Subprocess (PID %d) could not terminate in time, killing.", some_proc.pid) + some_proc.kill() + + self.proc = None def run(self, runner: Optional[str] = None, no_exec: bool = False): @@ -604,33 +642,110 @@ def run(self, runner: Optional[str] = None, no_exec: bool = False): L.info("Executing command `%s`", command) self.start_time: int = int(time.time()) self.command: str = ' '.join(command) + self.sync_count = 0 - # TODO: will need to restart the fuzzer in some cases - # like libFuzzer (stops on 
first crash found) or hoggfuzz (for synchronization) - while True: - try: - result = self._run(command) + total_execution_time: int = 0 + wait_time: int = self.sync_cycle + run_fuzzer: bool = True + prev_log_level = L.level - # any OS-specific errors encountered - except OSError as e: - raise FuzzFrontendError(f"{self.name} run interrupted due to OSError: {e}.") - - # SIGINT stops fuzzer, but continues frontend execution - except KeyboardInterrupt: - print(f"Stopped the {self.name} fuzzer.") - - # our exception, handle it somewhere ruther - except AnalysisBackendError as e: - raise e - - # bad things happed, inform user and proceed - except Exception: - import traceback - L.error("Exception catched during fuzzer run:") + # run or resume fuzzer process as long as it is needed + # may create new processes continuously + while run_fuzzer: + run_one_fuzzer_process: bool = False + try: + if self.fuzzer_out: + # disable deepstate output + L.setLevel("ERROR") + self.proc = subprocess.Popen(command) + else: + self.proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + run_one_fuzzer_process = True + L.info("Started fuzzer process with PID %d.", self.proc.pid) + + except (OSError, ValueError): + L.setLevel(prev_log_level) L.error(traceback.format_exc()) - - # TODO: check results for failures - L.debug(result) + raise FuzzFrontendError("Exception during fuzzer startup.") + + # run-manage loop, until somethings happens (error, interrupt, fuzzer exits) + # use only one process + while run_one_fuzzer_process: + # general timeout + time_left = float('inf') + total_execution_time = int(time.time() - self.start_time) + if self.timeout != 0: + time_left = self.timeout - total_execution_time + if time_left < 0: + run_one_fuzzer_process = False + run_fuzzer = False + wait_time = 0 + L.info("Timeout") + + try: + # sleep/communicate for `self.sync_cycle` time + timeout_one_cycle: int = wait_time + if wait_time > time_left: + timeout_one_cycle = 
int(time_left) + + L.debug("One cycle `communicate` with timeout %d.", timeout_one_cycle) + stdout, stderr = self.proc.communicate(timeout=timeout_one_cycle) + + # fuzzer process exited, check return code + if self.proc.returncode != 0: + if stdout: + L.error(stdout.decode('utf8')) + if stderr: + L.error(stderr.decode('utf8')) + raise FuzzFrontendError(f"Fuzzer {self.name} (PID {self.proc.pid}) exited " + f"with return code {self.proc.returncode}.") + else: + L.info("Fuzzer %s (PID %d) exited with return code %d.", + self.name, self.proc.pid, self.proc.returncode) + run_one_fuzzer_process = False + + # Timeout, just continue to management step + except subprocess.TimeoutExpired: + L.debug("Once cycle timeout.") + + # Any OS-specific errors encountered + except OSError as e: + L.error("%s run interrupted due to OSError: %s.", self.name, e) + run_one_fuzzer_process = False + + # SIGINT stops fuzzer, but continues frontend execution + except KeyboardInterrupt: + L.info("Stopped the %s fuzzer.", self.name) + run_one_fuzzer_process = False + run_fuzzer = False + + # bad things happed, inform user and exit + except Exception: + L.error(traceback.format_exc()) + L.error("Exception during fuzzer %s run.", self.name) + run_one_fuzzer_process = False + run_fuzzer = False + + # manage + if run_one_fuzzer_process: + try: + L.debug("Management cycle starts after %ss.", total_execution_time) + self.manage() + + # error in management, exit + except Exception: + L.error(traceback.format_exc()) + L.error("Exception during fuzzer %s run.", self.name) + run_one_fuzzer_process = False + run_fuzzer = False + + # cleanup + try: + L.setLevel(prev_log_level) + self.cleanup() + except: + pass # TODO: resume if needed break @@ -645,95 +760,6 @@ def run(self, runner: Optional[str] = None, no_exec: bool = False): self.post_exec() - def _run(self, command: List[str]) -> int: - """ - Spawns a singular fuzzer process for execution. 
- - :param command: list of arguments representing fuzzer command to execute. - """ - self._start_time_one: int = int(time.time()) - - with process(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) as self.proc: - L.info("Starting fuzzer with PID `%d`", self.proc.pid) - L.info("Fuzzer one session start time: %s", self._start_time_one) - - # while fuzzers may configure timeout, subprocess can ensure exit and is useful when parallelizing - # processes or doing ensemble-based testing. - stdout, stderr = self.proc.communicate(timeout=self.timeout if self.timeout != 0 else None) - - # calculate total execution time - exec_time: float = round(time.time() - self._start_time_one, 2) - L.info("Fuzzer one session exec time: %ss", exec_time) - - return stdout, stderr - - # invoke ensemble if seed synchronization option is set - # if self.enable_sync: - - # # do not ensemble as fuzzer initializes - # time.sleep(5) - # self.sync_count = 0 - - # # ensemble "event" loop - # while self._is_alive(): - - # L.debug("%s - Performing sync cycle %s", self.name, self.sync_count) - - # # sleep for execution cycle - # time.sleep(self.sync_cycle) - - # # call ensemble to perform seed synchronization - # self.ensemble() - - # # if sync_out argument set, output individual fuzzer statistics - # # rather than have our ensembler report global stats - # if self.sync_out: - # print(f"\n{self.name} Fuzzer Stats\n") - # for head, stat in self.reporter().items(): - # print(f"{head}\t:\t{stat}") - - # self.sync_count += 1 - - - def _is_alive(self) -> bool: - """ - Checks to see if fuzzer PID is running, but tossing SIGT (0) to see if we can - interact. Ideally used in an event loop during a running process. - """ - - if self._on: - return True - - try: - os.kill(self.proc.pid, 0) - except (OSError, ProcessLookupError): - return False - - return True - - - def _kill(self) -> None: - """ - Kills running fuzzer process. 
Can be used forcefully if - KeyboardInterrupt signal falls through and process continues execution. - """ - if not hasattr(self, "proc"): - raise FuzzFrontendError("Attempted to kill non-running PID.") - - if not self.proc or self._on is False: - return - - self.proc.terminate() - try: - self.proc.communicate(timeout=1) - L.info("Fuzzer subprocess exited with `%d`", self.proc.returncode) - except subprocess.TimeoutExpired: - L.warning("Subprocess could not terminate in time, killing.") - self.proc.terminate() - - self._on = False - - ############################################ # Auxiliary reporting and processing methods ############################################ @@ -756,6 +782,12 @@ def populate_stats(self): raise NotImplementedError("Must implement in frontend subclass.") + def print_stats(self): + for key, value in self.stats.items(): + if value: + print(f"{key}: {value}") + + def post_exec(self): """ Performs user-specified post-processing execution logic. Should be implemented by user, and can implement @@ -823,6 +855,9 @@ def ensemble(self, local_queue: Optional[str] = None, global_queue: Optional[str implement any additional logic for determining whether to sync/get seeds as if in event loop. 
""" + if not self.sync_dir: + return + if global_queue is None: global_queue = self.sync_dir + "/" diff --git a/bin/deepstate/executors/fuzz/afl.py b/bin/deepstate/executors/fuzz/afl.py index 9b40ac04..7e197ea5 100644 --- a/bin/deepstate/executors/fuzz/afl.py +++ b/bin/deepstate/executors/fuzz/afl.py @@ -16,7 +16,6 @@ import os import logging import argparse -import shutil from typing import List, Dict, Optional diff --git a/bin/deepstate/executors/fuzz/angora.py b/bin/deepstate/executors/fuzz/angora.py index dfc733e3..94151be9 100644 --- a/bin/deepstate/executors/fuzz/angora.py +++ b/bin/deepstate/executors/fuzz/angora.py @@ -18,6 +18,8 @@ import logging import argparse import subprocess +import operator +import time from typing import List, Dict, Optional, Any @@ -216,21 +218,21 @@ def populate_stats(self): except json.decoder.JSONDecodeError as e: L.error(f"Error parsing {stat_file_path}: {e}.") - previous_stats = self.stats.copy() + # previous_stats = self.stats.copy() if new_stats.get("init_time"): - self.stats["start_time"] = str(time() - int(new_stats.get("init_time"))) + self.stats["start_time"] = str(int(time.time() - int(new_stats.get("init_time")))) elif self.proc: - self.stats["start_time"] = str(self._start_time) + self.stats["start_time"] = str(int(self.start_time)) - self.stats["last_update"] = os.path.getmtime(stat_file_path) + self.stats["last_update"] = str(int(os.path.getmtime(stat_file_path))) - self.stats["execs_done"] = new_stats.get("num_exec", None) - self.stats["execs_per_sec"] = new_stats.get("speed", None) - self.stats["paths_total"] = new_stats.get("num_inputs", None) + self.stats["execs_done"] = new_stats.get("num_exec", 0) + self.stats["execs_per_sec"] = new_stats.get("speed", [0])[0] + self.stats["paths_total"] = new_stats.get("num_inputs", 0) - self.stats["unique_crashes"] = new_stats.get("num_crashes", None) - self.stats["unique_hangs"] = new_stats.get("num_hangs", None) + self.stats["unique_crashes"] = new_stats.get("num_crashes", 
0) + self.stats["unique_hangs"] = new_stats.get("num_hangs", 0) all_fuzz = [] for one_fuzz in new_stats.get("fuzz", []): @@ -239,19 +241,16 @@ def populate_stats(self): ns = time_key.get("nanos", 0) t = float('{}.{:09d}'.format(s, ns)) all_fuzz.append((t, one_fuzz)) - all_fuzz = sorted(all_fuzz, reversed=True) - - if len(all_fuzz) >= 2: - last_crash_execs = 0 - for one_fuzz in all_fuzz: - if one_fuzz.get("num_crashes") < self.stats["unique_crashes"]: - last_crash_execs = one_fuzz["num_exec"] - self.stats["execs_since_crash"] = self.stats["execs_done"] - last_crash_execs - - self.stats["command_line"] = self._command - self.stats["unique_hangs"] = new_stats.get("num_hangs", None) - self.stats["unique_hangs"] = new_stats.get("num_hangs", None) - self.stats["unique_hangs"] = new_stats.get("num_hangs", None) + all_fuzz = sorted(all_fuzz, key=operator.itemgetter(0), reverse=True) + + # if len(all_fuzz) >= 2: + # last_crash_execs = 0 + # for one_fuzz in all_fuzz: + # if one_fuzz.get("num_crashes") < self.stats["unique_crashes"]: + # last_crash_execs = one_fuzz["num_exec"] + # self.stats["execs_since_crash"] = self.stats["execs_done"] - last_crash_execs + + self.stats["command_line"] = self.command def reporter(self) -> Optional[Dict[str, Any]]: diff --git a/bin/deepstate/executors/fuzz/eclipser.py b/bin/deepstate/executors/fuzz/eclipser.py index d3d5bc2a..85b08a54 100644 --- a/bin/deepstate/executors/fuzz/eclipser.py +++ b/bin/deepstate/executors/fuzz/eclipser.py @@ -151,13 +151,18 @@ def post_exec(self) -> None: out: str = self.output_test_dir L.info("Performing post-processing decoding on testcases and crashes") - subprocess.call([self.EXECUTABLES["RUNNER"], self.fuzzer_exe, "decode", "-i", out + "/testcase", "-o", out + "/decoded"]) - subprocess.call([self.EXECUTABLES["RUNNER"], self.fuzzer_exe, "decode", "-i", out + "/crash", "-o", out + "/decoded"]) + subprocess.call([self.EXECUTABLES["RUNNER"], self.fuzzer_exe, "decode", "-i", self.pull_dir, "-o", out + 
"/decoded"]) + subprocess.call([self.EXECUTABLES["RUNNER"], self.fuzzer_exe, "decode", "-i", self.crash_dir, "-o", out + "/decoded"]) for f in glob.glob(out + "/decoded/decoded_files/*"): shutil.copy(f, out) shutil.rmtree(out + "/decoded") + def populate_stats(self): + crashes: int = len(os.listdir(self.crash_dir)) + self.stats["unique_crashes"] = str(crashes) + + def reporter(self) -> Dict[str, int]: """ TODO: report more metrics diff --git a/bin/deepstate/executors/fuzz/honggfuzz.py b/bin/deepstate/executors/fuzz/honggfuzz.py index cc30abf7..6eea7014 100644 --- a/bin/deepstate/executors/fuzz/honggfuzz.py +++ b/bin/deepstate/executors/fuzz/honggfuzz.py @@ -18,7 +18,7 @@ from typing import List, Dict, Optional -from deepstate.core import FuzzerFrontend, FuzzFrontendError +from deepstate.core import FuzzerFrontend L = logging.getLogger(__name__) @@ -123,18 +123,20 @@ def cmd(self): return self.build_cmd(cmd_list, input_symbol="___FILE___") - @property - def stats(self) -> Dict[str, Optional[str]]: + def populate_stats(self): """ Retrieves and parses the stats file produced by Honggfuzz """ - out_dir: str = os.path.abspath(self.output_test_dir) - report_file: str = "HONGGFUZZ.REPORT.TXT" + # add crash metrics + crashes: int = len(os.listdir(self.crash_dir)) + self.stats["unique_crashes"] = str(crashes) - # read report file generated by honggfuzz - stat_file: str = os.path.join(out_dir + report_file) - with open(stat_file, "r") as sf: - lines = sf.readlines() + stat_file_path: str = os.path.join(self.output_test_dir, "HONGGFUZZ.REPORT.TXT") + if not os.path.isfile(stat_file_path): + return + + with open(stat_file_path, "r") as stat_file: + lines = stat_file.readlines() stats: Dict[str, Optional[str]] = { "mutationsPerRun": None, @@ -164,14 +166,6 @@ def stats(self) -> Dict[str, Optional[str]]: if k in l: stats[k] = l.split(":")[1].strip() - # add crash metrics - crashes: int = len([name for name in os.listdir(out_dir) if name != report_file]) - stats.update({ - 
"CRASHES": str(crashes) - }) - - return stats - def reporter(self) -> Dict[str, Optional[str]]: """ diff --git a/bin/deepstate/executors/fuzz/libfuzzer.py b/bin/deepstate/executors/fuzz/libfuzzer.py index c9a6bea5..db2c3e0c 100644 --- a/bin/deepstate/executors/fuzz/libfuzzer.py +++ b/bin/deepstate/executors/fuzz/libfuzzer.py @@ -60,6 +60,8 @@ def pre_exec(self) -> None: super().pre_exec() # again, because we may had run compiler + if not self.binary: + raise FuzzFrontendError("Binary not set.") self.binary = os.path.abspath(self.binary) self.fuzzer_exe = self.binary # type: ignore @@ -96,7 +98,8 @@ def cmd(self): "-artifact_prefix={}".format(self.crash_dir + "/"), # "-jobs={}".format(2), # crashes deepstate ;/ "-workers={}".format(1), - "-reload" + "-reload=1", + "-runs=-1" ]) for key, val in self.fuzzer_args: @@ -125,6 +128,10 @@ def cmd(self): return cmd_list + def populate_stats(self): + pass + + def post_exec(self): pass From 3b4941cd40b27a00535fc194729a8d9c83fa1bfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Fri, 14 Feb 2020 11:23:54 +0100 Subject: [PATCH 11/46] fix symex min_log_level --- bin/deepstate/__init__.py | 4 ++- bin/deepstate/core/base.py | 6 +++-- bin/deepstate/executors/symex/__init__.py | 31 +++++++++++++++++++++++ 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/bin/deepstate/__init__.py b/bin/deepstate/__init__.py index 87f43acd..fe4bfe53 100644 --- a/bin/deepstate/__init__.py +++ b/bin/deepstate/__init__.py @@ -48,7 +48,9 @@ def __init__(self, name: str) -> None: log_level_from_env: str = os.environ.get("DEEPSTATE_LOG", "2") try: - logger.setLevel(LOG_LEVEL_INT_TO_STR[int(log_level_from_env)]) + log_level_from_env_int: int = int(log_level_from_env) + logger.setLevel(LOG_LEVEL_INT_TO_STR[log_level_from_env_int]) + logger.info("Setting log level from DEEPSTATE_LOG: %d", log_level_from_env_int) except ValueError: print("$DEEPSTATE_LOG contains invalid value `%s`, " "should be int in 0-6 (debug, trace, 
info, warning, error, external, critical).", diff --git a/bin/deepstate/core/base.py b/bin/deepstate/core/base.py index f70b9850..e4f26aca 100644 --- a/bin/deepstate/core/base.py +++ b/bin/deepstate/core/base.py @@ -184,6 +184,7 @@ def parse_args(cls) -> Optional[argparse.Namespace]: target_args_parsed.append((key, val)) _args['target_args'] = target_args_parsed + # if configuration is specified, parse and replace argument instantiations if args.config: _args.update(cls.build_from_config(args.config)) # type: ignore @@ -194,15 +195,16 @@ def parse_args(cls) -> Optional[argparse.Namespace]: del _args["config"] # log level fixing - if os.environ.get("DEEPSTATE_LOG", None) is None: + if not os.environ.get("DEEPSTATE_LOG"): if _args["min_log_level"] < 0 or _args["min_log_level"] > 6: raise AnalysisBackendError(f"`--min_log_level` is in invalid range, should be in 0-6 " "(debug, trace, info, warning, error, external, critical).") + L.warning("Setting log level from --min_log_level: %d", _args["min_log_level"]) logger = logging.getLogger("deepstate") logger.setLevel(LOG_LEVEL_INT_TO_STR[_args["min_log_level"]]) else: - L.info("Using log level from $DEEPSTATE_LOG.") + L.debug("Using log level from $DEEPSTATE_LOG.") cls._ARGS = args return cls._ARGS diff --git a/bin/deepstate/executors/symex/__init__.py b/bin/deepstate/executors/symex/__init__.py index e69de29b..2fcc3d5b 100644 --- a/bin/deepstate/executors/symex/__init__.py +++ b/bin/deepstate/executors/symex/__init__.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3.6 +# Copyright (c) 2019 Trail of Bits, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import pkgutil +import importlib + + +def import_fuzzers(pkg_name): + """ + dynamically load fuzzer frontends using importlib + """ + package = sys.modules[pkg_name] + return [ + importlib.import_module(pkg_name + '.' + submod) + for _, submod, _ in pkgutil.walk_packages(package.__path__) + ] + +__all__ = import_fuzzers(__name__) From fe299aee161132c9a2a8366d289b137a1af37738 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Fri, 14 Feb 2020 11:24:11 +0100 Subject: [PATCH 12/46] fix symex output dir handling --- bin/deepstate/core/base.py | 2 +- bin/deepstate/core/symex.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/bin/deepstate/core/base.py b/bin/deepstate/core/base.py index e4f26aca..5b1b2a3f 100644 --- a/bin/deepstate/core/base.py +++ b/bin/deepstate/core/base.py @@ -200,7 +200,7 @@ def parse_args(cls) -> Optional[argparse.Namespace]: raise AnalysisBackendError(f"`--min_log_level` is in invalid range, should be in 0-6 " "(debug, trace, info, warning, error, external, critical).") - L.warning("Setting log level from --min_log_level: %d", _args["min_log_level"]) + L.info("Setting log level from --min_log_level: %d", _args["min_log_level"]) logger = logging.getLogger("deepstate") logger.setLevel(LOG_LEVEL_INT_TO_STR[_args["min_log_level"]]) else: diff --git a/bin/deepstate/core/symex.py b/bin/deepstate/core/symex.py index c889206b..b93dfda7 100644 --- a/bin/deepstate/core/symex.py +++ b/bin/deepstate/core/symex.py @@ -24,7 +24,6 @@ from deepstate.core.base import AnalysisBackend 
-logging.setLoggerClass(DeepStateLogger) # fails without it, don't know why LOGGER = logging.getLogger(__name__) @@ -230,7 +229,7 @@ def begin_test(self, info): # Create the output directory for this test case. args = self.parse_args() - if args.output_test_dir is not None: + if args.output_test_dir: test_dir = os.path.join(args.output_test_dir, os.path.basename(info.file_name), info.name) From c85124461ae38cba45f8ff0aa9979bcf7eda2aa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Fri, 14 Feb 2020 11:38:55 +0100 Subject: [PATCH 13/46] fix symex tests (logrun) --- tests/logrun.py | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/tests/logrun.py b/tests/logrun.py index 553983e9..e685bb6b 100644 --- a/tests/logrun.py +++ b/tests/logrun.py @@ -2,15 +2,31 @@ import subprocess import time import sys +from tempfile import mkdtemp +from shutil import rmtree -def logrun(cmd, file, timeout): + +def logrun(cmd, file, timeout, break_callback=None): sys.stderr.write("\n\n" + ("=" * 80) + "\n") sys.stderr.write("RUNNING: ") sys.stderr.write(" ".join(cmd) + "\n\n") sys.stderr.flush() + + tmp_out_dir = None with open(file, 'w') as outf: + additional_args = [] + + # auto-create output dir + if set(cmd).isdisjoint({"-o", "--output_test_dir", "--out_test_name"}): + tmp_out_dir = mkdtemp(prefix="deepstate_logrun_") + additional_args.extend(["--output_test_dir", tmp_out_dir]) # create empty output dir + # We need to set log_level so we see ALL messages, for testing - p = subprocess.Popen(cmd + ["--min_log_level", "0"], stdout=outf, stderr=outf) + if "--min_log_level" not in cmd: + additional_args.extend(["--min_log_level", "0"]) + + p = subprocess.Popen(cmd + additional_args, stdout=outf, stderr=outf) + start = time.time() oldContents = "" lastOutput = time.time() @@ -19,16 +35,24 @@ def logrun(cmd, file, timeout): sys.stderr.write(".") sys.stderr.flush() lastOutput = time.time() + with open(file, 'r') as 
inf: contents = inf.read() + if len(contents) > len(oldContents): sys.stderr.write(contents[len(oldContents):]) sys.stderr.flush() oldContents = contents lastOutput = time.time() + + if break_callback and break_callback(contents): + break + time.sleep(0.05) + totalTime = time.time() - start sys.stderr.write("\n") + rv = (p.returncode, contents) if p.poll() is None: rv = ("TIMEOUT", contents) @@ -36,13 +60,14 @@ def logrun(cmd, file, timeout): rv = ("EXCEPTION RAISED", contents) if "internal error" in contents: rv = ("INTERNAL ERROR", contents) + sys.stderr.write("\nDONE\n\n") sys.stderr.write("TOTAL EXECUTION TIME: " + str(totalTime) + "\n") sys.stderr.write("RETURN VALUE: " + str(p.returncode) + "\n") sys.stderr.write("RETURNING AS RESULT: " + str(rv[0]) + "\n") sys.stderr.write("=" * 80 + "\n") - return rv - - + if tmp_out_dir: + rmtree(tmp_out_dir, ignore_errors=True) + return rv From 263c8e721ac2827d21e9625c9e1ae3d6ae8edc90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Fri, 14 Feb 2020 11:46:44 +0100 Subject: [PATCH 14/46] symex testcases not save warning --- bin/deepstate/core/symex.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bin/deepstate/core/symex.py b/bin/deepstate/core/symex.py index b93dfda7..3d50beb4 100644 --- a/bin/deepstate/core/symex.py +++ b/bin/deepstate/core/symex.py @@ -242,6 +242,8 @@ def begin_test(self, info): LOGGER.critical("Cannot create test output directory: %s", test_dir) self.context['test_dir'] = test_dir + else: + LOGGER.warning("Argument `--output_test_dir` not given, will not save test cases.") def log_message(self, level, message): """Add `message` to the `level`-specific log as a `Stream` object for From bd44ff69ff884421961d640bece901a1b2019130 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Fri, 14 Feb 2020 12:31:45 +0100 Subject: [PATCH 15/46] add fuzzers tests --- bin/deepstate/__init__.py | 2 + bin/deepstate/core/fuzz.py | 6 ++- tests/deepstate_base.py | 10 
++++- tests/logrun.py | 62 +++++++++++++++++++----------- tests/test_fuzzers.py | 79 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 134 insertions(+), 25 deletions(-) create mode 100644 tests/test_fuzzers.py diff --git a/bin/deepstate/__init__.py b/bin/deepstate/__init__.py index fe4bfe53..7016b47d 100644 --- a/bin/deepstate/__init__.py +++ b/bin/deepstate/__init__.py @@ -9,11 +9,13 @@ def __init__(self, name: str) -> None: logging.Logger.__init__(self, name=name) self.trace = functools.partial(self.log, 15) # type: ignore self.external = functools.partial(self.log, 45) # type: ignore + self.fuzz_stats = functools.partial(self.log, 46) # type: ignore logging.basicConfig() logging.addLevelName(15, "TRACE") logging.addLevelName(45, "EXTERNAL") +logging.addLevelName(46, "FUZZ_STATS") logging.setLoggerClass(DeepStateLogger) logger = logging.getLogger(__name__) diff --git a/bin/deepstate/core/fuzz.py b/bin/deepstate/core/fuzz.py index b3292bc8..55c9309c 100644 --- a/bin/deepstate/core/fuzz.py +++ b/bin/deepstate/core/fuzz.py @@ -393,7 +393,7 @@ def compile(self, lib_path: str, flags: List[str], _out_bin: str, env = os.envir def create_fake_seeds(self): if not self.input_seeds: - self.input_seeds = mkdtemp() + self.input_seeds = mkdtemp(prefix="deepstate_fake_seed") with open(os.path.join(self.input_seeds, "fake_seed"), 'wb') as f: f.write(b'X') L.info("Creating fake input seed file in directory `%s`", self.input_seeds) @@ -656,9 +656,11 @@ def run(self, runner: Optional[str] = None, no_exec: bool = False): try: if self.fuzzer_out: # disable deepstate output + L.info("Using fuzzer output.") L.setLevel("ERROR") self.proc = subprocess.Popen(command) else: + L.info("Using DeepState output.") self.proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) run_one_fuzzer_process = True @@ -785,7 +787,7 @@ def populate_stats(self): def print_stats(self): for key, value in self.stats.items(): if value: - print(f"{key}: {value}") + 
L.fuzz_stats("%s:%s", key, value) def post_exec(self): diff --git a/tests/deepstate_base.py b/tests/deepstate_base.py index ca2249f6..dabf191c 100644 --- a/tests/deepstate_base.py +++ b/tests/deepstate_base.py @@ -10,4 +10,12 @@ def test_manticore(self): self.run_deepstate("deepstate-manticore") def run_deepstate(self, deepstate): - print("define an actual test of DeepState here.") + raise NotImplementedError("Define an actual test of DeepState in DeepStateTestCase:run_deepstate.") + + +class DeepStateFuzzerTestCase(TestCase): + def test_afl(self): + self.run_deepstate("deepstate-afl") + + def run_deepstate(self, deepstate): + raise NotImplementedError("Define an actual test of DeepState in DeepStateFuzzerTestCase:run_deepstate.") diff --git a/tests/logrun.py b/tests/logrun.py index e685bb6b..4723449d 100644 --- a/tests/logrun.py +++ b/tests/logrun.py @@ -4,6 +4,7 @@ import sys from tempfile import mkdtemp from shutil import rmtree +import psutil def logrun(cmd, file, timeout, break_callback=None): @@ -25,45 +26,62 @@ def logrun(cmd, file, timeout, break_callback=None): if "--min_log_level" not in cmd: additional_args.extend(["--min_log_level", "0"]) - p = subprocess.Popen(cmd + additional_args, stdout=outf, stderr=outf) + proc = subprocess.Popen(cmd + additional_args, stdout=outf, stderr=outf) + callback_break = False + oldContentLen = 0 start = time.time() - oldContents = "" lastOutput = time.time() - while (p.poll() is None) and ((time.time() - start) < timeout): - if (time.time() - lastOutput) > 300: - sys.stderr.write(".") + inf = open(file, 'r') + while (proc.poll() is None) and ((time.time() - start) < timeout): + inf.seek(0, 2) + newContentLen = inf.tell() + + if newContentLen > oldContentLen: + inf.seek(oldContentLen, 0) + newContent = inf.read() + sys.stderr.write(newContent) sys.stderr.flush() + oldContentLen = newContentLen lastOutput = time.time() - with open(file, 'r') as inf: - contents = inf.read() + if break_callback and break_callback(newContent): 
+ callback_break = True + break - if len(contents) > len(oldContents): - sys.stderr.write(contents[len(oldContents):]) + if (time.time() - lastOutput) > 300: + sys.stderr.write(".") sys.stderr.flush() - oldContents = contents lastOutput = time.time() - if break_callback and break_callback(contents): - break - - time.sleep(0.05) + time.sleep(0.5) totalTime = time.time() - start sys.stderr.write("\n") - rv = (p.returncode, contents) - if p.poll() is None: - rv = ("TIMEOUT", contents) - if "Traceback (most recent call last)" in contents: - rv = ("EXCEPTION RAISED", contents) - if "internal error" in contents: - rv = ("INTERNAL ERROR", contents) + inf.seek(0, 0) + contents = inf.read() + inf.close() + + rv = [proc.returncode, contents] + if callback_break: + rv[0] = "CALLBACK_BREAK" + elif proc.poll() is None: + rv[0] = "TIMEOUT" + elif "Traceback (most recent call last)" in contents: + rv[0] = "EXCEPTION RAISED" + elif "internal error" in contents: + rv[0] = "INTERNAL ERROR" + + try: + for some_proc in psutil.Process(proc.pid).children(recursive=True) + [proc]: + some_proc.terminate() + except psutil.NoSuchProcess: + pass sys.stderr.write("\nDONE\n\n") sys.stderr.write("TOTAL EXECUTION TIME: " + str(totalTime) + "\n") - sys.stderr.write("RETURN VALUE: " + str(p.returncode) + "\n") + sys.stderr.write("RETURN VALUE: " + str(proc.returncode) + "\n") sys.stderr.write("RETURNING AS RESULT: " + str(rv[0]) + "\n") sys.stderr.write("=" * 80 + "\n") diff --git a/tests/test_fuzzers.py b/tests/test_fuzzers.py new file mode 100644 index 00000000..15bc703d --- /dev/null +++ b/tests/test_fuzzers.py @@ -0,0 +1,79 @@ +from __future__ import print_function +import deepstate_base +import logrun +from tempfile import mkdtemp, TemporaryDirectory, mkstemp +from pathlib import Path +from os import path +from glob import glob +import re +import sys + + +class CrashFuzzerTest(deepstate_base.DeepStateFuzzerTestCase): + def run_deepstate(self, deepstate): + def do_compile(tempdir, 
test_source_file): + """ + Compile test_source_file using frontend API + temdir is a workspace + """ + # prepare args + output_test_name = path.join(tempdir, Path(test_source_file).stem) + _, output_log_file = mkstemp(dir=tempdir) + arguments = [ + "--compile_test", test_source_file, + "--out_test_name", output_test_name + ] + + # run command + (r, output) = logrun.logrun([deepstate] + arguments, output_log_file, 360) + compiled_files = glob(output_test_name + '*') + + # check output + self.assertEqual(r, 0) + for compiled_file in compiled_files: + self.assertTrue(path.isfile(compiled_file)) + + # return compiled file(s) + # if Angora fuzzer, file.taint should be before file.fast + if any([compiled_file.endswith('.taint') for compiled_file in compiled_files]): + compiled_files = sorted(compiled_files, reverse=True) + return compiled_files + + + def break_callback(output): + """ + Check if some crash were found assuming that + fuzzer output is the deepstate one (--fuzzer_out == False) + """ + for crashes_stat in re.finditer(r"^FUZZ_STATS:.*:unique_crashes:(\d+)$", + output, re.MULTILINE): + if int(crashes_stat.group(1)) > 0: + return True + return False + + + def do_fuzz(tempdir, compiled_files): + """ + Fuzz compiled_files (single compiled test/harness or two files if Angora) + until first crash + """ + # prepare args + _, output_log_file = mkstemp(dir=tempdir) + output_test_dir = mkdtemp(dir=tempdir) + + arguments = [ + "--output_test_dir", output_test_dir + ] + compiled_files + + # run command + (r, output) = logrun.logrun([deepstate] + arguments, output_log_file, + 180, break_callback=break_callback) + + # check output + self.assertEqual(r, "CALLBACK_BREAK") + + + test_source_file = "examples/SimpleCrash.cpp" + with TemporaryDirectory(prefix="deepstate_test_fuzzers_") as tempdir: + compiled_files = do_compile(tempdir, test_source_file) + do_fuzz(tempdir, compiled_files) From afaa7d82d22089921d9c03f9caee7a0e6e1f310e Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Fri, 14 Feb 2020 15:35:11 +0100 Subject: [PATCH 16/46] fuzzer test, fixes, stats --- bin/deepstate/core/fuzz.py | 60 +++++++++++++++-------- bin/deepstate/executors/fuzz/afl.py | 1 + bin/deepstate/executors/fuzz/angora.py | 23 +++++---- bin/deepstate/executors/fuzz/eclipser.py | 49 ++++++++++++------ bin/deepstate/executors/fuzz/honggfuzz.py | 41 +--------------- bin/deepstate/executors/fuzz/libfuzzer.py | 33 ++++++++++++- tests/deepstate_base.py | 12 +++++ tests/logrun.py | 4 ++ tests/test_fuzzers.py | 6 +-- 9 files changed, 141 insertions(+), 88 deletions(-) diff --git a/bin/deepstate/core/fuzz.py b/bin/deepstate/core/fuzz.py index 55c9309c..a2d221f2 100644 --- a/bin/deepstate/core/fuzz.py +++ b/bin/deepstate/core/fuzz.py @@ -23,6 +23,7 @@ import argparse import shutil import traceback +import fcntl from tempfile import mkdtemp from pathlib import Path @@ -600,8 +601,12 @@ def cleanup(self): L.info(f"Killing process {self.proc.pid} and childs.") # terminate - for some_proc in psutil.Process(self.proc.pid).children(recursive=True) + [self.proc]: - some_proc.terminate() + try: + for some_proc in psutil.Process(self.proc.pid).children(recursive=True) + [self.proc]: + some_proc.terminate() + except psutil.NoSuchProcess: + self.proc = None + return # hard kill for some_proc in psutil.Process(self.proc.pid).children(recursive=True) + [self.proc]: @@ -611,6 +616,9 @@ def cleanup(self): except subprocess.TimeoutExpired: L.warning("Subprocess (PID %d) could not terminate in time, killing.", some_proc.pid) some_proc.kill() + except psutil.NoSuchProcess: + self.proc = None + return self.proc = None @@ -659,8 +667,11 @@ def run(self, runner: Optional[str] = None, no_exec: bool = False): L.info("Using fuzzer output.") L.setLevel("ERROR") self.proc = subprocess.Popen(command) + else: L.info("Using DeepState output.") + # TODO: frontends uses blocking read in `populate_stats`, + # we may replace PIPE with normal file and do reads 
non-blocking self.proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) run_one_fuzzer_process = True @@ -694,22 +705,25 @@ def run(self, runner: Optional[str] = None, no_exec: bool = False): L.debug("One cycle `communicate` with timeout %d.", timeout_one_cycle) stdout, stderr = self.proc.communicate(timeout=timeout_one_cycle) - # fuzzer process exited, check return code - if self.proc.returncode != 0: + # fuzzer process exited + # it's fine if returncode is 0 or 1 for libfuzzer + if self.proc.returncode == 0 or \ + (self.proc.returncode == 1 and self.name == "libFuzzer"): + L.info("Fuzzer %s (PID %d) exited with return code %d.", + self.name, self.proc.pid, self.proc.returncode) + run_one_fuzzer_process = False + + else: if stdout: L.error(stdout.decode('utf8')) if stderr: L.error(stderr.decode('utf8')) raise FuzzFrontendError(f"Fuzzer {self.name} (PID {self.proc.pid}) exited " f"with return code {self.proc.returncode}.") - else: - L.info("Fuzzer %s (PID %d) exited with return code %d.", - self.name, self.proc.pid, self.proc.returncode) - run_one_fuzzer_process = False # Timeout, just continue to management step except subprocess.TimeoutExpired: - L.debug("Once cycle timeout.") + L.debug("One cycle timeout.") # Any OS-specific errors encountered except OSError as e: @@ -730,17 +744,16 @@ def run(self, runner: Optional[str] = None, no_exec: bool = False): run_fuzzer = False # manage - if run_one_fuzzer_process: - try: - L.debug("Management cycle starts after %ss.", total_execution_time) - self.manage() - - # error in management, exit - except Exception: - L.error(traceback.format_exc()) - L.error("Exception during fuzzer %s run.", self.name) - run_one_fuzzer_process = False - run_fuzzer = False + try: + L.debug("Management cycle starts after %ss.", total_execution_time) + self.manage() + + # error in management, exit + except Exception: + L.error(traceback.format_exc()) + L.error("Exception during fuzzer %s run.", self.name) + 
run_one_fuzzer_process = False + run_fuzzer = False # cleanup try: @@ -781,13 +794,15 @@ def populate_stats(self): Parses out stats generated by fuzzer output. Should be implemented by user, and can return custom feedback. """ - raise NotImplementedError("Must implement in frontend subclass.") + crashes: int = len(os.listdir(self.crash_dir)) + self.stats["unique_crashes"] = str(crashes) def print_stats(self): for key, value in self.stats.items(): if value: L.fuzz_stats("%s:%s", key, value) + L.fuzz_stats("-"*30) def post_exec(self): @@ -796,6 +811,9 @@ def post_exec(self): things like crash triaging, testcase minimization (ie with `deepstate-reduce`), or any other manipulations with produced testcases. """ + # make sure that child processes are killed + self.cleanup() + raise NotImplementedError("Must implement in frontend subclass.") diff --git a/bin/deepstate/executors/fuzz/afl.py b/bin/deepstate/executors/fuzz/afl.py index 7e197ea5..7dee4cf5 100644 --- a/bin/deepstate/executors/fuzz/afl.py +++ b/bin/deepstate/executors/fuzz/afl.py @@ -141,6 +141,7 @@ def populate_stats(self): """ Retrieves and parses the stats file produced by AFL """ + super().populate_stats() stat_file_path: str = os.path.join(self.output_test_dir, "the_fuzzer", "fuzzer_stats") with open(stat_file_path, "r") as stat_file: for line in stat_file: diff --git a/bin/deepstate/executors/fuzz/angora.py b/bin/deepstate/executors/fuzz/angora.py index 94151be9..7ffa084f 100644 --- a/bin/deepstate/executors/fuzz/angora.py +++ b/bin/deepstate/executors/fuzz/angora.py @@ -206,6 +206,8 @@ def populate_stats(self): """ Parses Angora output JSON config to dict for reporting. 
""" + super().populate_stats() + stat_file_path: str = os.path.join(self.output_test_dir, "angora", "fuzzer_stats") with open(stat_file_path, "r") as stat_file: self.stats["fuzzer_pid"] = stat_file.read().split(":", 1)[1].strip() @@ -231,17 +233,18 @@ def populate_stats(self): self.stats["execs_per_sec"] = new_stats.get("speed", [0])[0] self.stats["paths_total"] = new_stats.get("num_inputs", 0) - self.stats["unique_crashes"] = new_stats.get("num_crashes", 0) + if new_stats.get("num_crashes"): + self.stats["unique_crashes"] = new_stats.get("num_crashes") self.stats["unique_hangs"] = new_stats.get("num_hangs", 0) - all_fuzz = [] - for one_fuzz in new_stats.get("fuzz", []): - time_key = one_fuzz.pop("time", {}) - s = time_key.get("secs", 0) - ns = time_key.get("nanos", 0) - t = float('{}.{:09d}'.format(s, ns)) - all_fuzz.append((t, one_fuzz)) - all_fuzz = sorted(all_fuzz, key=operator.itemgetter(0), reverse=True) + # all_fuzz = [] + # for one_fuzz in new_stats.get("fuzz", []): + # time_key = one_fuzz.pop("time", {}) + # s = time_key.get("secs", 0) + # ns = time_key.get("nanos", 0) + # t = float('{}.{:09d}'.format(s, ns)) + # all_fuzz.append((t, one_fuzz)) + # all_fuzz = sorted(all_fuzz, key=operator.itemgetter(0), reverse=True) # if len(all_fuzz) >= 2: # last_crash_execs = 0 @@ -250,7 +253,7 @@ def populate_stats(self): # last_crash_execs = one_fuzz["num_exec"] # self.stats["execs_since_crash"] = self.stats["execs_done"] - last_crash_execs - self.stats["command_line"] = self.command + # self.stats["command_line"] = self.command def reporter(self) -> Optional[Dict[str, Any]]: diff --git a/bin/deepstate/executors/fuzz/eclipser.py b/bin/deepstate/executors/fuzz/eclipser.py index 85b08a54..ebfffcf8 100644 --- a/bin/deepstate/executors/fuzz/eclipser.py +++ b/bin/deepstate/executors/fuzz/eclipser.py @@ -67,15 +67,18 @@ def pre_exec(self) -> None: sync_dir = os.path.join(self.output_test_dir, "sync_dir") main_dir = os.path.join(self.output_test_dir, "the_fuzzer") 
self.push_dir = os.path.join(sync_dir, "queue") - self.pull_dir = os.path.join(main_dir, "testcase") - self.crash_dir = os.path.join(main_dir, "crash") + self.pull_dir = self.push_dir + self.crash_dir = os.path.join(main_dir, "crashes") # resume fuzzing if len(os.listdir(self.output_test_dir)) > 1: - self.check_required_directories([self.push_dir, self.pull_dir, self.crash_dir]) + self.check_required_directories([self.push_dir, self.crash_dir, + os.path.join(main_dir, "crash"), os.path.join(main_dir, "testcase")]) L.info(f"Resuming fuzzing using seeds from {self.pull_dir} (skipping --input_seeds option).") + self.decode_testcases() + self.input_seeds = self.push_dir else: - self.setup_new_session([main_dir, self.push_dir]) + self.setup_new_session([self.crash_dir, self.push_dir]) if self.blackbox == True: L.info("Blackbox option is redundant. Eclipser works on non-instrumented binaries using QEMU by default.") @@ -144,23 +147,41 @@ def ensemble(self) -> None: # type: ignore super().ensemble(local_queue) + def decode_testcases(self): + L.info("Performing decoding on testcases and crashes") + encoded_testcases_path: str = os.path.join(self.output_test_dir, "the_fuzzer", "testcase") + encoded_crashes_path: str = os.path.join(self.output_test_dir, "the_fuzzer", "crash") + decoded_path: str = os.path.join(self.output_test_dir, "decoded") + + subprocess.call([self.EXECUTABLES["RUNNER"], self.fuzzer_exe, "decode", + "-i", encoded_crashes_path, "-o", decoded_path], + stdout=subprocess.PIPE) + for f in glob.glob(os.path.join(decoded_path, "decoded_files", "*")): + shutil.copy(f, self.crash_dir) + shutil.rmtree(decoded_path) + + subprocess.call([self.EXECUTABLES["RUNNER"], self.fuzzer_exe, "decode", + "-i", encoded_testcases_path, "-o", decoded_path], + stdout=subprocess.PIPE) + for f in glob.glob(os.path.join(decoded_path, "decoded_files", "*")): + shutil.copy(f, self.pull_dir) + shutil.rmtree(decoded_path) + + + def manage(self): + self.decode_testcases() + 
super().manage() + + def post_exec(self) -> None: """ Decode and minimize testcases after fuzzing. """ - out: str = self.output_test_dir - - L.info("Performing post-processing decoding on testcases and crashes") - subprocess.call([self.EXECUTABLES["RUNNER"], self.fuzzer_exe, "decode", "-i", self.pull_dir, "-o", out + "/decoded"]) - subprocess.call([self.EXECUTABLES["RUNNER"], self.fuzzer_exe, "decode", "-i", self.crash_dir, "-o", out + "/decoded"]) - for f in glob.glob(out + "/decoded/decoded_files/*"): - shutil.copy(f, out) - shutil.rmtree(out + "/decoded") + self.decode_testcases() def populate_stats(self): - crashes: int = len(os.listdir(self.crash_dir)) - self.stats["unique_crashes"] = str(crashes) + super().populate_stats() def reporter(self) -> Dict[str, int]: diff --git a/bin/deepstate/executors/fuzz/honggfuzz.py b/bin/deepstate/executors/fuzz/honggfuzz.py index 6eea7014..8cb89a0d 100644 --- a/bin/deepstate/executors/fuzz/honggfuzz.py +++ b/bin/deepstate/executors/fuzz/honggfuzz.py @@ -85,7 +85,7 @@ def cmd(self): "--output", self.push_dir, # auto-create, reusable "--crashdir", self.crash_dir, # "--logfile", os.path.join(self.output_test_dir, "hfuzz_log.txt"), - "--verbose", + # "--verbose", "--rlimit_rss", str(self.mem_limit), ]) @@ -127,44 +127,7 @@ def populate_stats(self): """ Retrieves and parses the stats file produced by Honggfuzz """ - # add crash metrics - crashes: int = len(os.listdir(self.crash_dir)) - self.stats["unique_crashes"] = str(crashes) - - stat_file_path: str = os.path.join(self.output_test_dir, "HONGGFUZZ.REPORT.TXT") - if not os.path.isfile(stat_file_path): - return - - with open(stat_file_path, "r") as stat_file: - lines = stat_file.readlines() - - stats: Dict[str, Optional[str]] = { - "mutationsPerRun": None, - "externalCmd": None, - "fuzzStdin": None, - "timeout": None, - "ignoreAddr": None, - "ASLimit": None, - "RSSLimit": None, - "DATALimit": None, - "wordlistFile": None, - "fuzzTarget": None, - "ORIG_FNAME": None, - "FUZZ_FNAME": 
None, - "PID": None, - "SIGNAL": None, - "FAULT ADDRESS": None, - "INSTRUCTION": None, - "STACK HASH": None, - } - - # strip first 4 and last 5 lines to make a parseable file - lines = lines[4:][:-5] - - for l in lines: - for k in stats.keys(): - if k in l: - stats[k] = l.split(":")[1].strip() + super().populate_stats() def reporter(self) -> Dict[str, Optional[str]]: diff --git a/bin/deepstate/executors/fuzz/libfuzzer.py b/bin/deepstate/executors/fuzz/libfuzzer.py index db2c3e0c..a4da34cc 100644 --- a/bin/deepstate/executors/fuzz/libfuzzer.py +++ b/bin/deepstate/executors/fuzz/libfuzzer.py @@ -129,7 +129,38 @@ def cmd(self): def populate_stats(self): - pass + super().populate_stats() + if not self.proc or self.proc.stderr.closed: + return + + # libFuzzer under DeepState have broken output + # splitted into multiple lines, preceeded with "EXTERNAL:" + done_reading: bool = False + for line in self.proc.stderr.readlines(100): + if done_reading: + break + + if line.startswith(b"EXTERNAL: "): + line = line.split(b":", 1)[1].strip() + if line.startswith(b"#"): + # new event code + self.stats["execs_done"] = line.split()[0].strip(b"#").decode() + + for line in self.proc.stderr.readlines(100): + line = line.split(b":", 1)[1].strip() + if not line or line == b'\n': + done_reading = True + break + + if b": " in line: + key, value = line.split(b": ", 1) + if key == b"exec/s": + self.stats["execs_per_sec"] = value.decode() + elif key == b"units": + self.stats["paths_total"] = value.decode() + elif key == b"cov": + self.stats["bitmap_cvg"] = value.decode() + def post_exec(self): diff --git a/tests/deepstate_base.py b/tests/deepstate_base.py index dabf191c..75be4f56 100644 --- a/tests/deepstate_base.py +++ b/tests/deepstate_base.py @@ -17,5 +17,17 @@ class DeepStateFuzzerTestCase(TestCase): def test_afl(self): self.run_deepstate("deepstate-afl") + def test_libfuzzer(self): + self.run_deepstate("deepstate-libfuzzer") + + def test_honggfuzz(self): + 
self.run_deepstate("deepstate-honggfuzz") + + def test_angora(self): + self.run_deepstate("deepstate-angora") + + def test_eclipser(self): + self.run_deepstate("deepstate-eclipser") + def run_deepstate(self, deepstate): raise NotImplementedError("Define an actual test of DeepState in DeepStateFuzzerTestCase:run_deepstate.") diff --git a/tests/logrun.py b/tests/logrun.py index 4723449d..10b663e6 100644 --- a/tests/logrun.py +++ b/tests/logrun.py @@ -59,6 +59,10 @@ def logrun(cmd, file, timeout, break_callback=None): totalTime = time.time() - start sys.stderr.write("\n") + inf.seek(oldContentLen, 0) + newContent = inf.read() + sys.stderr.write(newContent) + sys.stderr.flush() inf.seek(0, 0) contents = inf.read() inf.close() diff --git a/tests/test_fuzzers.py b/tests/test_fuzzers.py index 15bc703d..4a10cf01 100644 --- a/tests/test_fuzzers.py +++ b/tests/test_fuzzers.py @@ -40,7 +40,7 @@ def do_compile(tempdir, test_source_file): return compiled_files - def break_callback(output): + def crash_found(output): """ Check if some crash were found assuming that fuzzer output is the deepstate one (--fuzzer_out == False) @@ -67,10 +67,10 @@ def do_fuzz(tempdir, compiled_files): # run command (r, output) = logrun.logrun([deepstate] + arguments, output_log_file, - 180, break_callback=break_callback) + 180, break_callback=crash_found) # check output - self.assertEqual(r, "CALLBACK_BREAK") + self.assertTrue(crash_found(output)) test_source_file = "examples/SimpleCrash.cpp" From fa5ac1e83ffeb6860cada39d2d51323eaac48a44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Fri, 14 Feb 2020 16:04:00 +0100 Subject: [PATCH 17/46] fuzzer test CI --- .github/workflows/ci.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3703bfa4..0d612907 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -88,3 +88,10 @@ jobs: dockerfile: docker/Dockerfile push_image_and_stages: true # because we 
run workflow on PRs build_extra_args: "--cache-from=deepstate-base" + fuzzer: + needs: container + container: deepstate + steps: + - name: Test + run: | + nosetests tests/test_fuzzers.py From 0cc3d88f60ba354ad20844cf2f68d729bc1e2ef0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Fri, 14 Feb 2020 16:05:18 +0100 Subject: [PATCH 18/46] fuzzer test CI - fix1 --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0d612907..125a183c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -89,6 +89,7 @@ jobs: push_image_and_stages: true # because we run workflow on PRs build_extra_args: "--cache-from=deepstate-base" fuzzer: + runs-on: ubuntu-latest needs: container container: deepstate steps: From a87d9f7c4446026ea1254b7c04e00a4b66f5f7d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Fri, 14 Feb 2020 16:51:02 +0100 Subject: [PATCH 19/46] pyflakes errors fix --- bin/deepstate/core/fuzz.py | 1 - bin/deepstate/core/symex.py | 2 +- bin/deepstate/executors/fuzz/angora.py | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/bin/deepstate/core/fuzz.py b/bin/deepstate/core/fuzz.py index a2d221f2..6d6596a3 100644 --- a/bin/deepstate/core/fuzz.py +++ b/bin/deepstate/core/fuzz.py @@ -23,7 +23,6 @@ import argparse import shutil import traceback -import fcntl from tempfile import mkdtemp from pathlib import Path diff --git a/bin/deepstate/core/symex.py b/bin/deepstate/core/symex.py index 3d50beb4..a6812564 100644 --- a/bin/deepstate/core/symex.py +++ b/bin/deepstate/core/symex.py @@ -19,7 +19,7 @@ import argparse import hashlib -from deepstate import (DeepStateLogger, LOG_LEVEL_INT_TO_LOGGER, +from deepstate import (LOG_LEVEL_INT_TO_LOGGER, LOG_LEVEL_TRACE, LOG_LEVEL_ERROR, LOG_LEVEL_CRITICAL) from deepstate.core.base import AnalysisBackend diff --git a/bin/deepstate/executors/fuzz/angora.py 
b/bin/deepstate/executors/fuzz/angora.py index 7ffa084f..a85c1c77 100644 --- a/bin/deepstate/executors/fuzz/angora.py +++ b/bin/deepstate/executors/fuzz/angora.py @@ -18,7 +18,6 @@ import logging import argparse import subprocess -import operator import time from typing import List, Dict, Optional, Any From d1b70f94a55aceeeb068e75f3ac3a79b844f7ff9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Fri, 14 Feb 2020 16:51:21 +0100 Subject: [PATCH 20/46] fuzzer test CI - fix2 --- .github/workflows/ci.yml | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 125a183c..18c33c69 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -87,12 +87,8 @@ jobs: context: . dockerfile: docker/Dockerfile push_image_and_stages: true # because we run workflow on PRs - build_extra_args: "--cache-from=deepstate-base" - fuzzer: - runs-on: ubuntu-latest - needs: container - container: deepstate - steps: - - name: Test + build_extra_args: "--cache-from=deepstate-base --build-arg=make_j=4" + - name: Test fuzzers + container: deepstate run: | - nosetests tests/test_fuzzers.py + sudo pip3 install nose && nosetests tests/test_fuzzers.py From a57b40fe71d48450e9385e62bf80bc1a8653cd75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Fri, 14 Feb 2020 16:52:30 +0100 Subject: [PATCH 21/46] fuzzer test CI - fix3 --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 18c33c69..7d9e9efa 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -88,7 +88,7 @@ jobs: dockerfile: docker/Dockerfile push_image_and_stages: true # because we run workflow on PRs build_extra_args: "--cache-from=deepstate-base --build-arg=make_j=4" - - name: Test fuzzers - container: deepstate + - container: deepstate + name: Test fuzzers run: | sudo pip3 install nose 
&& nosetests tests/test_fuzzers.py From 2c1a268cbe2d0b3e792b050a596de27de7ad93fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Fri, 14 Feb 2020 16:53:05 +0100 Subject: [PATCH 22/46] fuzzer test CI - fix4 --- .github/workflows/ci.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7d9e9efa..3519690b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -88,7 +88,6 @@ jobs: dockerfile: docker/Dockerfile push_image_and_stages: true # because we run workflow on PRs build_extra_args: "--cache-from=deepstate-base --build-arg=make_j=4" - - container: deepstate - name: Test fuzzers + - name: Test fuzzers run: | - sudo pip3 install nose && nosetests tests/test_fuzzers.py + docker run -it deepstate bash -c 'sudo pip3 install nose && nosetests tests/test_fuzzers.py' From 2c52b9a02d2713eb9a82eb916a5945fc2f18d61e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Fri, 14 Feb 2020 19:22:50 +0100 Subject: [PATCH 23/46] fuzzer test CI - fix5 --- .github/workflows/ci.yml | 2 +- docker/Dockerfile | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3519690b..2217be29 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -87,7 +87,7 @@ jobs: context: . 
dockerfile: docker/Dockerfile push_image_and_stages: true # because we run workflow on PRs - build_extra_args: "--cache-from=deepstate-base --build-arg=make_j=4" + build_extra_args: "--cache-from=deepstate-base --build-arg=make_j=2" - name: Test fuzzers run: | docker run -it deepstate bash -c 'sudo pip3 install nose && nosetests tests/test_fuzzers.py' diff --git a/docker/Dockerfile b/docker/Dockerfile index 84033642..70fe83ce 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -27,8 +27,12 @@ FROM deepstate-base ENV DEPS_DIR /home/user/deps ARG make_j +# Angr, Manticore +RUN echo 'Installing angr and manticore' \ + && pip3 install z3-solver angr git+git://github.com/trailofbits/manticore.git --user + # Eclipser - not deepstate dependent -COPY --from=Eclipser /home/user/Eclipser $DEPS_DIR/eclipser +COPY --from=Eclipser /home/user/Eclipser/build $DEPS_DIR/eclipser RUN echo 'Eclipser - installing dotnet' \ && wget -q https://packages.microsoft.com/config/ubuntu/18.04/packages-microsoft-prod.deb \ && sudo dpkg -i packages-microsoft-prod.deb \ @@ -98,10 +102,8 @@ RUN echo 'Building deepstate with Honggfuzz' \ && make -j $make_j \ && sudo cp ./libdeepstate_HFUZZ.a /usr/local/lib/ -# Angr, Manticore -RUN echo 'Installing angr and manticore' \ - && pip3 install z3-solver angr git+git://github.com/trailofbits/manticore.git --user - ENV CXX=clang++ CC=clang +ENV AFL_HOME="$DEPS_DIR/afl" HONGGFUZZ_HOME="$DEPS_DIR/honggfuzz" \ + ANGORA_HOME="$DEPS_DIR/angora" ECLIPSER_HOME="$DEPS_DIR/eclipser" CMD ["/bin/bash"] \ No newline at end of file From 60003991bdddd7c1c2ffd746f78c92254f4c7033 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Fri, 14 Feb 2020 23:52:35 +0100 Subject: [PATCH 24/46] ensembling/syncing fuzzers init --- bin/deepstate/core/base.py | 4 - bin/deepstate/core/fuzz.py | 153 +++++++------- bin/deepstate/executors/fuzz/afl.py | 29 ++- bin/deepstate/executors/fuzz/angora.py | 14 +- bin/deepstate/executors/fuzz/eclipser.py | 20 +- 
bin/deepstate/executors/fuzz/honggfuzz.py | 19 +- bin/deepstate/executors/fuzz/libfuzzer.py | 45 ++--- examples/EnsembledCrash.cpp | 49 +++++ tests/test_fuzzers.py | 2 +- tests/test_fuzzers_sync.py | 231 ++++++++++++++++++++++ 10 files changed, 410 insertions(+), 156 deletions(-) create mode 100644 examples/EnsembledCrash.cpp create mode 100644 tests/test_fuzzers_sync.py diff --git a/bin/deepstate/core/base.py b/bin/deepstate/core/base.py index 5b1b2a3f..66039cac 100644 --- a/bin/deepstate/core/base.py +++ b/bin/deepstate/core/base.py @@ -147,10 +147,6 @@ def parse_args(cls) -> Optional[argparse.Namespace]: "-t", "--timeout", default=0, type=int, help="Time to kill analysis worker processes, in seconds (default is 0 for none).") - parser.add_argument( - "-w", "--num_workers", default=1, type=int, - help="Number of worker jobs to spawn for analysis (default is 1).") - parser.add_argument("--mem_limit", type=int, default=50, help="Child process memory limit in MiB (default is 50). 0 for unlimited.") diff --git a/bin/deepstate/core/fuzz.py b/bin/deepstate/core/fuzz.py index 6d6596a3..a43caea4 100644 --- a/bin/deepstate/core/fuzz.py +++ b/bin/deepstate/core/fuzz.py @@ -46,6 +46,12 @@ class FuzzerFrontend(AnalysisBackend): Defines a base front-end object for using DeepState to interact with fuzzers. 
""" + REQUIRE_SEEDS: bool = False + + PUSH_DIR: str + PULL_DIR: str + CRASH_DIR: str + def __init__(self, envvar: str) -> None: """ Create and store variables: @@ -57,6 +63,10 @@ def __init__(self, envvar: str) -> None: - stats_file (file where to put stats from fuzzer in common format) - proc (handler to fuzzer process) + - push_dir (push testcases from external sources here) + - pull_dir (pull new testcases from this dir) + - crash_dir (crashes will be in this dir) + Inherits: - name (name for pretty printing) - compiler_exe (fuzzer compiler file, optional) @@ -113,7 +123,7 @@ def __init__(self, envvar: str) -> None: "execs_since_crash": None, "slowest_exec_ms": None, "peak_rss_mb": None, - "command_line": None + "sync_dir_size": None } # parsed argument attributes @@ -125,16 +135,14 @@ def __init__(self, envvar: str) -> None: self.fuzzer_args: List[Any] = [] self.fuzzer_out: bool = False - self.enable_sync: bool = False self.sync_cycle: int = 5 self.sync_out: bool = True self.sync_dir: Optional[str] = None - self.push_dir: str = '' # push testcases from external sources here - self.pull_dir: str = '' # pull new testcases from this dir - self.crash_dir: str = '' # crashes will be in this dir + self.push_dir: str = '' + self.pull_dir: str = '' + self.crash_dir: str = '' - self.post_stats: bool = False self.home_path: Optional[str] = None @@ -156,7 +164,6 @@ def parse_args(cls) -> Optional[argparse.Namespace]: - max_input_size (default: 8192B) - fuzzer_args (default: {}) - blackbox (default: False) - - post_stats (default: False) Optional arguments (may be None): - input_seeds @@ -215,28 +222,13 @@ def parse_args(cls) -> Optional[argparse.Namespace]: # Parallel / Ensemble Fuzzing ensemble_group = parser.add_argument_group("Parallel/Ensemble Fuzzing") ensemble_group.add_argument( - "--enable_sync", action="store_true", - help="Enable seed synchronization to another seed queue directory.") - - ensemble_group.add_argument( - "--sync_out", action="store_true", - 
help="When set, output individual fuzzer stat summary, instead of a global summary from the ensembler") - - ensemble_group.add_argument( - "--sync_dir", type=str, default="out_sync", - help="Directory representing seed queue for synchronization between local queue.") + "--sync_dir", type=str, + help="Directory representing seed queue for synchronization between fuzzers.") ensemble_group.add_argument( "--sync_cycle", type=int, default=5, help="Time in seconds the executor should sync to sync directory (default is 5 seconds).") - - # Post-processing - post_group = parser.add_argument_group("Execution Post-processing") - post_group.add_argument("--post_stats", action="store_true", - help="Output post-fuzzing statistics to user (if any).") - - # Miscellaneous options parser.add_argument( "--fuzzer_help", action="store_true", @@ -428,6 +420,7 @@ def pre_exec(self): - check for targets (self.binary) - may check for input_seeds - check for output directory + - check for sync_dir - update stats_file path """ @@ -492,12 +485,23 @@ def pre_exec(self): # update stats file self.stats_file = os.path.join(self.output_test_dir, self.stats_file) + # require seeds flag + self.require_seeds = self.REQUIRE_SEEDS + + # push/pull/crash paths + self.push_dir = os.path.join(self.output_test_dir, self.PUSH_DIR) + self.pull_dir = os.path.join(self.output_test_dir, self.PULL_DIR) + self.crash_dir = os.path.join(self.output_test_dir, self.CRASH_DIR) + # check if we enabled seed synchronization, and initialize directory - if self.enable_sync: + if self.sync_dir: + if not os.path.exists(self.sync_dir): + raise FuzzFrontendError(f"Seed synchronization dir (`{self.sync_dir}`) doesn't exist.") + if not os.path.isdir(self.sync_dir): - L.info("Initializing sync directory for ensembling seeds.") - os.mkdir(self.sync_dir) - L.debug("Sync directory: %s", self.sync_dir) + raise FuzzFrontendError(f"Seed synchronization dir (`{self.sync_dir}`) is not a directory.") + + L.info("Will synchronize seed using 
`%s` directory.", self.sync_dir) ################################## @@ -572,23 +576,20 @@ def main(self): def manage(self): # print and save statistics + self.populate_stats() + self.save_stats() if not self.fuzzer_out: - self.populate_stats() self.print_stats() - # invoke ensemble if seed synchronization option is set - if self.enable_sync: - L.debug("%s - Performing sync cycle %s", self.name, self.sync_count) + # invoke ensemble if sync_dir is provided + if self.sync_dir: + L.info("%s - Performing sync cycle %s", self.name, self.sync_count) # call ensemble to perform seed synchronization self.ensemble() - # if sync_out argument set, output individual fuzzer statistics - # rather than have our ensembler report global stats - if self.sync_out: - print(f"\n{self.name} Fuzzer Stats\n") - for head, stat in self.reporter().items(): - print(f"{head}\t:\t{stat}") + # update global statistics + self.stats["sync_dir_size"] = str(len(os.listdir(self.sync_dir))) self.sync_count += 1 @@ -804,6 +805,13 @@ def print_stats(self): L.fuzz_stats("-"*30) + def save_stats(self): + with open(self.stats_file, 'w') as f: + for key, value in self.stats.items(): + if value: + f.write(f"{key}:{value}\n") + + def post_exec(self): """ Performs user-specified post-processing execution logic. Should be implemented by user, and can implement @@ -813,59 +821,47 @@ def post_exec(self): # make sure that child processes are killed self.cleanup() - raise NotImplementedError("Must implement in frontend subclass.") - ################################### # Ensemble/Parallel Fuzzing methods ################################### - def _sync_seeds(self, mode: str, src: str, dest: str, excludes: List[str] = []) -> None: + def _sync_seeds(self, src: str, dest: str, excludes: List[str] = []) -> None: """ Helper that invokes rsync for convenient file syncing between two files. TODO(alan): implement functionality for syncing across servers. 
TODO(alan): consider implementing "native" syncing alongside current "rsync mode". - :param mode: str representing mode (either 'GET' or 'PUSH') :param src: path to source queue :param dest: path to destination queue :param excludes: list of string patterns for paths to ignore when rsync-ing """ - if not mode in ["GET", "PUSH"]: - raise FuzzFrontendError(f"Unknown mode for seed syncing: `{mode}`") - - rsync_cmd: List[str] = ["rsync", "-racz", "--ignore-existing"] + rsync_cmd: List[str] = [ + "rsync", + "--recursive", + "--archive", + "--checksum", + "--compress", + "--ignore-existing" + ] # subclass should invoke with list of pattern ignores if len(excludes) > 0: rsync_cmd += [f"--exclude={e}" for e in excludes] - if mode == "GET": - rsync_cmd += [dest, src] - elif mode == "PUSH": - rsync_cmd += [src, dest] + rsync_cmd += [ + os.path.join(src, ""), # append trailing / + dest + ] L.debug("rsync command: %s", rsync_cmd) try: subprocess.Popen(rsync_cmd) except subprocess.CalledProcessError as e: - raise FuzzFrontendError(f"{self.name} run interrupted due to exception {e}.") - - - @staticmethod - def _queue_len(queue_path: str) -> int: - """ - Helper that checks the number of seeds in queue, returns 0 if path doesn't - exist yet. 
- - :param queue_path: path to queue (ie AFL_out/queue/) - """ - if not os.path.exists(queue_path): - return 0 - return len([path for path in os.listdir(queue_path)]) + raise FuzzFrontendError(f"{self.name} rsync interrupted due to exception {e}.") def ensemble(self, local_queue: Optional[str] = None, global_queue: Optional[str] = None): @@ -875,28 +871,25 @@ def ensemble(self, local_queue: Optional[str] = None, global_queue: Optional[str """ if not self.sync_dir: + L.warning("Called `ensemble`, but `--sync_dir` not provided.") return - if global_queue is None: - global_queue = self.sync_dir + "/" - - global_len: int = self._queue_len(global_queue) - L.debug("Global seed queue: %s with %d files", global_queue, global_len) + global_queue: str = self.sync_dir + local_queue: str = self.push_dir - if local_queue is None: - local_queue = self.output_test_dir + "/queue/" + # check global queue + global_len: int = len(os.listdir(self.crash_dir)) + L.debug("Global seed queue: `%s` with %d files", global_queue, global_len) - local_len: int = self._queue_len(local_queue) - L.debug("Fuzzer local seed queue: %s with %d files", local_queue, local_len) + # update local queue with new findings + self._sync_seeds(src=self.pull_dir, dest=self.push_dir) - # sanity check: if global queue is empty, populate from local queue - if (global_len == 0) and (local_len > 0): - L.info("Nothing in global queue, pushing seeds from local queue") - self._sync_seeds("PUSH", local_queue, global_queue) - return + # check local queue + local_len: int = len(os.listdir(self.push_dir)) + L.debug("Fuzzer local seed queue: `%s` with %d files", local_queue, local_len) # get seeds from local to global queue, rsync will deal with duplicates - self._sync_seeds("GET", global_queue, local_queue) + self._sync_seeds(src=local_queue, dest=global_queue) # push seeds from global queue to local, rsync will deal with duplicates - self._sync_seeds("PUSH", global_queue, local_queue) + self._sync_seeds(src=global_queue, 
dest=local_queue) diff --git a/bin/deepstate/executors/fuzz/afl.py b/bin/deepstate/executors/fuzz/afl.py index 7dee4cf5..915f77ca 100644 --- a/bin/deepstate/executors/fuzz/afl.py +++ b/bin/deepstate/executors/fuzz/afl.py @@ -33,6 +33,12 @@ class AFL(FuzzerFrontend): "COMPILER": "afl-clang++" } + REQUIRE_SEEDS = True + + PUSH_DIR = os.path.join("sync_dir", "queue") + PULL_DIR = os.path.join("the_fuzzer", "queue") + CRASH_DIR = os.path.join("the_fuzzer", "crashes") + @classmethod def parse_args(cls) -> None: parser: argparse.ArgumentParser = argparse.ArgumentParser( @@ -76,16 +82,9 @@ def pre_exec(self): if f_min.read() != f_max.read(): raise FuzzFrontendError("Suboptimal CPU scaling governor. Execute 'echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor'") - # set input/output variables - # if we aren't in dumb mode, or we are using crash mode - if 'n' not in self.fuzzer_args or 'C' in self.fuzzer_args: - self.require_seeds = True - - sync_dir = os.path.join(self.output_test_dir, "sync_dir") - main_dir = os.path.join(self.output_test_dir, "the_fuzzer") - self.push_dir = os.path.join(sync_dir, "queue") - self.pull_dir = os.path.join(main_dir, "queue") - self.crash_dir = os.path.join(main_dir, "crashes") + # if we are in dumb mode and we are not using crash mode + if 'n' in self.fuzzer_args and 'C' not in self.fuzzer_args: + self.require_seeds = False # resume fuzzing if len(os.listdir(self.output_test_dir)) > 1: @@ -164,8 +163,8 @@ def reporter(self) -> Dict[str, Optional[str]]: }) - def _sync_seeds(self, mode, src, dest, excludes=["*.cur_input"]) -> None: - super()._sync_seeds(mode, src, dest, excludes=excludes) + def _sync_seeds(self, src, dest, excludes=["*.cur_input", ".state"]) -> None: + super()._sync_seeds(src, dest, excludes=excludes) def post_exec(self) -> None: @@ -175,11 +174,7 @@ def post_exec(self) -> None: both sync_dir and local queue. 
""" # TODO: merge output_test_dir/the_fuzzer/crashes* into one dir - if self.post_stats: - print(f"\n{self.name} RUN STATS:\n") - for stat, val in self.stats.items(): - fstat: str = stat.replace("_", " ").upper() - print(f"{fstat}:\t\t\t{val}") + super().post_exec() def main(): diff --git a/bin/deepstate/executors/fuzz/angora.py b/bin/deepstate/executors/fuzz/angora.py index a85c1c77..f38a080e 100644 --- a/bin/deepstate/executors/fuzz/angora.py +++ b/bin/deepstate/executors/fuzz/angora.py @@ -39,6 +39,12 @@ class Angora(FuzzerFrontend): "CLANG_COMPILER": "clang++" } + REQUIRE_SEEDS = True + + PUSH_DIR = os.path.join("sync_dir", "queue") + PULL_DIR = os.path.join("angora", "queue") + CRASH_DIR = os.path.join("angora", "crashes") + @classmethod def parse_args(cls) -> None: @@ -143,14 +149,6 @@ def pre_exec(self): if not os.path.exists(self.taint_binary): raise FuzzFrontendError("Taint binary doesn't exist") - # set input/output variables - self.require_seeds = True - sync_dir = os.path.join(self.output_test_dir, "sync_dir") - main_dir = os.path.join(self.output_test_dir, "angora") - self.push_dir = os.path.join(sync_dir, "queue") - self.pull_dir = os.path.join(main_dir, "queue") - self.crash_dir = os.path.join(main_dir, "crashes") - # resume fuzzing if len(os.listdir(self.output_test_dir)) > 1: self.check_required_directories([self.push_dir, self.pull_dir, self.crash_dir]) diff --git a/bin/deepstate/executors/fuzz/eclipser.py b/bin/deepstate/executors/fuzz/eclipser.py index ebfffcf8..592856a8 100644 --- a/bin/deepstate/executors/fuzz/eclipser.py +++ b/bin/deepstate/executors/fuzz/eclipser.py @@ -40,6 +40,12 @@ class Eclipser(FuzzerFrontend): "RUNNER": "dotnet" } + REQUIRE_SEEDS = False + + PUSH_DIR = os.path.join("sync_dir", "queue") + PULL_DIR = os.path.join("sync_dir", "queue") + CRASH_DIR = os.path.join("the_fuzzer", "crashes") + def print_help(self): subprocess.call([self.EXECUTABLES["RUNNER"], self.fuzzer_exe, "fuzz", "--help"]) @@ -66,14 +72,14 @@ def 
pre_exec(self) -> None: sync_dir = os.path.join(self.output_test_dir, "sync_dir") main_dir = os.path.join(self.output_test_dir, "the_fuzzer") - self.push_dir = os.path.join(sync_dir, "queue") - self.pull_dir = self.push_dir - self.crash_dir = os.path.join(main_dir, "crashes") + + self.encoded_testcases_dir: str = os.path.join(self.output_test_dir, "the_fuzzer", "testcase") + self.encoded_crash_dir: str = os.path.join(self.output_test_dir, "the_fuzzer", "crash") # resume fuzzing if len(os.listdir(self.output_test_dir)) > 1: self.check_required_directories([self.push_dir, self.crash_dir, - os.path.join(main_dir, "crash"), os.path.join(main_dir, "testcase")]) + self.encoded_crash_dir, self.encoded_testcases_dir]) L.info(f"Resuming fuzzing using seeds from {self.pull_dir} (skipping --input_seeds option).") self.decode_testcases() self.input_seeds = self.push_dir @@ -149,19 +155,17 @@ def ensemble(self) -> None: # type: ignore def decode_testcases(self): L.info("Performing decoding on testcases and crashes") - encoded_testcases_path: str = os.path.join(self.output_test_dir, "the_fuzzer", "testcase") - encoded_crashes_path: str = os.path.join(self.output_test_dir, "the_fuzzer", "crash") decoded_path: str = os.path.join(self.output_test_dir, "decoded") subprocess.call([self.EXECUTABLES["RUNNER"], self.fuzzer_exe, "decode", - "-i", encoded_crashes_path, "-o", decoded_path], + "-i", self.encoded_crash_dir, "-o", decoded_path], stdout=subprocess.PIPE) for f in glob.glob(os.path.join(decoded_path, "decoded_files", "*")): shutil.copy(f, self.crash_dir) shutil.rmtree(decoded_path) subprocess.call([self.EXECUTABLES["RUNNER"], self.fuzzer_exe, "decode", - "-i", encoded_testcases_path, "-o", decoded_path], + "-i", self.encoded_testcases_dir, "-o", decoded_path], stdout=subprocess.PIPE) for f in glob.glob(os.path.join(decoded_path, "decoded_files", "*")): shutil.copy(f, self.pull_dir) diff --git a/bin/deepstate/executors/fuzz/honggfuzz.py b/bin/deepstate/executors/fuzz/honggfuzz.py 
index 8cb89a0d..7e55e391 100644 --- a/bin/deepstate/executors/fuzz/honggfuzz.py +++ b/bin/deepstate/executors/fuzz/honggfuzz.py @@ -31,6 +31,12 @@ class Honggfuzz(FuzzerFrontend): "COMPILER": "hfuzz-clang++" } + REQUIRE_SEEDS = True + + PUSH_DIR = os.path.join("sync_dir", "queue") + PULL_DIR = os.path.join("sync_dir", "queue") + CRASH_DIR = os.path.join("the_fuzzer", "crashes") + @classmethod def parse_args(cls) -> None: @@ -56,16 +62,8 @@ def compile(self) -> None: # type: ignore def pre_exec(self): - self.require_seeds = True - super().pre_exec() - sync_dir = os.path.join(self.output_test_dir, "sync_dir") - main_dir = os.path.join(self.output_test_dir, "the_fuzzer") - self.push_dir = os.path.join(sync_dir, "queue") - self.pull_dir = self.push_dir - self.crash_dir = os.path.join(main_dir, "crashes") - # resume fuzzing if len(os.listdir(self.output_test_dir)) > 1: self.check_required_directories([self.push_dir, self.pull_dir, self.crash_dir]) @@ -141,10 +139,7 @@ def reporter(self) -> Dict[str, Optional[str]]: def post_exec(self) -> None: - if self.post_stats: - print("\n") - for k, v in self.stats.items(): - print(f"{k} : {v}") + super().post_exec() def main(): diff --git a/bin/deepstate/executors/fuzz/libfuzzer.py b/bin/deepstate/executors/fuzz/libfuzzer.py index a4da34cc..ae3fb409 100644 --- a/bin/deepstate/executors/fuzz/libfuzzer.py +++ b/bin/deepstate/executors/fuzz/libfuzzer.py @@ -30,6 +30,12 @@ class LibFuzzer(FuzzerFrontend): "COMPILER": "clang++" } + REQUIRE_SEEDS = False + + PUSH_DIR = os.path.join("sync_dir", "queue") + PULL_DIR = os.path.join("sync_dir", "queue") + CRASH_DIR = os.path.join("the_fuzzer", "crashes") + @classmethod def parse_args(cls) -> None: parser: argparse.ArgumentParser = argparse.ArgumentParser( @@ -68,12 +74,6 @@ def pre_exec(self) -> None: if self.blackbox is True: raise FuzzFrontendError("Blackbox fuzzing is not supported by libFuzzer.") - sync_dir = os.path.join(self.output_test_dir, "sync_dir") - main_dir = 
os.path.join(self.output_test_dir, "the_fuzzer") - self.push_dir = os.path.join(sync_dir, "queue") - self.pull_dir = self.push_dir - self.crash_dir = os.path.join(main_dir, "crashes") - # resuming fuzzing if len(os.listdir(self.output_test_dir)) > 0: self.check_required_directories([self.push_dir, self.pull_dir, self.crash_dir]) @@ -99,7 +99,8 @@ def cmd(self): # "-jobs={}".format(2), # crashes deepstate ;/ "-workers={}".format(1), "-reload=1", - "-runs=-1" + "-runs=-1", + "-print_final_stats=1" ]) for key, val in self.fuzzer_args: @@ -115,9 +116,6 @@ def cmd(self): if self.exec_timeout: cmd_list.append("-timeout={}".format(self.exec_timeout / 1000)) - if self.post_stats: - cmd_list.append("-print_final_stats={}".format(1)) - # must be here, this are positional args cmd_list.append(self.push_dir) # no auto-create, reusable @@ -130,7 +128,7 @@ def cmd(self): def populate_stats(self): super().populate_stats() - if not self.proc or self.proc.stderr.closed: + if not self.proc or not self.proc.stderr or self.proc.stderr.closed: return # libFuzzer under DeepState have broken output @@ -146,21 +144,16 @@ def populate_stats(self): # new event code self.stats["execs_done"] = line.split()[0].strip(b"#").decode() - for line in self.proc.stderr.readlines(100): - line = line.split(b":", 1)[1].strip() - if not line or line == b'\n': - done_reading = True - break - - if b": " in line: - key, value = line.split(b": ", 1) - if key == b"exec/s": - self.stats["execs_per_sec"] = value.decode() - elif key == b"units": - self.stats["paths_total"] = value.decode() - elif key == b"cov": - self.stats["bitmap_cvg"] = value.decode() - + elif ":" in line: + line = line.split(b":", 1)[1].strip() + if b": " in line: + key, value = line.split(b": ", 1) + if key == b"exec/s": + self.stats["execs_per_sec"] = value.decode() + elif key == b"units": + self.stats["paths_total"] = value.decode() + elif key == b"cov": + self.stats["bitmap_cvg"] = value.decode() def post_exec(self): diff --git 
a/examples/EnsembledCrash.cpp b/examples/EnsembledCrash.cpp new file mode 100644 index 00000000..ba458b15 --- /dev/null +++ b/examples/EnsembledCrash.cpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2019 Trail of Bits, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include + +using namespace deepstate; + +DEEPSTATE_NOINLINE static void segfault(char *first, char* second) { + std::size_t hashed = std::hash{}(first); + std::size_t hashed2 = std::hash{}(second); + unsigned *p = NULL; + if (hashed == 7169420828666634849U) { + if (hashed2 == 10753164746288518855U) { + *(p+2) = 0xdeadbeef; /* second crash */ + } + *(p+1) = 0xdeadbabe; /* first crash */ + } +} + +TEST(SimpleCrash, SegFault) { + char *first = (char*)DeepState_Malloc(9); + char *second = (char*)DeepState_Malloc(9); + read(0, first, 9); + read(0, second, 9); + for (int i = 0; i < 9; ++i) + printf("%02x", (unsigned char)first[i]); + printf("\n"); + for (int i = 0; i < 9; ++i) + printf("%02x", (unsigned char)second[i]); + + segfault(first, second); + + ASSERT_EQ(first, first); + ASSERT_NE(first, second); +} diff --git a/tests/test_fuzzers.py b/tests/test_fuzzers.py index 4a10cf01..5fd694ad 100644 --- a/tests/test_fuzzers.py +++ b/tests/test_fuzzers.py @@ -35,7 +35,7 @@ def do_compile(tempdir, test_source_file): # return compiled file(s) # if Angora fuzzer, file.taint should be before file.fast - if any([compiled_file.endswith('.taint') for compiled_file in compiled_files]): + if 
any([compiled_file.endswith('.taint.angora') for compiled_file in compiled_files]): compiled_files = sorted(compiled_files, reverse=True) return compiled_files diff --git a/tests/test_fuzzers_sync.py b/tests/test_fuzzers_sync.py new file mode 100644 index 00000000..7113f191 --- /dev/null +++ b/tests/test_fuzzers_sync.py @@ -0,0 +1,231 @@ +from __future__ import print_function + +import base64 +import deepstate_base +import logrun +import os +import re +import subprocess +import sys +import time + +from base64 import b64decode +from glob import glob +from os import path +from pathlib import Path +from shutil import rmtree +from tempfile import TemporaryDirectory +from tempfile import mkdtemp +from tempfile import mkstemp +from time import sleep +from unittest import TestCase + + +class CrashFuzzerTest(TestCase): + def test_fuzzers_synchronization(self): + def do_compile(fuzzer, tempdir, test_source_file): + """ + Compile test_source_file using frontend API + temdir is a workspace + """ + print(f"Compiling testcase for fuzzer {fuzzer}") + + # prepare args + output_test_name = path.join(tempdir, Path(test_source_file).stem) + _, output_log_file = mkstemp(dir=tempdir) + arguments = [ + "--compile_test", test_source_file, + "--out_test_name", output_test_name + ] + + # run command + proc = subprocess.Popen([f"deepstate-{fuzzer}"] + arguments) + proc.communicate() + compiled_files = glob(output_test_name + f"*.{fuzzer}") + + # check output + self.assertEqual(proc.returncode, 0) + for compiled_file in compiled_files: + self.assertTrue(path.isfile(compiled_file)) + + # return compiled file(s) + # if Angora fuzzer, file.taint should be before file.fast + if any([compiled_file.endswith('.taint.angora') for compiled_file in compiled_files]): + compiled_files = sorted(compiled_files, reverse=True) + return compiled_files + + + def do_fuzz(fuzzer, workspace_dir, sync_dir, compiled_files, output_from_fuzzer=None): + """ + Fuzz compiled_files (single compiled test/harness or two 
files if Angora) + until first crash + """ + # prepare args + output_dir = mkdtemp(prefix=f"deepstate_{fuzzer}_", dir=workspace_dir) + + arguments = [ + "--output_test_dir", output_dir, + "--sync_dir", sync_dir, + "--sync_cycle", "5", + "--min_log_level", "0" + ] + compiled_files + + # run command + exe = f"deepstate-{fuzzer}" + cmd = ' '.join([exe] + arguments) + print(f"Running: `{cmd}`.") + if output_from_fuzzer and output_from_fuzzer == fuzzer: + proc = subprocess.Popen([exe] + arguments + ["--fuzzer_out"]) + else: + proc = subprocess.Popen([exe] + arguments, + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + return output_dir, proc + + + def crashes_found(fuzzer, output): + """ + Check if some crash were found assuming that + fuzzer output is the deepstate one (--fuzzer_out == False) + """ + no_crashes = 0 + for crashes_stat in re.finditer(r"^unique_crashes:(\d+)$", + output, re.MULTILINE): + no_crashes = int(crashes_stat.group(1)) + print(f"Crashes found by fuzzer {fuzzer} - {no_crashes}.") + return 0 + + + def wait_for_crashes(fuzzers, timeout, crashes_required): + fuzzers_done = set() + start_time = int(time.time()) + + while len(fuzzers_done) < len(fuzzers): + self.assertLess(time.time() - start_time, timeout) + for fuzzer, values in fuzzers.items(): + + try: + stats = dict() + with open(values["stats_file"], "r") as f: + for line in f: + line = line.strip() + if ":" not in line: + continue + k, v = line.split(":", 1) + stats[k] = v + + print("{:10s}:".format(fuzzer), end="\t") + for stat in ["unique_crashes", "sync_dir_size", "execs_done", "paths_total"]: + if stat in stats: + print(f"{stat}: {stats[stat]}", end="\t|\t") + print("") + + if int(stats["unique_crashes"]) >= crashes_required: + fuzzers_done.add(fuzzer) + except FileNotFoundError: + print(f"Stats for {fuzzer} (`{values['stats_file']}`) - not found") + sleep(1) + + print(f"CRASH {crashes_required} - done") + print("-"*50) + + + def do_sync_test(output_from_fuzzer=None): + # start all fuzzers + 
for fuzzer in fuzzers.keys(): + output_dir, proc = do_fuzz(fuzzer, workspace_dir, sync_dir, + fuzzers[fuzzer]["compiled_files"], + output_from_fuzzer) + fuzzers[fuzzer]["output_dir"] = output_dir + fuzzers[fuzzer]["proc"] = proc + fuzzers[fuzzer]["stats_file"] = os.path.join(output_dir, "deepstate-stats.txt") + + # import Frontend classes so we can use PUSH/PULL/CRASH dirs + deepstate_python = os.path.join(os.path.dirname(__file__), "bin", "deepstate") + print(f"Adding deepstate python path: {deepstate_python}.") + sys.path.append(deepstate_python) + + from deepstate.executors.fuzz.afl import AFL + fuzzers["afl"]["class"] = AFL + # from deepstate.executors.fuzz.angora import Angora + # fuzzers["angora"]["class"] = Angora + # from deepstate.executors.fuzz.honggfuzz import Honggfuzz + # from deepstate.executors.fuzz.eclipser import Eclipser + from deepstate.executors.fuzz.libfuzzer import LibFuzzer + fuzzers["libfuzzer"]["class"] = LibFuzzer + + # run them for a bit + print("Fuzzers started, waiting 5 seconds.") + sleep(2) + + # assert that all fuzzers started + print("Checking if fuzzers are up and running") + for fuzzer, values in fuzzers.items(): + try: + self.assertTrue(values["proc"].poll() is None) + except Exception as e: + print(f"Error for fuzzer {fuzzer}:") + print(values["proc"].stderr.read().decode('utf8')) + raise e + push_dir = os.path.join(values["output_dir"], values["class"].PUSH_DIR) + self.assertTrue(os.path.isdir(push_dir)) + + # manually push first crashing seed to AFL local dir + push_dir = os.path.join(fuzzers["afl"]["output_dir"], fuzzers["afl"]["class"].PUSH_DIR) + print(f"Pushing seed 1 to AFL: `{push_dir}`") + with open(os.path.join(push_dir, "id:000101,first_crash"), "wb") as f: + f.write(b64decode("R3JvcyBwemRyQUFBQUFBQUFB")) + + # check if all fuzzers find first crash using afl's seed + wait_for_crashes(fuzzers, one_crash_sync_timeout, 1) + + # # manually push second crashing seed to Angora local dir + # push_dir = 
os.path.join(fuzzers["angora"]["output_dir"], ANGORA_PUSH_DIR) + # print(f"Pushing seed 2 to Angora: `{push_dir}`") + # with open(os.path.join(push_dir, "id:000202,second_crash"), "wb") as f: + # f.write(b64decode("R3JvcyBwemRyIGZyb20gUEwu")) + + # # check if all fuzzers find first crash using afl's seed + # wait_for_crashes(fuzzers, one_crash_sync_timeout, 2) + + + # config + fuzzers_list = ["afl", "libfuzzer"] + output_from_fuzzer = None + + # init + fuzzers = dict() + one_crash_sync_timeout = 4*60 + test_source_file = "examples/EnsembledCrash.cpp" + sync_dir = mkdtemp(prefix="syncing_") + workspace_dir = mkdtemp(prefix="workspace_") + compiled_files_dir = mkdtemp(prefix="compiled_", dir=workspace_dir) + + # compile for all fuzzers + for fuzzer in fuzzers_list: + compiled_files = do_compile(fuzzer, compiled_files_dir, test_source_file) + fuzzers[fuzzer] = {"compiled_files": compiled_files} + + # do testing + try: + print("Starting synchronization run") + do_sync_test(output_from_fuzzer) + except Exception as e: + # cleanup + # hard kill processes + for _, value in fuzzers.items(): + try: + proc = value["proc"] + for some_proc in psutil.Process(proc.pid).children(recursive=True) + [proc]: + some_proc.kill() + except: + pass + + # filesystem + try: + rmtree(workspace_dir, ignore_errors=True) + rmtree(sync_dir, ignore_errors=True) + except: + pass + + # now can raise + raise e From b16241d07df760efdf5d805a0d24d8f5381b333e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Sat, 15 Feb 2020 00:41:47 +0100 Subject: [PATCH 25/46] pyflakes errors fix --- bin/deepstate/core/fuzz.py | 4 ++-- bin/deepstate/executors/fuzz/eclipser.py | 3 --- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/bin/deepstate/core/fuzz.py b/bin/deepstate/core/fuzz.py index a43caea4..d0760c4b 100644 --- a/bin/deepstate/core/fuzz.py +++ b/bin/deepstate/core/fuzz.py @@ -874,8 +874,8 @@ def ensemble(self, local_queue: Optional[str] = None, global_queue: Optional[str 
L.warning("Called `ensemble`, but `--sync_dir` not provided.") return - global_queue: str = self.sync_dir - local_queue: str = self.push_dir + global_queue = self.sync_dir + local_queue = self.push_dir # check global queue global_len: int = len(os.listdir(self.crash_dir)) diff --git a/bin/deepstate/executors/fuzz/eclipser.py b/bin/deepstate/executors/fuzz/eclipser.py index 592856a8..0ccc5952 100644 --- a/bin/deepstate/executors/fuzz/eclipser.py +++ b/bin/deepstate/executors/fuzz/eclipser.py @@ -70,9 +70,6 @@ def pre_exec(self) -> None: # TODO handle that somehow L.warning("Eclipser doesn't limit child processes memory.") - sync_dir = os.path.join(self.output_test_dir, "sync_dir") - main_dir = os.path.join(self.output_test_dir, "the_fuzzer") - self.encoded_testcases_dir: str = os.path.join(self.output_test_dir, "the_fuzzer", "testcase") self.encoded_crash_dir: str = os.path.join(self.output_test_dir, "the_fuzzer", "crash") From 0d4ac4b4b9e01a91d74a34b4df9eb27aa2970d1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Sat, 15 Feb 2020 21:34:50 +0100 Subject: [PATCH 26/46] revert to old docker CI --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2217be29..fae3267d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -87,7 +87,7 @@ jobs: context: . 
dockerfile: docker/Dockerfile push_image_and_stages: true # because we run workflow on PRs - build_extra_args: "--cache-from=deepstate-base --build-arg=make_j=2" + build_extra_args: "--cache-from=deepstate-base" - name: Test fuzzers run: | docker run -it deepstate bash -c 'sudo pip3 install nose && nosetests tests/test_fuzzers.py' From 601fdb1d54dbd6c0bc239cec86c14b93cf65b190 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Sat, 15 Feb 2020 21:35:12 +0100 Subject: [PATCH 27/46] mv num_workers to symex only --- bin/deepstate/core/base.py | 1 - bin/deepstate/core/symex.py | 6 +++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/bin/deepstate/core/base.py b/bin/deepstate/core/base.py index 66039cac..b60ac80c 100644 --- a/bin/deepstate/core/base.py +++ b/bin/deepstate/core/base.py @@ -77,7 +77,6 @@ def __init__(self): self.binary: Optional[str] = None self.output_test_dir: str self.timeout: int = 0 - self.num_workers: int = 1 self.mem_limit: int = 50 self.min_log_level: int = 2 diff --git a/bin/deepstate/core/symex.py b/bin/deepstate/core/symex.py index a6812564..e17c51cc 100644 --- a/bin/deepstate/core/symex.py +++ b/bin/deepstate/core/symex.py @@ -46,7 +46,7 @@ class SymexFrontend(AnalysisBackend): """Wrapper around a symbolic executor for making it easy to do common DeepState- specific things.""" def __init__(self): - pass + self.num_workers: int = 1 def get_context(self): raise NotImplementedError("Must be implemented by engine.") @@ -112,6 +112,10 @@ def parse_args(cls): "--verbosity", default=1, type=int, help="Verbosity level for symbolic execution tool (default: 1, lower means less output).") + parser.add_argument( + "-w", "--num_workers", default=1, type=int, + help="Number of worker jobs to spawn for analysis (default is 1).") + cls.parser = parser return super(SymexFrontend, cls).parse_args() From 4bd377b53dd4c86b304b87cdf78bfcec8d0aa507 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Sat, 15 
Feb 2020 21:35:40 +0100 Subject: [PATCH 28/46] fix fuzzer syncing --- bin/deepstate/core/fuzz.py | 58 +++++++---- bin/deepstate/executors/fuzz/afl.py | 20 ++-- bin/deepstate/executors/fuzz/libfuzzer.py | 53 +++++----- examples/EnsembledCrash.cpp | 11 +- tests/test_fuzzers_sync.py | 119 +++++++++++++--------- 5 files changed, 158 insertions(+), 103 deletions(-) diff --git a/bin/deepstate/core/fuzz.py b/bin/deepstate/core/fuzz.py index d0760c4b..27d012a3 100644 --- a/bin/deepstate/core/fuzz.py +++ b/bin/deepstate/core/fuzz.py @@ -61,6 +61,7 @@ def __init__(self, envvar: str) -> None: - require_seeds - stats (dict that frontend should populate in populate_stats method) - stats_file (file where to put stats from fuzzer in common format) + - output_file (file where stdout of fuzzer will be redirected) - proc (handler to fuzzer process) - push_dir (push testcases from external sources here) @@ -94,16 +95,22 @@ def __init__(self, envvar: str) -> None: self.proc: subprocess.Popen[bytes] self.require_seeds: bool = False - self.stats_file: str = 'deepstate-stats.txt' + self.stats_file: str = "deepstate-stats.txt" + self.output_file: str = "fuzzer-output.txt" # same as AFL's (https://github.com/google/AFL/blob/master/docs/status_screen.txt) self.stats: Dict[str, Optional[str]] = { - "last_update": None, - "start_time": None, + # guaranteed + "unique_crashes": None, "fuzzer_pid": None, - "cycles_done": None, + "start_time": None, + "sync_dir_size": None, + + # not guaranteed "execs_done": None, "execs_per_sec": None, + "last_update": None, + "cycles_done": None, "paths_total": None, "paths_favored": None, "paths_found": None, @@ -115,7 +122,6 @@ def __init__(self, envvar: str) -> None: "variable_paths": None, "stability": None, "bitmap_cvg": None, - "unique_crashes": None, "unique_hangs": None, "last_path": None, "last_crash": None, @@ -123,7 +129,6 @@ def __init__(self, envvar: str) -> None: "execs_since_crash": None, "slowest_exec_ms": None, "peak_rss_mb": None, - 
"sync_dir_size": None } # parsed argument attributes @@ -482,8 +487,9 @@ def pre_exec(self): if not os.path.isdir(self.output_test_dir): raise FuzzFrontendError(f"Output test dir (`{self.output_test_dir}`) is not a directory.") - # update stats file + # update stats and output file self.stats_file = os.path.join(self.output_test_dir, self.stats_file) + self.output_file = os.path.join(self.output_test_dir, self.output_file) # require seeds flag self.require_seeds = self.REQUIRE_SEEDS @@ -584,13 +590,7 @@ def manage(self): # invoke ensemble if sync_dir is provided if self.sync_dir: L.info("%s - Performing sync cycle %s", self.name, self.sync_count) - - # call ensemble to perform seed synchronization self.ensemble() - - # update global statistics - self.stats["sync_dir_size"] = str(len(os.listdir(self.sync_dir))) - self.sync_count += 1 @@ -657,6 +657,10 @@ def run(self, runner: Optional[str] = None, no_exec: bool = False): run_fuzzer: bool = True prev_log_level = L.level + # for fuzzer output + if not self.fuzzer_out: + fuzzer_out_file = open(self.output_file, "wb") + # run or resume fuzzer process as long as it is needed # may create new processes continuously while run_fuzzer: @@ -672,7 +676,7 @@ def run(self, runner: Optional[str] = None, no_exec: bool = False): L.info("Using DeepState output.") # TODO: frontends uses blocking read in `populate_stats`, # we may replace PIPE with normal file and do reads non-blocking - self.proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + self.proc = subprocess.Popen(command, stdout=fuzzer_out_file, stderr=fuzzer_out_file) run_one_fuzzer_process = True L.info("Started fuzzer process with PID %d.", self.proc.pid) @@ -757,14 +761,19 @@ def run(self, runner: Optional[str] = None, no_exec: bool = False): # cleanup try: - L.setLevel(prev_log_level) self.cleanup() except: pass - # TODO: resume if needed - break + if run_fuzzer: + self.post_exec() + # and... loop again! 
+ + if not self.fuzzer_out: + fuzzer_out_file.close() + + L.setLevel(prev_log_level) # calculate total execution time exec_time: float = round(time.time() - self.start_time, 2) L.info("Fuzzer exec time: %ss", exec_time) @@ -795,7 +804,14 @@ def populate_stats(self): feedback. """ crashes: int = len(os.listdir(self.crash_dir)) + if os.path.isfile(os.path.join(self.crash_dir, "README.txt")): + crashes -= 1 self.stats["unique_crashes"] = str(crashes) + self.stats["start_time"] = str(int(self.start_time)) + if self.proc: + self.stats["fuzzer_pid"] = str(self.proc.pid) + if self.sync_dir: + self.stats["sync_dir_size"] = str(len(os.listdir(self.sync_dir))) def print_stats(self): @@ -857,7 +873,8 @@ def _sync_seeds(self, src: str, dest: str, excludes: List[str] = []) -> None: dest ] - L.debug("rsync command: %s", rsync_cmd) + # L.debug("rsync command: %s", rsync_cmd) + L.debug("rsync %s: from `%s` to `%s`.", self.name, src, dest) try: subprocess.Popen(rsync_cmd) except subprocess.CalledProcessError as e: @@ -874,8 +891,10 @@ def ensemble(self, local_queue: Optional[str] = None, global_queue: Optional[str L.warning("Called `ensemble`, but `--sync_dir` not provided.") return - global_queue = self.sync_dir + global_queue = os.path.join(self.sync_dir, "queue") + global_crashes = os.path.join(self.sync_dir, "crashes") local_queue = self.push_dir + local_crashes = self.crash_dir # check global queue global_len: int = len(os.listdir(self.crash_dir)) @@ -890,6 +909,7 @@ def ensemble(self, local_queue: Optional[str] = None, global_queue: Optional[str # get seeds from local to global queue, rsync will deal with duplicates self._sync_seeds(src=local_queue, dest=global_queue) + self._sync_seeds(src=local_crashes, dest=global_crashes) # push seeds from global queue to local, rsync will deal with duplicates self._sync_seeds(src=global_queue, dest=local_queue) diff --git a/bin/deepstate/executors/fuzz/afl.py b/bin/deepstate/executors/fuzz/afl.py index 915f77ca..6c01fd29 100644 --- 
a/bin/deepstate/executors/fuzz/afl.py +++ b/bin/deepstate/executors/fuzz/afl.py @@ -140,14 +140,17 @@ def populate_stats(self): """ Retrieves and parses the stats file produced by AFL """ - super().populate_stats() stat_file_path: str = os.path.join(self.output_test_dir, "the_fuzzer", "fuzzer_stats") - with open(stat_file_path, "r") as stat_file: - for line in stat_file: - key = line.split(":", 1)[0].strip() - value = line.split(":", 1)[1].strip() - if key in self.stats: - self.stats[key] = value + # with open(stat_file_path, "r") as stat_file: + # for line in stat_file: + lines = open(stat_file_path, "r").readlines() + for line in lines: + L.error(' - `%s`',line) + key = line.split(":", 1)[0].strip() + value = line.split(":", 1)[1].strip() + if key in self.stats: + self.stats[key] = value + super().populate_stats() def reporter(self) -> Dict[str, Optional[str]]: @@ -163,7 +166,8 @@ def reporter(self) -> Dict[str, Optional[str]]: }) - def _sync_seeds(self, src, dest, excludes=["*.cur_input", ".state"]) -> None: + def _sync_seeds(self, src, dest, excludes=[]) -> None: + excludes += ["*.cur_input", ".state"] super()._sync_seeds(src, dest, excludes=excludes) diff --git a/bin/deepstate/executors/fuzz/libfuzzer.py b/bin/deepstate/executors/fuzz/libfuzzer.py index ae3fb409..bbbbefdc 100644 --- a/bin/deepstate/executors/fuzz/libfuzzer.py +++ b/bin/deepstate/executors/fuzz/libfuzzer.py @@ -96,8 +96,9 @@ def cmd(self): "-rss_limit_mb={}".format(self.mem_limit), "-max_len={}".format(self.max_input_size), "-artifact_prefix={}".format(self.crash_dir + "/"), - # "-jobs={}".format(2), # crashes deepstate ;/ - "-workers={}".format(1), + # "-jobs={}".format(0), + # "-workers={}".format(1), + # "-fork=1", "-reload=1", "-runs=-1", "-print_final_stats=1" @@ -128,35 +129,39 @@ def cmd(self): def populate_stats(self): super().populate_stats() - if not self.proc or not self.proc.stderr or self.proc.stderr.closed: + + if not os.path.isfile(self.output_file): return - # libFuzzer under 
DeepState have broken output - # splitted into multiple lines, preceeded with "EXTERNAL:" - done_reading: bool = False - for line in self.proc.stderr.readlines(100): - if done_reading: - break + with open(self.output_file, "rb") as f: + for line in f: + # libFuzzer under DeepState have broken output + # splitted into multiple lines, preceeded with "EXTERNAL:" + if line.startswith(b"EXTERNAL: "): + line = line.split(b":", 1)[1].strip() + if line.startswith(b"#"): + # new event code + self.stats["execs_done"] = line.split()[0].strip(b"#").decode() + + elif b":" in line: + line = line.split(b":", 1)[1].strip() + if b":" in line: + key, value = line.split(b":", 1) + if key == b"exec/s": + self.stats["execs_per_sec"] = value.strip().decode() + elif key == b"units": + self.stats["paths_total"] = value.strip().decode() + elif key == b"cov": + self.stats["bitmap_cvg"] = value.strip().decode() - if line.startswith(b"EXTERNAL: "): - line = line.split(b":", 1)[1].strip() - if line.startswith(b"#"): - # new event code - self.stats["execs_done"] = line.split()[0].strip(b"#").decode() - elif ":" in line: - line = line.split(b":", 1)[1].strip() - if b": " in line: - key, value = line.split(b": ", 1) - if key == b"exec/s": - self.stats["execs_per_sec"] = value.decode() - elif key == b"units": - self.stats["paths_total"] = value.decode() - elif key == b"cov": - self.stats["bitmap_cvg"] = value.decode() + def _sync_seeds(self, src, dest, excludes=[]) -> None: + excludes += ["*.cur_input", ".state"] + super()._sync_seeds(src, dest, excludes=excludes) def post_exec(self): + # TODO: remove crashes from seeds dir and from sync_dir pass diff --git a/examples/EnsembledCrash.cpp b/examples/EnsembledCrash.cpp index ba458b15..4b78a561 100644 --- a/examples/EnsembledCrash.cpp +++ b/examples/EnsembledCrash.cpp @@ -25,17 +25,16 @@ DEEPSTATE_NOINLINE static void segfault(char *first, char* second) { unsigned *p = NULL; if (hashed == 7169420828666634849U) { if (hashed2 == 10753164746288518855U) { 
- *(p+2) = 0xdeadbeef; /* second crash */ + *(p+2) = 0xdeadbeef; /* crash */ } - *(p+1) = 0xdeadbabe; /* first crash */ + printf("BOM\n"); } } TEST(SimpleCrash, SegFault) { - char *first = (char*)DeepState_Malloc(9); - char *second = (char*)DeepState_Malloc(9); - read(0, first, 9); - read(0, second, 9); + char *first = (char*)DeepState_CStr_C(9, 0); + char *second = (char*)DeepState_CStr_C(9, 0); + for (int i = 0; i < 9; ++i) printf("%02x", (unsigned char)first[i]); printf("\n"); diff --git a/tests/test_fuzzers_sync.py b/tests/test_fuzzers_sync.py index 7113f191..34e5780d 100644 --- a/tests/test_fuzzers_sync.py +++ b/tests/test_fuzzers_sync.py @@ -75,7 +75,7 @@ def do_fuzz(fuzzer, workspace_dir, sync_dir, compiled_files, output_from_fuzzer= cmd = ' '.join([exe] + arguments) print(f"Running: `{cmd}`.") if output_from_fuzzer and output_from_fuzzer == fuzzer: - proc = subprocess.Popen([exe] + arguments + ["--fuzzer_out"]) + proc = subprocess.Popen([exe] + arguments) else: proc = subprocess.Popen([exe] + arguments, stdout=subprocess.PIPE, stderr=subprocess.PIPE) @@ -95,14 +95,17 @@ def crashes_found(fuzzer, output): return 0 - def wait_for_crashes(fuzzers, timeout, crashes_required): - fuzzers_done = set() + def wait_for_crashes(fuzzers, timeout): + for fuzzer in fuzzers: + fuzzers[fuzzer]["no_crashes"] = 0 + start_time = int(time.time()) - while len(fuzzers_done) < len(fuzzers): - self.assertLess(time.time() - start_time, timeout) - for fuzzer, values in fuzzers.items(): + while any([v["no_crashes"] < 1 for _, v in fuzzers.items()]): + if timeout: + self.assertLess(time.time() - start_time, timeout, msg="TIMEOUT") + for fuzzer, values in fuzzers.items(): try: stats = dict() with open(values["stats_file"], "r") as f: @@ -114,18 +117,26 @@ def wait_for_crashes(fuzzers, timeout, crashes_required): stats[k] = v print("{:10s}:".format(fuzzer), end="\t") - for stat in ["unique_crashes", "sync_dir_size", "execs_done", "paths_total"]: - if stat in stats: - print(f"{stat}: 
{stats[stat]}", end="\t|\t") - print("") + if values["proc"].poll() is None: + for stat in ["unique_crashes", "sync_dir_size", "execs_done", "paths_total"]: + if stat in stats: + print("{}: {:10s}".format(stat, stats[stat]), end=" |\t") + print("") + fuzzers[fuzzer]["no_crashes"] = int(stats["unique_crashes"]) + else: + print("DEAD " + "OoOoo"*5 + "x...") - if int(stats["unique_crashes"]) >= crashes_required: - fuzzers_done.add(fuzzer) except FileNotFoundError: - print(f"Stats for {fuzzer} (`{values['stats_file']}`) - not found") + print(f" - stats not found (`{values['stats_file']}`).") + + for _ in range(3): + print("~*~"*5, end=" - ") + sys.stderr.flush() + sys.stdout.flush() sleep(1) + print("") - print(f"CRASH {crashes_required} - done") + print(f"CRASHING - done") print("-"*50) @@ -144,18 +155,31 @@ def do_sync_test(output_from_fuzzer=None): print(f"Adding deepstate python path: {deepstate_python}.") sys.path.append(deepstate_python) - from deepstate.executors.fuzz.afl import AFL - fuzzers["afl"]["class"] = AFL - # from deepstate.executors.fuzz.angora import Angora - # fuzzers["angora"]["class"] = Angora - # from deepstate.executors.fuzz.honggfuzz import Honggfuzz - # from deepstate.executors.fuzz.eclipser import Eclipser - from deepstate.executors.fuzz.libfuzzer import LibFuzzer - fuzzers["libfuzzer"]["class"] = LibFuzzer + if "afl" in fuzzers: + from deepstate.executors.fuzz.afl import AFL + fuzzers["afl"]["class"] = AFL + if "angora" in fuzzers: + from deepstate.executors.fuzz.angora import Angora + fuzzers["angora"]["class"] = Angora + if "honggfuzz" in fuzzers: + from deepstate.executors.fuzz.honggfuzz import Honggfuzz + fuzzers["honggfuzz"]["class"] = Honggfuzz + if "eclipser" in fuzzers: + from deepstate.executors.fuzz.eclipser import Eclipser + fuzzers["eclipser"]["class"] = Eclipser + if "libfuzzer" in fuzzers: + from deepstate.executors.fuzz.libfuzzer import LibFuzzer + fuzzers["libfuzzer"]["class"] = LibFuzzer # run them for a bit - print("Fuzzers 
started, waiting 5 seconds.") - sleep(2) + wait_for_start = 2 + print(f"Fuzzers started, waiting {wait_for_start} seconds.") + for _ in range(wait_for_start): + sleep(1) + print('.', end="") + sys.stderr.flush() + sys.stdout.flush() + print("") # assert that all fuzzers started print("Checking if fuzzers are up and running") @@ -164,37 +188,37 @@ def do_sync_test(output_from_fuzzer=None): self.assertTrue(values["proc"].poll() is None) except Exception as e: print(f"Error for fuzzer {fuzzer}:") - print(values["proc"].stderr.read().decode('utf8')) + if values["proc"] and values["proc"].stderr: + print(values["proc"].stderr.read().decode('utf8')) raise e push_dir = os.path.join(values["output_dir"], values["class"].PUSH_DIR) self.assertTrue(os.path.isdir(push_dir)) - # manually push first crashing seed to AFL local dir - push_dir = os.path.join(fuzzers["afl"]["output_dir"], fuzzers["afl"]["class"].PUSH_DIR) - print(f"Pushing seed 1 to AFL: `{push_dir}`") - with open(os.path.join(push_dir, "id:000101,first_crash"), "wb") as f: - f.write(b64decode("R3JvcyBwemRyQUFBQUFBQUFB")) - - # check if all fuzzers find first crash using afl's seed - wait_for_crashes(fuzzers, one_crash_sync_timeout, 1) - - # # manually push second crashing seed to Angora local dir - # push_dir = os.path.join(fuzzers["angora"]["output_dir"], ANGORA_PUSH_DIR) - # print(f"Pushing seed 2 to Angora: `{push_dir}`") - # with open(os.path.join(push_dir, "id:000202,second_crash"), "wb") as f: - # f.write(b64decode("R3JvcyBwemRyIGZyb20gUEwu")) + # manually push crashing seeds to fuzzers local dirs + seeds = [b64decode("R3JvcyBwemRyIGZyb20gUEwu")] + fuzzer_id = 0 + for seed_no, seed in enumerate(seeds): + fuzzer_id %= len(fuzzers) + fuzzer = sorted(fuzzers.keys())[fuzzer_id] + values = fuzzers[fuzzer] + push_dir = os.path.join(values["output_dir"], values["class"].PUSH_DIR) + print(f"Pushing seed {seed_no} to {fuzzer}: `{push_dir}`") + with open(os.path.join(push_dir, f"id:000201,the_crash"), "wb") as f: + 
f.write(seed) + fuzzer_id += 1 - # # check if all fuzzers find first crash using afl's seed - # wait_for_crashes(fuzzers, one_crash_sync_timeout, 2) + # check if all fuzzers find at least two crashes + # that is: the one pushed to its local dir and at least one other + wait_for_crashes(fuzzers, timeout) # config - fuzzers_list = ["afl", "libfuzzer"] - output_from_fuzzer = None + fuzzers_list = ["afl", "libfuzzer", "angora", "eclipser", "honggfuzz"] + output_from_fuzzer = None # or "afl" etc + timeout = None # init fuzzers = dict() - one_crash_sync_timeout = 4*60 test_source_file = "examples/EnsembledCrash.cpp" sync_dir = mkdtemp(prefix="syncing_") workspace_dir = mkdtemp(prefix="workspace_") @@ -212,6 +236,7 @@ def do_sync_test(output_from_fuzzer=None): except Exception as e: # cleanup # hard kill processes + print('Killing spawned processes.') for _, value in fuzzers.items(): try: proc = value["proc"] @@ -221,11 +246,13 @@ def do_sync_test(output_from_fuzzer=None): pass # filesystem + print("Clearing tmp files.") try: + sleep(1) rmtree(workspace_dir, ignore_errors=True) rmtree(sync_dir, ignore_errors=True) - except: - pass + except Exception as e2: + print(f"Error clearing: {e2}") # now can raise raise e From f7a48819a2876a412971419e2e240697ee9544f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Sat, 15 Feb 2020 21:35:40 +0100 Subject: [PATCH 29/46] fix fuzzer syncing --- bin/deepstate/core/fuzz.py | 82 +++++++++++---- bin/deepstate/executors/fuzz/afl.py | 19 ++-- bin/deepstate/executors/fuzz/angora.py | 9 +- bin/deepstate/executors/fuzz/eclipser.py | 2 +- bin/deepstate/executors/fuzz/honggfuzz.py | 1 + bin/deepstate/executors/fuzz/libfuzzer.py | 53 +++++----- examples/EnsembledCrash.cpp | 11 +- tests/test_fuzzers_sync.py | 121 ++++++++++++++-------- 8 files changed, 192 insertions(+), 106 deletions(-) diff --git a/bin/deepstate/core/fuzz.py b/bin/deepstate/core/fuzz.py index d0760c4b..a3a184e7 100644 --- a/bin/deepstate/core/fuzz.py +++ 
b/bin/deepstate/core/fuzz.py @@ -25,6 +25,7 @@ import traceback from tempfile import mkdtemp +from time import sleep from pathlib import Path from typing import Optional, Dict, List, Any, Tuple @@ -61,6 +62,7 @@ def __init__(self, envvar: str) -> None: - require_seeds - stats (dict that frontend should populate in populate_stats method) - stats_file (file where to put stats from fuzzer in common format) + - output_file (file where stdout of fuzzer will be redirected) - proc (handler to fuzzer process) - push_dir (push testcases from external sources here) @@ -94,16 +96,22 @@ def __init__(self, envvar: str) -> None: self.proc: subprocess.Popen[bytes] self.require_seeds: bool = False - self.stats_file: str = 'deepstate-stats.txt' + self.stats_file: str = "deepstate-stats.txt" + self.output_file: str = "fuzzer-output.txt" # same as AFL's (https://github.com/google/AFL/blob/master/docs/status_screen.txt) self.stats: Dict[str, Optional[str]] = { - "last_update": None, - "start_time": None, + # guaranteed + "unique_crashes": None, "fuzzer_pid": None, - "cycles_done": None, + "start_time": None, + "sync_dir_size": None, + + # not guaranteed "execs_done": None, "execs_per_sec": None, + "last_update": None, + "cycles_done": None, "paths_total": None, "paths_favored": None, "paths_found": None, @@ -115,7 +123,6 @@ def __init__(self, envvar: str) -> None: "variable_paths": None, "stability": None, "bitmap_cvg": None, - "unique_crashes": None, "unique_hangs": None, "last_path": None, "last_crash": None, @@ -123,7 +130,6 @@ def __init__(self, envvar: str) -> None: "execs_since_crash": None, "slowest_exec_ms": None, "peak_rss_mb": None, - "sync_dir_size": None } # parsed argument attributes @@ -482,8 +488,9 @@ def pre_exec(self): if not os.path.isdir(self.output_test_dir): raise FuzzFrontendError(f"Output test dir (`{self.output_test_dir}`) is not a directory.") - # update stats file + # update stats and output file self.stats_file = os.path.join(self.output_test_dir, 
self.stats_file) + self.output_file = os.path.join(self.output_test_dir, self.output_file) # require seeds flag self.require_seeds = self.REQUIRE_SEEDS @@ -584,13 +591,7 @@ def manage(self): # invoke ensemble if sync_dir is provided if self.sync_dir: L.info("%s - Performing sync cycle %s", self.name, self.sync_count) - - # call ensemble to perform seed synchronization self.ensemble() - - # update global statistics - self.stats["sync_dir_size"] = str(len(os.listdir(self.sync_dir))) - self.sync_count += 1 @@ -657,6 +658,10 @@ def run(self, runner: Optional[str] = None, no_exec: bool = False): run_fuzzer: bool = True prev_log_level = L.level + # for fuzzer output + if not self.fuzzer_out: + fuzzer_out_file = open(self.output_file, "wb") + # run or resume fuzzer process as long as it is needed # may create new processes continuously while run_fuzzer: @@ -672,7 +677,7 @@ def run(self, runner: Optional[str] = None, no_exec: bool = False): L.info("Using DeepState output.") # TODO: frontends uses blocking read in `populate_stats`, # we may replace PIPE with normal file and do reads non-blocking - self.proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + self.proc = subprocess.Popen(command, stdout=fuzzer_out_file, stderr=fuzzer_out_file) run_one_fuzzer_process = True L.info("Started fuzzer process with PID %d.", self.proc.pid) @@ -755,16 +760,26 @@ def run(self, runner: Optional[str] = None, no_exec: bool = False): run_one_fuzzer_process = False run_fuzzer = False + if self.do_restart(): + L.info(f"Restarting fuzzer {self.name}.") + run_one_fuzzer_process = False + # cleanup try: - L.setLevel(prev_log_level) self.cleanup() + sleep(10) # wait so all fuzzer processes are killed except: pass - # TODO: resume if needed - break + if run_fuzzer: + self.post_exec() + + # and... maybe loop again! 
+ if not self.fuzzer_out: + fuzzer_out_file.close() + + L.setLevel(prev_log_level) # calculate total execution time exec_time: float = round(time.time() - self.start_time, 2) L.info("Fuzzer exec time: %ss", exec_time) @@ -789,13 +804,38 @@ def reporter(self): return NotImplementedError("Must implement in frontend subclass.") + def do_restart(self): + """ + Some fuzzers need restart to use seeds from external sources + (can't pull seeds in runtime). + This function should determine if the fuzzer should be restarted too look + for new seeds. + This may be based on time of last new path discovered or whatever. + + Should return False if self.sync_dir is None. + """ + if not self.sync_dir: + return False + + if time.time() - self.start_time > 20: + return True + return False + + def populate_stats(self): """ Parses out stats generated by fuzzer output. Should be implemented by user, and can return custom feedback. """ crashes: int = len(os.listdir(self.crash_dir)) + if os.path.isfile(os.path.join(self.crash_dir, "README.txt")): + crashes -= 1 self.stats["unique_crashes"] = str(crashes) + self.stats["start_time"] = str(int(self.start_time)) + if self.proc: + self.stats["fuzzer_pid"] = str(self.proc.pid) + if self.sync_dir: + self.stats["sync_dir_size"] = str(len(os.listdir(self.sync_dir))) def print_stats(self): @@ -857,7 +897,8 @@ def _sync_seeds(self, src: str, dest: str, excludes: List[str] = []) -> None: dest ] - L.debug("rsync command: %s", rsync_cmd) + # L.debug("rsync command: %s", rsync_cmd) + L.debug("rsync %s: from `%s` to `%s`.", self.name, src, dest) try: subprocess.Popen(rsync_cmd) except subprocess.CalledProcessError as e: @@ -874,8 +915,10 @@ def ensemble(self, local_queue: Optional[str] = None, global_queue: Optional[str L.warning("Called `ensemble`, but `--sync_dir` not provided.") return - global_queue = self.sync_dir + global_queue = os.path.join(self.sync_dir, "queue") + global_crashes = os.path.join(self.sync_dir, "crashes") local_queue = 
self.push_dir + local_crashes = self.crash_dir # check global queue global_len: int = len(os.listdir(self.crash_dir)) @@ -890,6 +933,7 @@ def ensemble(self, local_queue: Optional[str] = None, global_queue: Optional[str # get seeds from local to global queue, rsync will deal with duplicates self._sync_seeds(src=local_queue, dest=global_queue) + self._sync_seeds(src=local_crashes, dest=global_crashes) # push seeds from global queue to local, rsync will deal with duplicates self._sync_seeds(src=global_queue, dest=local_queue) diff --git a/bin/deepstate/executors/fuzz/afl.py b/bin/deepstate/executors/fuzz/afl.py index 915f77ca..5747496a 100644 --- a/bin/deepstate/executors/fuzz/afl.py +++ b/bin/deepstate/executors/fuzz/afl.py @@ -140,14 +140,16 @@ def populate_stats(self): """ Retrieves and parses the stats file produced by AFL """ - super().populate_stats() stat_file_path: str = os.path.join(self.output_test_dir, "the_fuzzer", "fuzzer_stats") - with open(stat_file_path, "r") as stat_file: - for line in stat_file: - key = line.split(":", 1)[0].strip() - value = line.split(":", 1)[1].strip() - if key in self.stats: - self.stats[key] = value + # with open(stat_file_path, "r") as stat_file: + # for line in stat_file: + lines = open(stat_file_path, "r").readlines() + for line in lines: + key = line.split(":", 1)[0].strip() + value = line.split(":", 1)[1].strip() + if key in self.stats: + self.stats[key] = value + super().populate_stats() def reporter(self) -> Dict[str, Optional[str]]: @@ -163,7 +165,8 @@ def reporter(self) -> Dict[str, Optional[str]]: }) - def _sync_seeds(self, src, dest, excludes=["*.cur_input", ".state"]) -> None: + def _sync_seeds(self, src, dest, excludes=[]) -> None: + excludes += ["*.cur_input", ".state"] super()._sync_seeds(src, dest, excludes=excludes) diff --git a/bin/deepstate/executors/fuzz/angora.py b/bin/deepstate/executors/fuzz/angora.py index f38a080e..be44bd0a 100644 --- a/bin/deepstate/executors/fuzz/angora.py +++ 
b/bin/deepstate/executors/fuzz/angora.py @@ -206,8 +206,11 @@ def populate_stats(self): super().populate_stats() stat_file_path: str = os.path.join(self.output_test_dir, "angora", "fuzzer_stats") - with open(stat_file_path, "r") as stat_file: - self.stats["fuzzer_pid"] = stat_file.read().split(":", 1)[1].strip() + try: + with open(stat_file_path, "r") as stat_file: + self.stats["fuzzer_pid"] = stat_file.read().split(":", 1)[1].strip() + except: + pass stat_file_path = os.path.join(self.output_test_dir, "angora", "chart_stat.json") new_stats: Dict[str, str] = {} @@ -216,6 +219,8 @@ def populate_stats(self): new_stats = json.loads(stat_file.read()) except json.decoder.JSONDecodeError as e: L.error(f"Error parsing {stat_file_path}: {e}.") + except: + return # previous_stats = self.stats.copy() diff --git a/bin/deepstate/executors/fuzz/eclipser.py b/bin/deepstate/executors/fuzz/eclipser.py index 0ccc5952..5c42c34e 100644 --- a/bin/deepstate/executors/fuzz/eclipser.py +++ b/bin/deepstate/executors/fuzz/eclipser.py @@ -87,7 +87,7 @@ def pre_exec(self) -> None: L.info("Blackbox option is redundant. 
Eclipser works on non-instrumented binaries using QEMU by default.") if self.dictionary: - L.error("Angora can't use dictionaries.") + L.error("Eclipser can't use dictionaries.") @property diff --git a/bin/deepstate/executors/fuzz/honggfuzz.py b/bin/deepstate/executors/fuzz/honggfuzz.py index 7e55e391..08ea285a 100644 --- a/bin/deepstate/executors/fuzz/honggfuzz.py +++ b/bin/deepstate/executors/fuzz/honggfuzz.py @@ -85,6 +85,7 @@ def cmd(self): # "--logfile", os.path.join(self.output_test_dir, "hfuzz_log.txt"), # "--verbose", "--rlimit_rss", str(self.mem_limit), + "--threads", "1" ]) if self.max_input_size == 0: diff --git a/bin/deepstate/executors/fuzz/libfuzzer.py b/bin/deepstate/executors/fuzz/libfuzzer.py index ae3fb409..bbbbefdc 100644 --- a/bin/deepstate/executors/fuzz/libfuzzer.py +++ b/bin/deepstate/executors/fuzz/libfuzzer.py @@ -96,8 +96,9 @@ def cmd(self): "-rss_limit_mb={}".format(self.mem_limit), "-max_len={}".format(self.max_input_size), "-artifact_prefix={}".format(self.crash_dir + "/"), - # "-jobs={}".format(2), # crashes deepstate ;/ - "-workers={}".format(1), + # "-jobs={}".format(0), + # "-workers={}".format(1), + # "-fork=1", "-reload=1", "-runs=-1", "-print_final_stats=1" @@ -128,35 +129,39 @@ def cmd(self): def populate_stats(self): super().populate_stats() - if not self.proc or not self.proc.stderr or self.proc.stderr.closed: + + if not os.path.isfile(self.output_file): return - # libFuzzer under DeepState have broken output - # splitted into multiple lines, preceeded with "EXTERNAL:" - done_reading: bool = False - for line in self.proc.stderr.readlines(100): - if done_reading: - break + with open(self.output_file, "rb") as f: + for line in f: + # libFuzzer under DeepState have broken output + # splitted into multiple lines, preceeded with "EXTERNAL:" + if line.startswith(b"EXTERNAL: "): + line = line.split(b":", 1)[1].strip() + if line.startswith(b"#"): + # new event code + self.stats["execs_done"] = line.split()[0].strip(b"#").decode() + + 
elif b":" in line: + line = line.split(b":", 1)[1].strip() + if b":" in line: + key, value = line.split(b":", 1) + if key == b"exec/s": + self.stats["execs_per_sec"] = value.strip().decode() + elif key == b"units": + self.stats["paths_total"] = value.strip().decode() + elif key == b"cov": + self.stats["bitmap_cvg"] = value.strip().decode() - if line.startswith(b"EXTERNAL: "): - line = line.split(b":", 1)[1].strip() - if line.startswith(b"#"): - # new event code - self.stats["execs_done"] = line.split()[0].strip(b"#").decode() - elif ":" in line: - line = line.split(b":", 1)[1].strip() - if b": " in line: - key, value = line.split(b": ", 1) - if key == b"exec/s": - self.stats["execs_per_sec"] = value.decode() - elif key == b"units": - self.stats["paths_total"] = value.decode() - elif key == b"cov": - self.stats["bitmap_cvg"] = value.decode() + def _sync_seeds(self, src, dest, excludes=[]) -> None: + excludes += ["*.cur_input", ".state"] + super()._sync_seeds(src, dest, excludes=excludes) def post_exec(self): + # TODO: remove crashes from seeds dir and from sync_dir pass diff --git a/examples/EnsembledCrash.cpp b/examples/EnsembledCrash.cpp index ba458b15..4b78a561 100644 --- a/examples/EnsembledCrash.cpp +++ b/examples/EnsembledCrash.cpp @@ -25,17 +25,16 @@ DEEPSTATE_NOINLINE static void segfault(char *first, char* second) { unsigned *p = NULL; if (hashed == 7169420828666634849U) { if (hashed2 == 10753164746288518855U) { - *(p+2) = 0xdeadbeef; /* second crash */ + *(p+2) = 0xdeadbeef; /* crash */ } - *(p+1) = 0xdeadbabe; /* first crash */ + printf("BOM\n"); } } TEST(SimpleCrash, SegFault) { - char *first = (char*)DeepState_Malloc(9); - char *second = (char*)DeepState_Malloc(9); - read(0, first, 9); - read(0, second, 9); + char *first = (char*)DeepState_CStr_C(9, 0); + char *second = (char*)DeepState_CStr_C(9, 0); + for (int i = 0; i < 9; ++i) printf("%02x", (unsigned char)first[i]); printf("\n"); diff --git a/tests/test_fuzzers_sync.py b/tests/test_fuzzers_sync.py 
index 7113f191..af3f1c75 100644 --- a/tests/test_fuzzers_sync.py +++ b/tests/test_fuzzers_sync.py @@ -75,7 +75,7 @@ def do_fuzz(fuzzer, workspace_dir, sync_dir, compiled_files, output_from_fuzzer= cmd = ' '.join([exe] + arguments) print(f"Running: `{cmd}`.") if output_from_fuzzer and output_from_fuzzer == fuzzer: - proc = subprocess.Popen([exe] + arguments + ["--fuzzer_out"]) + proc = subprocess.Popen([exe] + arguments) else: proc = subprocess.Popen([exe] + arguments, stdout=subprocess.PIPE, stderr=subprocess.PIPE) @@ -95,14 +95,17 @@ def crashes_found(fuzzer, output): return 0 - def wait_for_crashes(fuzzers, timeout, crashes_required): - fuzzers_done = set() + def wait_for_crashes(fuzzers, timeout): + for fuzzer in fuzzers: + fuzzers[fuzzer]["no_crashes"] = 0 + start_time = int(time.time()) - while len(fuzzers_done) < len(fuzzers): - self.assertLess(time.time() - start_time, timeout) - for fuzzer, values in fuzzers.items(): + while any([v["no_crashes"] < 1 for _, v in fuzzers.items()]): + if timeout: + self.assertLess(time.time() - start_time, timeout, msg="TIMEOUT") + for fuzzer, values in fuzzers.items(): try: stats = dict() with open(values["stats_file"], "r") as f: @@ -114,18 +117,28 @@ def wait_for_crashes(fuzzers, timeout, crashes_required): stats[k] = v print("{:10s}:".format(fuzzer), end="\t") - for stat in ["unique_crashes", "sync_dir_size", "execs_done", "paths_total"]: - if stat in stats: - print(f"{stat}: {stats[stat]}", end="\t|\t") - print("") + if values["proc"].poll() is None: + for stat in ["unique_crashes", "sync_dir_size", "execs_done", "paths_total"]: + if stat in stats: + print("{}: {:10s}".format(stat, stats[stat]), end=" |\t") + print("") + fuzzers[fuzzer]["no_crashes"] = int(stats["unique_crashes"]) + else: + if "unique_crashes" in stats: + print("unique_crashes: {:10s}".format(stats["unique_crashes"]), end=" |\t") + print("DEAD " + "OoOoo"*5 + "x...") - if int(stats["unique_crashes"]) >= crashes_required: - fuzzers_done.add(fuzzer) except 
FileNotFoundError: - print(f"Stats for {fuzzer} (`{values['stats_file']}`) - not found") + print(f" - stats not found (`{values['stats_file']}`).") + + for _ in range(3): + print("~*~"*5, end=" - ") + sys.stderr.flush() + sys.stdout.flush() sleep(1) + print("") - print(f"CRASH {crashes_required} - done") + print(f"CRASHING - done") print("-"*50) @@ -144,18 +157,31 @@ def do_sync_test(output_from_fuzzer=None): print(f"Adding deepstate python path: {deepstate_python}.") sys.path.append(deepstate_python) - from deepstate.executors.fuzz.afl import AFL - fuzzers["afl"]["class"] = AFL - # from deepstate.executors.fuzz.angora import Angora - # fuzzers["angora"]["class"] = Angora - # from deepstate.executors.fuzz.honggfuzz import Honggfuzz - # from deepstate.executors.fuzz.eclipser import Eclipser - from deepstate.executors.fuzz.libfuzzer import LibFuzzer - fuzzers["libfuzzer"]["class"] = LibFuzzer + if "afl" in fuzzers: + from deepstate.executors.fuzz.afl import AFL + fuzzers["afl"]["class"] = AFL + if "angora" in fuzzers: + from deepstate.executors.fuzz.angora import Angora + fuzzers["angora"]["class"] = Angora + if "honggfuzz" in fuzzers: + from deepstate.executors.fuzz.honggfuzz import Honggfuzz + fuzzers["honggfuzz"]["class"] = Honggfuzz + if "eclipser" in fuzzers: + from deepstate.executors.fuzz.eclipser import Eclipser + fuzzers["eclipser"]["class"] = Eclipser + if "libfuzzer" in fuzzers: + from deepstate.executors.fuzz.libfuzzer import LibFuzzer + fuzzers["libfuzzer"]["class"] = LibFuzzer # run them for a bit - print("Fuzzers started, waiting 5 seconds.") - sleep(2) + wait_for_start = 2 + print(f"Fuzzers started, waiting {wait_for_start} seconds.") + for _ in range(wait_for_start): + sleep(1) + print('.', end="") + sys.stderr.flush() + sys.stdout.flush() + print("") # assert that all fuzzers started print("Checking if fuzzers are up and running") @@ -164,37 +190,37 @@ def do_sync_test(output_from_fuzzer=None): self.assertTrue(values["proc"].poll() is None) except 
Exception as e: print(f"Error for fuzzer {fuzzer}:") - print(values["proc"].stderr.read().decode('utf8')) + if values["proc"] and values["proc"].stderr: + print(values["proc"].stderr.read().decode('utf8')) raise e push_dir = os.path.join(values["output_dir"], values["class"].PUSH_DIR) self.assertTrue(os.path.isdir(push_dir)) - # manually push first crashing seed to AFL local dir - push_dir = os.path.join(fuzzers["afl"]["output_dir"], fuzzers["afl"]["class"].PUSH_DIR) - print(f"Pushing seed 1 to AFL: `{push_dir}`") - with open(os.path.join(push_dir, "id:000101,first_crash"), "wb") as f: - f.write(b64decode("R3JvcyBwemRyQUFBQUFBQUFB")) - - # check if all fuzzers find first crash using afl's seed - wait_for_crashes(fuzzers, one_crash_sync_timeout, 1) - - # # manually push second crashing seed to Angora local dir - # push_dir = os.path.join(fuzzers["angora"]["output_dir"], ANGORA_PUSH_DIR) - # print(f"Pushing seed 2 to Angora: `{push_dir}`") - # with open(os.path.join(push_dir, "id:000202,second_crash"), "wb") as f: - # f.write(b64decode("R3JvcyBwemRyIGZyb20gUEwu")) + # manually push crashing seeds to fuzzers local dirs + seeds = [b64decode("R3JvcyBwemRyIGZyb20gUEwu")] + fuzzer_id = 0 + for seed_no, seed in enumerate(seeds): + fuzzer_id %= len(fuzzers) + fuzzer = sorted(fuzzers.keys())[fuzzer_id] + values = fuzzers[fuzzer] + push_dir = os.path.join(values["output_dir"], values["class"].PUSH_DIR) + print(f"Pushing seed {seed_no} to {fuzzer}: `{push_dir}`") + with open(os.path.join(push_dir, f"id:000201,the_crash"), "wb") as f: + f.write(seed) + fuzzer_id += 1 - # # check if all fuzzers find first crash using afl's seed - # wait_for_crashes(fuzzers, one_crash_sync_timeout, 2) + # check if all fuzzers find at least two crashes + # that is: the one pushed to its local dir and at least one other + wait_for_crashes(fuzzers, timeout) # config - fuzzers_list = ["afl", "libfuzzer"] - output_from_fuzzer = None + fuzzers_list = ["afl", "libfuzzer", "angora", "eclipser", 
"honggfuzz"] + output_from_fuzzer = "angora" # or "afl" etc + timeout = None # init fuzzers = dict() - one_crash_sync_timeout = 4*60 test_source_file = "examples/EnsembledCrash.cpp" sync_dir = mkdtemp(prefix="syncing_") workspace_dir = mkdtemp(prefix="workspace_") @@ -212,6 +238,7 @@ def do_sync_test(output_from_fuzzer=None): except Exception as e: # cleanup # hard kill processes + print('Killing spawned processes.') for _, value in fuzzers.items(): try: proc = value["proc"] @@ -221,11 +248,13 @@ def do_sync_test(output_from_fuzzer=None): pass # filesystem + print("Clearing tmp files.") try: + sleep(1) rmtree(workspace_dir, ignore_errors=True) rmtree(sync_dir, ignore_errors=True) - except: - pass + except Exception as e2: + print(f"Error clearing: {e2}") # now can raise raise e From efb1298eddac8997d712206a0c0651e00281931c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Sun, 16 Feb 2020 00:43:41 +0100 Subject: [PATCH 30/46] update readme --- README.md | 403 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 245 insertions(+), 158 deletions(-) diff --git a/README.md b/README.md index aeac8d92..a978cf2c 100644 --- a/README.md +++ b/README.md @@ -144,14 +144,19 @@ You can also try out Deepstate with Docker, which is the easiest way to get all the fuzzers and tools up and running on any system. ```bash -$ docker build -t deepstate . -f docker/Dockerfile +$ docker build -t deepstate-base -f docker/base/Dockerfile docker/base +$ docker build -t deepstate --build-arg make_j=6 -f ./docker/Dockerfile . 
$ docker run -it deepstate bash -user@0f7cccd70f7b:~/deepstate/build/examples$ cd deepstate/build/examples -user@0f7cccd70f7b:~/deepstate/build/examples$ deepstate-angr ./Runlen -user@0f7cccd70f7b:~/deepstate/build/examples$ deepstate-eclipser ./Runlen --timeout 30 -user@0f7cccd70f7b:~/deepstate/build/examples$ ./Runlen_LF -max_total_time=30 -user@0f7cccd70f7b:~/deepstate/build/examples$ mkdir foo; echo foo > foo/foo -user@0f7cccd70f7b:~/deepstate/build/examples$ afl-fuzz -i foo -o afl_Runlen -- ./Runlen_AFL --input_test_file @@ --no_fork --abort_on_fail +user@a17bc44fd259:~/deepstate$ export DEEPSTATE_HOME="$HOME/deepstate" +user@a17bc44fd259:~/deepstate$ cd $DEEPSTATE_HOME/build/examples +user@a17bc44fd259:~/deepstate/build/examples$ deepstate-angr ./Runlen +user@a17bc44fd259:~/deepstate/build/examples$ mkdir tmp && deepstate-eclipser ./Runlen -o tmp --timeout 30 --fuzzer_out +user@a17bc44fd259:~/deepstate/build/examples$ cd $DEEPSTATE_HOME/build_libfuzzer/examples +user@a17bc44fd259:~/deepstate/build_libfuzzer/examples$ ./Runlen_LF -max_total_time=30 +user@a17bc44fd259:~/deepstate/build_libfuzzer/examples$ cd $DEEPSTATE_HOME/build_afl/examples +user@a17bc44fd259:~/deepstate/build_afl/examples$ mkdir foo && echo x > foo/x && mkdir afl_Runlen2 +user@a17bc44fd259:~/deepstate/build_afl/examples$ $AFL_HOME/afl-fuzz -i foo -o afl_Runlen -- ./Runlen_AFL --input_test_file @@ --no_fork --abort_on_fail +user@a17bc44fd259:~/deepstate/build_afl/examples$ deepstate-afl -o afl_Runlen2 ./Runlen_AFL ``` ## Usage @@ -293,68 +298,13 @@ CRITICAL: /Users/alex/deepstate/examples/Runlen.cpp(60): ORIGINAL: '91c499', ENC ERROR: Failed: Runlength_EncodeDecode ``` -If you're using the DeepState docker, it's easy to also try libFuzzer -and AFL on the Runlen example: - -```shell -mkdir libfuzzer_runlen -./Runlen_LF libfuzzer_runlen -max_total_time=30 -./Runlen --input_test_files_dir libfuzzer_runlen -``` - -And you'll see a number of failures, e.g.: -``` -WARNING: No test specified, 
defaulting to last test defined (Runlength_EncodeDecode) -CRITICAL: /home/user/deepstate/examples/Runlen.cpp(60): ORIGINAL: '4af4aa', ENCODED: '4AaAfA4AaA', ROUNDTRIP: '4af4a' -ERROR: Failed: Runlength_EncodeDecode -ERROR: Test case libfuzzer_runlen//9e266f6cb627ce3bb7d717a6e569ade6b3633f23 failed -CRITICAL: /home/user/deepstate/examples/Runlen.cpp(60): ORIGINAL: 'aaaaaa', ENCODED: 'aA', ROUNDTRIP: 'a' -ERROR: Failed: Runlength_EncodeDecode -ERROR: Test case libfuzzer_runlen//d8fc60ccdd8f555c1858b9f0820f263e3d2b58ec failed -CRITICAL: /home/user/deepstate/examples/Runlen.cpp(60): ORIGINAL: '4aaa', ENCODED: '4AaA', ROUNDTRIP: '4a' -ERROR: Failed: Runlength_EncodeDecode -ERROR: Test case libfuzzer_runlen//3177c75208f2d35399842196dc8093243d5a8243 failed -CRITICAL: /home/user/deepstate/examples/Runlen.cpp(60): ORIGINAL: 'aaa', ENCODED: 'aA', ROUNDTRIP: 'a' -ERROR: Failed: Runlength_EncodeDecode -ERROR: Test case libfuzzer_runlen//9842926af7ca0a8cca12604f945414f07b01e13d failed -CRITICAL: /home/user/deepstate/examples/Runlen.cpp(60): ORIGINAL: 'aaa', ENCODED: 'aA', ROUNDTRIP: 'a' -ERROR: Failed: Runlength_EncodeDecode -ERROR: Test case libfuzzer_runlen//85e53271e14006f0265921d02d4d736cdc580b0b failed -CRITICAL: /home/user/deepstate/examples/Runlen.cpp(60): ORIGINAL: 'aaaaa', ENCODED: 'aA', ROUNDTRIP: 'a' -ERROR: Failed: Runlength_EncodeDecode -ERROR: Test case libfuzzer_runlen//241cbd6dfb6e53c43c73b62f9384359091dcbf56 failed -CRITICAL: /home/user/deepstate/examples/Runlen.cpp(60): ORIGINAL: 'aa', ENCODED: 'aA', ROUNDTRIP: 'a' -ERROR: Failed: Runlength_EncodeDecode -ERROR: Test case libfuzzer_runlen//05a79f06cf3f67f726dae68d18a2290f6c9a50c9 failed -CRITICAL: /home/user/deepstate/examples/Runlen.cpp(60): ORIGINAL: '25aaaa', ENCODED: '2A5AaA', ROUNDTRIP: '25a' -ERROR: Failed: Runlength_EncodeDecode -ERROR: Test case libfuzzer_runlen//419c3b754bacd6fc14ff9a932c5e2089d6dfcab5 failed -CRITICAL: /home/user/deepstate/examples/Runlen.cpp(60): ORIGINAL: 'aaaa', ENCODED: 'aA', 
ROUNDTRIP: 'a' -ERROR: Failed: Runlength_EncodeDecode -ERROR: Test case libfuzzer_runlen//bb589d0621e5472f470fa3425a234c74b1e202e8 failed -CRITICAL: /home/user/deepstate/examples/Runlen.cpp(60): ORIGINAL: '97aa', ENCODED: '9A7AaA', ROUNDTRIP: '97a' -ERROR: Failed: Runlength_EncodeDecode -ERROR: Test case libfuzzer_runlen//ca61c43b0e3ff0a8eccf3136996c9f1d9bfd627c failed -INFO: Ran 16 tests; 10 tests failed -``` - -Using AFL is similarly easy: - -```shell -mkdir afl_seeds -echo "ok" >& seeds/seed -afl-fuzz -i seeds -o afl_runlen -- ./Runlen_AFL --input_test_file @@ --no_fork --abort_on_fail -``` - -You'll have to stop this with Ctrl-C. The `afl_runlen/crashes` -directory will contain crashing inputs AFL found. - ## Log Levels By default, DeepState is not very verbose about testing activity, -other than failing tests. The `--min_log_level` argument lowers the -threshold for output, with 0 = `DEBUG`, 1 = `TRACE` (output from the -tests, including from `printf`), 2 = INFO (DeepState messages, the default), 3 = `WARNING`, +other than failing tests. The `DEEPSTATE_LOG` environment variable +or the `--min_log_level` argument lowers the threshold for output, +with 0 = `DEBUG`, 1 = `TRACE` (output from the tests, including from `printf`), +2 = INFO (DeepState messages, the default), 3 = `WARNING`, 4 = `ERROR`, 5 = `EXTERNAL` (output from other programs such as libFuzzer), and 6 = `CRITICAL` messages. Lowering the `min_log_level` can be very useful for understanding what a DeepState harness is actually doing; @@ -389,35 +339,133 @@ replaying more than a few tests, it is highly recommended to add the `--no_fork` option on macOS, unless you need the added crash handling (that is, only when things aren't working without that option). -## Fuzzing with libFuzzer - -If you install clang 6.0 or later, and run `cmake` when you install -with the `DEEPSTATE_LIBFUZZER` environment variable defined, you can -generate tests using libFuzzer. 
Because both DeepState and libFuzzer -want to be `main`, this requires building a different executable for -libFuzzer. The `examples` directory shows how this can be done: just -compile with a libFuzzer-supporting clang, and add `-fsanitize=fuzzer` -as an option, and link to the right DeepState library -(`-ldeepstate_LF`). The -libFuzzer executable thus produced works like any other libFuzzer executable, and -the tests produced can be replayed using the normal DeepState executable. -For example, generating some tests of the `OneOf` example (up to 5,000 -runs), then running those tests to examine the results, would look -like: +## External fuzzers -```shell -mkdir OneOf_libFuzzer_corpus -./OneOf_LF -runs=5000 OneOf_libFuzzer_corpus -./OneOf --input_test_files_dir OneOf_libFuzzer_corpus +DeepState currently supports five external fuzzers: +[libFuzzer](https://llvm.org/docs/LibFuzzer.html), +[AFL](http://lcamtuf.coredump.cx/afl), +[HonggFuzz](https://github.com/google/honggfuzz), +[Eclipser](https://github.com/SoftSec-KAIST/Eclipser) and +[Angora](https://github.com/AngoraFuzzer/Angora). + +To use one of them as a DeepState backend, you need to: +* install it +* compile DeepState with it +* compile target test with it +* run executor with location of installed files provided + +To install the fuzzer, follow the instructions on the appropriate webpage. + +To compile DeepState with the fuzzer, run `cmake` with +`-DDEEPSTATE_FUZZERNAME=on` (like `-DDEEPSTATE_AFL=on`) option and +`CC/CXX` variables set to the fuzzer's compiler. This will produce +a library called `libdeepstate_FUZZERNAME.a`, which you may put in a +standard location (`/usr/local/lib/`). + +To compile target test, use fuzzer's compiler and link with appropriate +DeepState library (`-ldeepstate_FUZZERNAME`).
+ +To provide location of fuzzer's executables to python executor you may: +* put the executables to some $PATH location +* export `FUZZERNAME_HOME` environment variable (like `ANGORA_HOME`) +with value set to the location of fuzzer's executables +* specify `--home_path` argument when running the executor + +All that, rather complicated setup may be simplified with Docker. +Just build the image (changing OS in `./docker/base/Dockerfile` if needed) +and use it with your project. All the fuzzers and environment variables will be there. + +### Fuzzer executors usage + +Fuzzer executors (`deepstate-honggfuzz` etc.) are meant to be as uniform +as possible, thus making it easy to compile and run tests. + +Compilation: `deepstate-afl --compile_test ./SimpleCrash.cpp --out_test_name SimpleCrash` + +Run: `mkdir out && deepstate-afl --output_test_dir out ./SimpleCrash.afl` + +The only required arguments are location of output directory and the test. +Optional arguments: +``` +--input_seeds - location of directory with initial inputs +--max_input_size - maximal length of inputs +--exec_timeout - timeout for run on one input file +--timeout - timeout for whole fuzzing process +--fuzzer_out - use fuzzer output rather than deepstate (uniform) one +--mem_limit - memory limit for the fuzzer +--min_log_level - how much to log (0=DEBUG, 6=CRITICAL) +--blackbox - fuzz not-instrumented binary +--dictionary - file with words that may enhance fuzzing (fuzzer dependent format) +``` + +Each fuzzer creates following files/directories under output directory: ``` +* deepstate-stats.txt - some statistics parsed by executor +* fuzzer-output.txt - all stdout/stderr from the fuzzer +* PUSH_DIR - fuzzer will take (synchronize) additional inputs from here +* PULL_DIR - fuzzer will save produced inputs here (may be the same as PUSH_DIR) +* CRASH_DIR - fuzzer will save crashes here +``` + +Failed tests are treated as crashes when using fuzzer executors +(because of `--abort_on_fail` flag).
+ +Note that some fuzzers (notably AFL) require input seeds. When not provided, +executor will create a dumb one, which may not be very efficient for fuzzing. + +Input files need to be smaller than the DeepState input size limit (8192 bytes), +which is the default limit in executors. But not all fuzzers support file size +limitation, so if your test cases grow too large, you may need to stop fuzzing +and minimize them. + +Also, there should not be crash-producing files inside input seeds directory. + +Because AFL and other file-based fuzzers only rely on the DeepState +native test executable, they should (like DeepState's built-in simple +fuzzer) work fine on macOS and other Unix-like OSes. On macOS, you +will want to consider doing the work to use [persistent mode](http://lcamtuf.blogspot.com/2015/06/new-in-afl-persistent-mode.html), or even +running inside a VM, due to AFL (unless in persistent mode) relying +extensively on forks, which are very slow on macOS. + +#### AFL + +```bash +$ cd ./deepstate +$ mkdir -p build_afl && cd build_afl +$ export AFL_HOME="/afl-2.52b" +$ CXX="$AFL_HOME/afl-clang++" CC="$AFL_HOME/afl-clang" cmake -DDEEPSTATE_AFL=ON ../ +$ make -j4 +$ sudo cp ./libdeepstate_AFL.a /usr/local/lib/ +``` + +Dirs: +* PUSH_DIR - out/sync_dir/queue +* PULL_DIR - out/the_fuzzer/queue +* CRASH_DIR - out/the_fuzzer/crashes + + +#### libFuzzer + +It is bundled into newer clang compilers. + +```bash +$ cd ./deepstate +$ mkdir -p build_libfuzzer && cd build_libfuzzer +$ CXX=clang++ CC=clang cmake -DDEEPSTATE_LIBFUZZER=ON ../ +$ make -j4 +$ sudo cp ./libdeepstate_LF.a /usr/local/lib/ +``` + +Dirs: +* PUSH_DIR - out/sync_dir/queue +* PULL_DIR - out/sync_dir/queue +* CRASH_DIR - out/the_fuzzer/crashes Use the `LIBFUZZER_WHICH_TEST` environment variable to control which test libFuzzer runs, using a fully qualified name (e.g., `Arithmetic_InvertibleMultiplication_CanFail`). By default, you get the first test defined (which works fine if there is only one test).
-Obviously, libFuzzer may work better if you provide a non-empty -corpus, but fuzzing will work even without an initial corpus, unlike AFL. One hint when using libFuzzer is to avoid dynamically allocating memory during a test, if that memory would not be freed on a test @@ -443,6 +491,116 @@ you can see more about what DeepState under libFuzzer is doing by setting the `LIBFUZZER_LOUD` environment variable, and tell libFuzzer to stop upon finding a failing test using `LIBFUZZER_EXIT_ON_FAIL`. +#### HonggFuzz + +```bash +$ cd ./deepstate +$ mkdir -p build_honggfuzz && cd build_honggfuzz +$ export HONGGFUZZ_HOME="/honggfuzz" +$ CXX="$HONGGFUZZ_HOME/hfuzz_cc/hfuzz-clang++" CC="$HONGGFUZZ_HOME/hfuzz_cc/hfuzz-clang" cmake -DDEEPSTATE_HONGGFUZZ=ON ../ +$ make -j4 +$ sudo cp ./libdeepstate_HFUZZ.a /usr/local/lib/ +``` + +Dirs: +* PUSH_DIR - out/sync_dir/queue +* PULL_DIR - out/sync_dir/queue +* CRASH_DIR - out/the_fuzzer/crashes + + +#### Eclipser + +Eclipser uses QEMU instrumentation and therefore doesn't require +special DeepState compilation. You should just use `libdeepstate.a` +(QEMU doesn't like special instrumentation). + +Dirs: +* PUSH_DIR - out/sync_dir/queue +* PULL_DIR - out/sync_dir/queue +* CRASH_DIR - out/the_fuzzer/crashes + + +#### Angora + +Angora uses two binaries for fuzzing, one with taint tracking information +and one without. So we need two deepstate libraries and will need to +compile each test two times. + +Angora also requires an old version of llvm/clang (between 4.0.0 and 7.1.0). +Executor will need to find it, so you may want to put it under `$ANGORA_HOME/clang+llvm/`.
+ +```bash +# for deepstate compilation only +$ export PATH="/clang+llvm/bin:$PATH" +$ export LD_LIBRARY_PATH="/clang+llvm/lib:$LD_LIBRARY_PATH" + +$ cd ./deepstate +$ export ANGORA_HOME="/angora" +$ mkdir -p build_angora_taint && cd build_angora_taint +$ export USE_TRACK=1 +$ CXX="$ANGORA_HOME/bin/angora-clang++" CC="$ANGORA_HOME/bin/angora-clang" cmake -DDEEPSTATE_ANGORA=ON ../ +$ make -j4 -i # ignore errors, because Angora doesn't support 32bit builds \ +$ sudo cp ./libdeepstate_taint.a /usr/local/lib/ +$ cd ../ + +$ mkdir -p build_angora_fast && cd build_angora_fast +$ export USE_FAST=1 +$ CXX="$ANGORA_HOME/bin/angora-clang++" CC="$ANGORA_HOME/bin/angora-clang" cmake -DDEEPSTATE_ANGORA=ON ../ +$ make -j4 -i +$ sudo cp ./libdeepstate_fast.a /usr/local/lib/ +``` + +```bash +$ mv /clang+llvm $ANGORA_HOME/ +$ mkdir out +$ deepstate-angora --compile_test ./SimpleCrash.cpp --out_test_name SimpleCrash +$ deepstate-angora -o out ./SimpleCrash.taint.angora ./SimpleCrash.fast.angora +``` + +Dirs: +* PUSH_DIR - out/sync_dir/queue +* PULL_DIR - out/angora/queue +* CRASH_DIR - out/angora/crashes + + +### Replay + +To run saved inputs against some test, just run it with appropriate arguments: +``` +./SimpleCrash --abort_on_fail --input_test_files_dir ./out/output_afl/the_fuzzer/queue +``` +No need to use fuzzer specific compilation (so don't use `SimpleCrash_AFL` etc). + + +### Ensembler (fuzzers synchronization) + +You may run as many executors as you want (and have resources). But to synchronize +them, you need to specify `--sync_dir` option pointing to some shared directory. + +Each fuzzer will push produced test cases to that directory and pull from it as needed. 
+ +Currently, there are some limitations in synchronization for the following fuzzers: +* Eclipser - needs to be restarted to use pulled test cases +* HonggFuzz - same as above +* Angora - pulled files need to have correct, AFL format (`id:00003`) and the id must +be greater than the biggest in Angora's local (pull) directory +* libFuzzer - stops fuzzing after first crash found, so there should be no crashes in `sync_dir` + + +## Which Fuzzer Should I Use? + +In fact, since DeepState supports libFuzzer, AFL, HonggFuzz, Angora and Eclipser, +a natural question is "which is the best fuzzer?" In +general, it depends! We suggest using them all, which DeepState makes +easy. libFuzzer is very fast, and sometimes the CMP breakdown it +provides is very useful; however, it's often bad at finding longer +paths where just covering nodes isn't helpful. AFL is still an +excellent general-purpose fuzzer, and often beats "improved" versions +over a range of programs. Finally, Eclipser has some tricks that let +it get traction in some cases where you might think only symbolic +execution (which wouldn't scale) could help. + + ## Test case reduction While tests generated by symbolic execution are likely to be highly @@ -518,77 +676,6 @@ settings, or even `--slowest` setting to try to reduce it further. Test case reduction should work on any OS. -## Fuzzing with AFL - -DeepState can also be used with a file-based fuzzer (e.g. AFL). If -you compile using `afl-clang++` and `afl-clang`, and link with -`-ldeepstate_AFL` when working with AFL. `deepstate-afl` then gives -you an easy front-end for running AFL. - -For example, to fuzz the `OneOf` -example, if we were in the `deepstate/build/examples` directory (and had -built an AFL executable for it), you -would do something like: - -```shell -deepstate-afl ./OneOf_afl -i corpus --output_test_dir afl_OneOf_out -``` - -where `corpus` contains at least one file to start fuzzing from.
The -file needs to be smaller than the DeepState input size limit, but has -few other limitations (for AFL it should also not cause test -failure). The `abort_on_fail` flag makes DeepState crashes and failed -tests appear as crashes to the fuzzer. -To replay the tests from AFL: - -```shell -./OneOf --input_test_files_dir afl_OneOf_out/crashes -./OneOf --input_test_files_dir afl_OneOf_out/queue -``` - -Finally, if an example has more than one test, you need to specify, -with a fully qualified name (e.g., -`Arithmetic_InvertibleMultiplication_CanFail`), which test to run, -using the `--input_which_test` flag. By -default, DeepState will run the first test defined. - -Because AFL and other file-based fuzzers only rely on the DeepState -native test executable, they should (like DeepState's built-in simple -fuzzer) work fine on macOS and other Unix-like OSes. On macOS, you -will want to consider doing the work to use [persistent mode](http://lcamtuf.blogspot.com/2015/06/new-in-afl-persistent-mode.html), or even -running inside a VM, due to AFL (unless in persistent mode) relying -extensively on -forks, which are very slow on macOS. - -## Fuzzing with Eclipser - -[Eclipser](https://github.com/SoftSec-KAIST/Eclipser) is a powerful new fuzzer/grey-box concolic tool -with some of the advantages of symbolic execution, but with more scalability. DeepState supports Eclipser out of the box. 
To use it, you just need to - -- Install Eclipser as instructed at https://github.com/SoftSec-KAIST/Eclipser (you'll need to be on Linux) -- Set the `ECLIPSER_HOME` environment variable to where-ever you installed Eclipser (the root, above `build`) -- Make sure you compile your DeepState native without any sanitizers (QEMU, used by Eclipser, doesn't like them) - -After that, you can use Eclipser like this: - -`deepstate-eclipser --timeout --output_test_dir ` - -In our experience, Eclipser is quite effective, often better than -libFuzzer and sometimes better than AFL, despite having a much slower -test throughput than either. - -## Which Fuzzer Should I Use? - -In fact, since DeepState supports libFuzzer, AFL, and Eclipser (and -others), a natural question is "which is the best fuzzer?" In -general, it depends! We suggest using them all, which DeepState makes -easy. libFuzzer is very fast, and sometimes the CMP breakdown it -provides is very useful; however, it's often bad at finding longer -paths where just covering nodes isn't helpful. AFL is still an -excellent general-purpose fuzzer, and often beats "improved" versions -over a range of programs. Finally, Eclipser has some tricks that let -it get traction in some cases where you might think only symbolic -execution (which wouldn't scale) could help. 
## Swarm Testing From 5560155028d59cfd8f024f513d2f7db2e1b18406 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Mon, 17 Feb 2020 10:35:00 +0100 Subject: [PATCH 31/46] rebuild w/o cache --- .github/workflows/ci.yml | 48 +++++++++++++++++++++----------------- bin/deepstate/core/fuzz.py | 4 ++-- 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fae3267d..9ddae741 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -66,28 +66,32 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - uses: whoan/docker-build-with-cache-action@v3 - with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} - registry: docker.pkg.github.com - image_name: trailofbits/deepstate/deepstate-base - image_tag: latest - context: docker/base - dockerfile: Dockerfile - push_image_and_stages: true # because we run workflow on PRs - build_extra_args: "--tag deepstate-base" - - uses: whoan/docker-build-with-cache-action@v3 - with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} - registry: docker.pkg.github.com - image_name: trailofbits/deepstate/deepstate - image_tag: latest - context: . - dockerfile: docker/Dockerfile - push_image_and_stages: true # because we run workflow on PRs - build_extra_args: "--cache-from=deepstate-base" + - name: docker without cache + run: | + docker build -t deepstate-base -f docker/base/Dockerfile docker/base + docker build -t deepstate --build-arg make_j=2 -f ./docker/Dockerfile . 
+ # - uses: whoan/docker-build-with-cache-action@v3 + # with: + # username: ${{ secrets.DOCKER_USERNAME }} + # password: ${{ secrets.DOCKER_PASSWORD }} + # registry: docker.pkg.github.com + # image_name: trailofbits/deepstate/deepstate-base + # image_tag: latest + # context: docker/base + # dockerfile: Dockerfile + # push_image_and_stages: true # because we run workflow on PRs + # build_extra_args: "--tag deepstate-base" + # - uses: whoan/docker-build-with-cache-action@v3 + # with: + # username: ${{ secrets.DOCKER_USERNAME }} + # password: ${{ secrets.DOCKER_PASSWORD }} + # registry: docker.pkg.github.com + # image_name: trailofbits/deepstate/deepstate + # image_tag: latest + # context: . + # dockerfile: docker/Dockerfile + # push_image_and_stages: true # because we run workflow on PRs + # build_extra_args: "--cache-from=deepstate-base" - name: Test fuzzers run: | docker run -it deepstate bash -c 'sudo pip3 install nose && nosetests tests/test_fuzzers.py' diff --git a/bin/deepstate/core/fuzz.py b/bin/deepstate/core/fuzz.py index a3a184e7..096cea71 100644 --- a/bin/deepstate/core/fuzz.py +++ b/bin/deepstate/core/fuzz.py @@ -817,8 +817,8 @@ def do_restart(self): if not self.sync_dir: return False - if time.time() - self.start_time > 20: - return True + # if time.time() - self.start_time > 20: + # return True return False From 6661dc4607a3224f4f0d6081f8abc10694359a93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Mon, 17 Feb 2020 12:30:53 +0100 Subject: [PATCH 32/46] split documentation --- README.md | 588 ++----------------------------------- docs/README.md | 5 + docs/basic_usage.md | 237 +++++++++++++++ docs/fuzzing.md | 290 ++++++++++++++++++ docs/swarm_testing.md | 45 +++ docs/symbolic_execution.md | 6 + docs/test_harness.md | 5 + 7 files changed, 607 insertions(+), 569 deletions(-) create mode 100644 docs/README.md create mode 100644 docs/basic_usage.md create mode 100644 docs/fuzzing.md create mode 100644 docs/swarm_testing.md create 
mode 100644 docs/symbolic_execution.md create mode 100644 docs/test_harness.md diff --git a/README.md b/README.md index a978cf2c..ea14b71f 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ Runtime: - Python 3.6 (or newer) - Z3 (for the Manticore backend) -## Building on Ubuntu 16.04 (Xenial) +## Building on Ubuntu 18.04 (Bionic) First make sure you install [Python 3.6 or greater](https://askubuntu.com/a/865569). Then use this command line to install additional requirements and compile DeepState: @@ -78,6 +78,7 @@ git clone https://github.com/trailofbits/deepstate deepstate mkdir deepstate/build && cd deepstate/build cmake ../ make +sudo make install ``` ## Installing @@ -94,6 +95,9 @@ The `virtualenv`-enabled `$PATH` should now include two executables: `deepstate` If you try using Manticore, and it doesn't work, but you definitely have the latest Manticore installed, check the `.travis.yml` file. If that grabs a Manticore other than the master version, you can try using the version of Manticore we use in our CI tests. Sometimes Manticore makes a breaking change, and we are behind for a short time. + +## Installation testing + You can check your build using the test binaries that were (by default) built and emitted to `deepstate/build/examples`. For example, to use angr to symbolically execute the `IntegerOverflow` test harness with 4 workers, saving generated test cases in a directory called `out`, you would invoke: ```shell @@ -136,592 +140,38 @@ In the absence of an `--input_which_test` argument, DeepState defaults to the first-defined test. Run the native executable with the `--help` argument to see all DeepState options. -If you want to use DeepState in C/C++ code, you will likely want to run `sudo make install` from the `$DEEPSTATE/build` directory as well. The examples mentioned below (file system, databases) assume this has already been done. 
### Docker You can also try out Deepstate with Docker, which is the easiest way to get all the fuzzers and tools up and running on any system. +The build may take about 40 minutes, because some fuzzers require us +building huge projects like QEMU or LLVM. + ```bash $ docker build -t deepstate-base -f docker/base/Dockerfile docker/base $ docker build -t deepstate --build-arg make_j=6 -f ./docker/Dockerfile . $ docker run -it deepstate bash -user@a17bc44fd259:~/deepstate$ export DEEPSTATE_HOME="$HOME/deepstate" -user@a17bc44fd259:~/deepstate$ cd $DEEPSTATE_HOME/build/examples +user@a17bc44fd259:~/deepstate$ cd build/examples user@a17bc44fd259:~/deepstate/build/examples$ deepstate-angr ./Runlen -user@a17bc44fd259:~/deepstate/build/examples$ mkdir tmp && deepstate-eclipser ./Runlen -o tmp --timeout 30 --fuzzer_out -user@a17bc44fd259:~/deepstate/build/examples$ cd $DEEPSTATE_HOME/build_libfuzzer/examples +user@a17bc44fd259:~/deepstate/build/examples$ mkdir tmp && deepstate-eclipser ./Runlen -o tmp --timeout 30 +user@a17bc44fd259:~/deepstate/build/examples$ cd ../../build_libfuzzer/examples user@a17bc44fd259:~/deepstate/build_libfuzzer/examples$ ./Runlen_LF -max_total_time=30 -user@a17bc44fd259:~/deepstate/build_libfuzzer/examples$ cd $DEEPSTATE_HOME/build_afl/examples +user@a17bc44fd259:~/deepstate/build_libfuzzer/examples$ cd ../../build_afl/examples user@a17bc44fd259:~/deepstate/build_afl/examples$ mkdir foo && echo x > foo/x && mkdir afl_Runlen2 -user@a17bc44fd259:~/deepstate/build_afl/examples$ $AFL_HOME/afl-fuzz -i foo -o afl_Runlen -- ./Runlen_AFL --input_test_file @@ --no_fork --abort_on_fail -user@a17bc44fd259:~/deepstate/build_afl/examples$ deepstate-afl -o afl_Runlen2 ./Runlen_AFL -``` - -## Usage - -DeepState consists of a static library, used to write test harnesses, -and command-line _executors_ written in Python. At this time, the best -documentation is in the [examples](/examples) and in our -[paper](https://agroce.github.io/bar18.pdf). 
A more extensive -example, using DeepState and libFuzzer to test a user-mode file -system, is available [here](https://github.com/agroce/testfs); in -particular the -[Tests.cpp](https://github.com/agroce/testfs/blob/master/Tests.cpp) -file and CMakeLists.txt show DeepState usage. Another extensive -example is a [differential tester that compares Google's leveldb and -Facebook's rocksdb](https://github.com/agroce/testleveldb). - -## Example Code - -```cpp -#include - -using namespace deepstate; - -/* Simple, buggy, run-length encoding that creates "human readable" - * encodings by adding 'A'-1 to the count, and splitting at 26. - * e.g., encode("aaabbbbbc") = "aCbEcA" since C=3 and E=5 */ - -char* encode(const char* input) { - unsigned int len = strlen(input); - char* encoded = (char*)malloc((len*2)+1); - int pos = 0; - if (len > 0) { - unsigned char last = input[0]; - int count = 1; - for (int i = 1; i < len; i++) { - if (((unsigned char)input[i] == last) && (count < 26)) - count++; - else { - encoded[pos++] = last; - encoded[pos++] = 64 + count; - last = (unsigned char)input[i]; - count = 1; - } - } - encoded[pos++] = last; - encoded[pos++] = 65; // Should be 64 + count - } - encoded[pos] = '\0'; - return encoded; -} - -char* decode(const char* output) { - unsigned int len = strlen(output); - char* decoded = (char*)malloc((len/2)*26); - int pos = 0; - for (int i = 0; i < len; i += 2) { - for (int j = 0; j < (output[i+1] - 64); j++) { - decoded[pos++] = output[i]; - } - } - decoded[pos] = '\0'; - return decoded; -} - -// Can be (much) higher (e.g., > 1024) if we're using fuzzing, not symbolic execution -#define MAX_STR_LEN 6 - -TEST(Runlength, BoringUnitTest) { - ASSERT_EQ(strcmp(encode(""), ""), 0); - ASSERT_EQ(strcmp(encode("a"), "aA"), 0); - ASSERT_EQ(strcmp(encode("aaabbbbbc"), "aCbEcA"), 0); -} - -TEST(Runlength, EncodeDecode) { - char* original = DeepState_CStrUpToLen(MAX_STR_LEN, "abcdef0123456789"); - char* encoded = encode(original); - 
ASSERT_LE(strlen(encoded), strlen(original)*2) << "Encoding is > length*2!"; - char* roundtrip = decode(encoded); - ASSERT_EQ(strncmp(roundtrip, original, MAX_STR_LEN), 0) << - "ORIGINAL: '" << original << "', ENCODED: '" << encoded << - "', ROUNDTRIP: '" << roundtrip << "'"; -} -``` - -The code above (which can be found -[here](https://github.com/trailofbits/deepstate/blob/master/examples/Runlen.cpp)) -shows an example of a DeepState test harness. Most of the code is -just the functions to be tested. Using DeepState to test them requires: - -- Including the DeepState C++ header and using the DeepState namespace - -- Defining at least one TEST, with names - -- Calling some DeepState APIs that produce data - - In this example, we see the `DeepState_CStrUpToLen` call tells - DeepState to produce a string that has up to `MAX_STR_LEN` - characters, chosen from those present in hex strings. - -- Optionally making some assertions about the correctness of the -results - - In `Runlen.cpp` this is the `ASSERT_LE` and `ASSERT_EQ` checks. - - In the absence of any properties to check, DeepState can still - look for memory safety violations, crashes, and other general - categories of undesirable behavior, like any fuzzer. - -DeepState will also run the "BoringUnitTest," but it (like a -traditional hand-written unit test) is simply a test of fixed inputs -devised by a programmer. These inputs do not expose the bug in -`encode`. Nor do the default values (all zero bytes) for the DeepState test: - -``` -~/deepstate/build/examples$ ./Runlen -TRACE: Running: Runlength_EncodeDecode from /Users/alex/deepstate/examples/Runlen.cpp(55) -TRACE: Passed: Runlength_EncodeDecode -TRACE: Running: Runlength_BoringUnitTest from /Users/alex/deepstate/examples/Runlen.cpp(49) -TRACE: Passed: Runlength_BoringUnitTest -``` - -Using DeepState, however, it is easy to find the bug. 
Just -go into the `$DEEPSTATE/build/examples` directory and try: - -```shell -deepstate-angr ./Runlen -``` - -or - -```shell -./Runlen --fuzz --exit_on_fail -``` - -The fuzzer will output something like: - -``` -INFO: Starting fuzzing -WARNING: No seed provided; using 1546631311 -WARNING: No test specified, defaulting to last test defined (Runlength_EncodeDecode) -CRITICAL: /Users/alex/deepstate/examples/Runlen.cpp(60): ORIGINAL: '91c499', ENCODED: '9A1AcA4A9A', ROUNDTRIP: '91c49' -ERROR: Failed: Runlength_EncodeDecode -``` - -## Log Levels - -By default, DeepState is not very verbose about testing activity, -other than failing tests. The `DEEPSTATE_LOG` environment variable -or the `--min_log_level` argument lowers the threshold for output, -with 0 = `DEBUG`, 1 = `TRACE` (output from the tests, including from `printf`), -2 = INFO (DeepState messages, the default), 3 = `WARNING`, -4 = `ERROR`, 5 = `EXTERNAL` (output from other programs such as -libFuzzer), and 6 = `CRITICAL` messages. Lowering the `min_log_level` can be very -useful for understanding what a DeepState harness is actually doing; -often, setting `--min_log_level 1` in either fuzzing or symbolic -execution will give sufficient information to debug your test harness. - - -## Built-In Fuzzer - -Every DeepState executable provides a simple built-in fuzzer that -generates tests using completely random data. Using this fuzzer is as -simple as calling the native executable with the `--fuzz` argument. -The fuzzer also takes a `seed` and `timeout` (default of two minutes) -to control the fuzzing. By default fuzzing saves -only failing and crashing tests, and these only when given an output -directory. If you want to actually save the test cases -generated, you need to add a `--output_test_dir` argument to tell -DeepState where to put the generated tests, and if you want the -(totally random and unlikely to be high-quality) passing tests, you -need to add `--fuzz_save_passing`. 
- -Note that while symbolic execution only works on Linux, without a -fairly complex cross-compilation process, the brute force fuzzer works -on macOS or (as far as we know) any Unix-like system. - -## A Note on MacOS and Forking - -Normally, when running a test for replay or fuzzing, DeepState forks -in order to cleanly handle crashes of a test. Unfortunately, `fork()` -on macOS is _extremely_ slow. When using the built-in fuzzer or -replaying more than a few tests, it is highly recommended to add the `--no_fork` -option on macOS, unless you need the added crash handling (that is, -only when things aren't working without that option). - -## External fuzzers - -DeepState currently support five external fuzzers: -[libFuzzer](https://llvm.org/docs/LibFuzzer.html), -[AFL](http://lcamtuf.coredump.cx/afl), -[HonggFuzz](https://github.com/google/honggfuzz), -[Eclipser](https://github.com/SoftSec-KAIST/Eclipser) and -[Angora](https://github.com/AngoraFuzzer/Angora). - -To use one of them as DeepState backend, you need to: -* install it -* compile DeepState with it -* compile target test with it -* run executor with location of installed files provided - -To install the fuzzer follow instructions on appropriate webpage. - -To compile DeepState with the fuzzer, run `cmake` with -`-DDEEPSTATE_FUZZERNAME=on` (like `-DDEEPSTATE_AFL=on`) option and -`CC/CXX` variables set to the fuzzer's compiler. This will produce -library called `libdeepstate_FUZZERNAME.a`, which you may put to -standard location (`/usr/local/lib/`). - -To compile target test, use fuzzer's compiler and link with appropriate -DeepState library (`-ldeepstate_FUZZERNAME`). 
- -To provide location of fuzzer's executables to python executor you may: -* put the executables to some $PATH location -* export `FUZZERNAME_HOME` environment variable (like `ANGORA_HOME`) -with value set to the location of fuzzer's executables -* specify `--home_path` argument when running the executor - -All that, rather complicated setup may be simplified with Docker. -Just build the image (changing OS in `./docker/base/Dockerfile` if needed) -and use it with your project. All the fuzzers and evironment variables will be there. - -### Fuzzer executors usage - -Fuzzer executors (`deepstate-honggfuzz` etc.) are meant to be as uniform -as possible, thus making it easy to compile and run tests. - -Compilation: `deepstate-afl --compile_test ./SimpleCrash.cpp --out_test_name SimpleCrash` - -Run: `mkdir out && deepstate-afl --output_test_dir out ./SimpleCrash.afl` - -The only required arguments are location of output directory and the test. -Optional arguments: -``` ---input_seeds - location of directory with initial inputs ---max_input_size - maximal length of inputs ---exec_timeout - timeout for run on one input file ---timeout - timeout for whole fuzzing process ---fuzzer_out - use fuzzer output rather that deepstate (uniform) one ---mem_limit - memory limit for the fuzzer ---min_log_level - how much to log (0=DEBUG, 6=CRITICAL) ---blackbox - fuzz not-instrumented binary ---dictionary - file with words that may enhance fuzzing (fuzzer dependent format) -``` - -Each fuzzer creates following files/directories under output directory: -``` -* deepstate-stats.txt - some statistic parsed by executor -* fuzzer-output.txt - all stdout/stderr from the fuzzer -* PUSH_DIR - fuzzer will take (synchronize) additional inputs from here -* PULL_DIR - fuzzer will save produced inputs here (may be the same as PUSH_DIR) -* CRASH_DIR - fuzzer will save crashes here -``` - -Failed tests are treated as crashes when using fuzzer executors -(because of `--abort_on_fail` flag). 
- -Note that some fuzzers (notably AFL) requires input seeds. When not provided, -executor will create a dumb one, which may be not very efficient for fuzzing. - -Input files need to be smaller than the DeepState input size limit (8192 bytes), -which is the default limit in executors. But not all fuzzers support file size -limitation, so if your test cases grown too large, you may need to stop fuzzing -and minimalize them. - -Also, there should not be crash-producing files inside input seeds directory. - -Because AFL and other file-based fuzzers only rely on the DeepState -native test executable, they should (like DeepState's built-in simple -fuzzer) work fine on macOS and other Unix-like OSes. On macOS, you -will want to consider doing the work to use [persistent mode](http://lcamtuf.blogspot.com/2015/06/new-in-afl-persistent-mode.html), or even -running inside a VM, due to AFL (unless in persistent mode) relying -extensively on forks, which are very slow on macOS. - -#### AFL - -```bash -$ cd ./deepstate -$ mkdir -p build_afl && cd build_afl -$ export AFL_HOME="/afl-2.52b" -$ CXX="$AFL_HOME/afl-clang++" CC="$AFL_HOME/afl-clang" cmake -DDEEPSTATE_AFL=ON ../ -$ make -j4 -$ sudo cp ./libdeepstate_AFL.a /usr/local/lib/ -``` - -Dirs: -* PUSH_DIR - out/sync_dir/queue -* PULL_DIR - out/the_fuzzer/queue -* CRASH_DIR - out/the_fuzzer/crashes - - -#### libFuzzer - -It is bundled into newer clang compilers. - -```bash -$ cd ./deepstate -$ mkdir -p build_libfuzzer && cd build_libfuzzer -$ CXX=clang++ CC=clang cmake -DDEEPSTATE_LIBFUZZER=ON ../ -$ make -j4 -$ sudo cp ./libdeepstate_LF.a /usr/local/lib/ -``` - -Dirs: -* PUSH_DIR - out/sync_dir/queue -* PULL_DIR - out/sync_dir/queue -* CRASH_DIR - out/the_fuzzer/crashes - -Use the `LIBFUZZER_WHICH_TEST` -environment variable to control which test libFuzzer runs, using a -fully qualified name (e.g., -`Arithmetic_InvertibleMultiplication_CanFail`). 
By default, you get -the first test defined (which works fine if there is only one test). - -One hint when using libFuzzer is to avoid dynamically allocating -memory during a test, if that memory would not be freed on a test -failure. This will leak memory and libFuzzer will run out of memory -very quickly in each fuzzing session. Using libFuzzer on macOS -requires compiling DeepState and your program with a clang that -supports libFuzzer (which the Apple built-in probably won't); this can be as simple as doing: - -```shell -brew install llvm@7 -CC=/usr/local/opt/llvm\@7/bin/clang CXX=/usr/local/opt/llvm\@7/bin/clang++ DEEPSTATE_LIBFUZZER=TRUE cmake .. -make install -``` - -Other ways of getting an appropriate LLVM may also work. - -On macOS, libFuzzer's normal output is not visible. Because libFuzzer -does not fork to execute tests, there is no issue with fork speed on -macOS for this kind of fuzzing. - -On any platform, -you can see more about what DeepState under libFuzzer is doing by -setting the `LIBFUZZER_LOUD` environment variable, and tell libFuzzer -to stop upon finding a failing test using `LIBFUZZER_EXIT_ON_FAIL`. - -#### HonggFuzz - -```bash -$ cd ./deepstate -$ mkdir -p build_honggfuzz && cd build_honggfuzz -$ export HONGGFUZZ_HOME="/honggfuzz" -$ CXX="$HONGGFUZZ_HOME/hfuzz_cc/hfuzz-clang++" CC="$HONGGFUZZ_HOME/hfuzz_cc/hfuzz-clang" cmake -DDEEPSTATE_HONGGFUZZ=ON ../ -$ make -j4 -$ sudo cp ./libdeepstate_HFUZZ.a /usr/local/lib/ -``` - -Dirs: -* PUSH_DIR - out/sync_dir/queue -* PULL_DIR - out/sync_dir/queue -* CRASH_DIR - out/the_fuzzer/crashes - - -#### Eclipser - -Eclipser uses QEMU instrumentation and therefore doesn't require -special DeepState compilation. You should just use `libdeepstate.a` -(QEMU doesn't like special instrumentation). 
- -Dirs: -* PUSH_DIR - out/sync_dir/queue -* PULL_DIR - out/sync_dir/queue -* CRASH_DIR - out/the_fuzzer/crashes - - -#### Angora - -Angora uses two binaries for fuzzing, one with taint tracking information -and one without. So we need two deepstate libraries and will need to -compile each test two times. - -Angora also requires old version of llvm/clang (between 4.0.0 and 7.1.0). -Executor will need to find it, so you may want to put it under `$ANGORA_HOME/clang+llvm/`. - -```bash -# for deepstate compilation only -$ export PATH="/clang+llvm/bin:$PATH" -$ export LD_LIBRARY_PATH="/clang+llvm/lib:$LD_LIBRARY_PATH" - -$ cd ./deepstate -$ export ANGORA_HOME="/angora" -$ mkdir -p build_angora_taint && cd build_angora_taint -$ export USE_TRACK=1 -$ CXX="$ANGORA_HOME/bin/angora-clang++" CC="$ANGORA_HOME/bin/angora-clang" cmake -DDEEPSTATE_ANGORA=ON ../ -$ make -j4 -i # ignore errors, because Angora doesn't support 32bit builds \ -$ sudo cp ./libdeepstate_taint.a /usr/local/lib/ -$ cd ../ - -$ mkdir -p build_angora_fast && cd build_angora_fast -$ export USE_FAST=1 -$ CXX="$ANGORA_HOME/bin/angora-clang++" CC="$ANGORA_HOME/bin/angora-clang" cmake -DDEEPSTATE_ANGORA=ON ../ -$ make -j4 -i -$ sudo cp ./libdeepstate_fast.a /usr/local/lib/ -``` - -```bash -$ mv /clang+llvm $ANGORA_HOME/ -$ mkdir out -$ deepstate-angora --compile_test ./SimpleCrash.cpp --out_test_name SimpleCrash -$ deepstate-angora -o out ./SimpleCrash.taint.angora ./SimpleCrash.fast.angora +user@a17bc44fd259:~/deepstate/build_afl/examples$ $AFL_HOME/afl-fuzz -i foo -o afl_Runlen -- ./Runlen_AFL --input_test_file @@ --no_fork --abort_on_fail +user@a17bc44fd259:~/deepstate/build_afl/examples$ deepstate-afl -o afl_Runlen2 ./Runlen_AFL --fuzzer_out ``` -Dirs: -* PUSH_DIR - out/sync_dir/queue -* PULL_DIR - out/angora/queue -* CRASH_DIR - out/angora/crashes - +### Documentation -### Replay +Check out [docs](/docs) folder: -To run saved inputs against some test, just run it with appropriate arguments: -``` 
-./SimpleCrash --abort_on_fail --input_test_files_dir ./out/output_afl/the_fuzzer/queue -``` -No need to use fuzzer specific compilation (so don't use `SimpleCrash_AFL` etc). - - -### Ensembler (fuzzers synchronization) - -You may run as many executors as you want (and have resources). But to synchronize -them, you need to specify `--sync_dir` option pointing to some shared directory. - -Each fuzzer will push produced test cases to that directory and pull from it as needed. - -Currently, there are some limitations in synchronization for the following fuzzers: -* Eclipser - needs to be restarted to use pulled test cases -* HonggFuzz - same as above -* Angora - pulled files need to have correct, AFL format (`id:00003`) and the id must -be greater that the biggest in Angora's local (pull) directory -* libFuzzer - stops fuzzing after first crash found, so there should be no crashes in `sync_dir` - - -## Which Fuzzer Should I Use? - -In fact, since DeepState supports libFuzzer, AFL, HonggFuzz, Angora and Eclipser, -a natural question is "which is the best fuzzer?" In -general, it depends! We suggest using them all, which DeepState makes -easy. libFuzzer is very fast, and sometimes the CMP breakdown it -provides is very useful; however, it's often bad at finding longer -paths where just covering nodes isn't helpful. AFL is still an -excellent general-purpose fuzzer, and often beats "improved" versions -over a range of programs. Finally, Eclipser has some tricks that let -it get traction in some cases where you might think only symbolic -execution (which wouldn't scale) could help. - - -## Test case reduction - -While tests generated by symbolic execution are likely to be highly -concise already, fuzzer-generated tests may be much larger than they -need to be. - -DeepState provides a test case reducer to shrink tests intelligently, -aware of the structure of a DeepState test. 
For example, if your -executable is named `TestFileSystem` and the test you want to reduce -is named `rmdirfail.test` you would use it like this: - -```shell -deepstate-reduce ./TestFileSystem create.test mincreate.test -``` - -In many cases, this will result in finding a different failure or -crash that allows smaller test cases, so you can also provide a string -that controls the criterion for which test outputs are considered valid -reductions (by default, the reducer looks for any test that fails or -crashes). Only outputs containing the `--criterion` are considered to -be valid reductions (`--regexpCriterion` lets you use a Python regexp -for more complex checks): - -```shell -deepstate-reduce ./TestFileSystem create.test mincreate.test --criteria "Assertion failed: ((testfs_inode_get_type(in) == I_FILE)" -``` - -The output will look something like: - -``` -Original test has 8192 bytes -Applied 128 range conversions -Last byte read: 527 -Shrinking to ignore unread bytes -Writing reduced test with 528 bytes to rnew -================================================================================ -Iteration #1 0.39 secs / 2 execs / 0.0% reduction -Structured deletion reduced test to 520 bytes -Writing reduced test with 520 bytes to rnew -0.77 secs / 3 execs / 1.52% reduction - -... 
- -Structured swap: PASS FINISHED IN 0.01 SECONDS, RUN: 5.1 secs / 151 execs / 97.54% reduction -Reduced byte 12 from 4 to 1 -Writing reduced test with 13 bytes to rnew -5.35 secs / 169 execs / 97.54% reduction -================================================================================ -Byte reduce: PASS FINISHED IN 0.5 SECONDS, RUN: 5.6 secs / 186 execs / 97.54% reduction -================================================================================ -Iteration #2 5.6 secs / 186 execs / 97.54% reduction -Structured deletion: PASS FINISHED IN 0.03 SECONDS, RUN: 5.62 secs / 188 execs / 97.54% reduction -Structured edge deletion: PASS FINISHED IN 0.03 SECONDS, RUN: 5.65 secs / 190 execs / 97.54% reduction -1-byte chunk removal: PASS FINISHED IN 0.19 SECONDS, RUN: 5.84 secs / 203 execs / 97.54% reduction -4-byte chunk removal: PASS FINISHED IN 0.19 SECONDS, RUN: 6.03 secs / 216 execs / 97.54% reduction -8-byte chunk removal: PASS FINISHED IN 0.19 SECONDS, RUN: 6.22 secs / 229 execs / 97.54% reduction -1-byte reduce and delete: PASS FINISHED IN 0.04 SECONDS, RUN: 6.26 secs / 232 execs / 97.54% reduction -4-byte reduce and delete: PASS FINISHED IN 0.03 SECONDS, RUN: 6.29 secs / 234 execs / 97.54% reduction -8-byte reduce and delete: PASS FINISHED IN 0.01 SECONDS, RUN: 6.31 secs / 235 execs / 97.54% reduction -Byte range removal: PASS FINISHED IN 0.76 SECONDS, RUN: 7.06 secs / 287 execs / 97.54% reduction -Structured swap: PASS FINISHED IN 0.01 SECONDS, RUN: 7.08 secs / 288 execs / 97.54% reduction -================================================================================ -Completed 2 iterations: 7.08 secs / 288 execs / 97.54% reduction -Padding test with 23 zeroes -Writing reduced test with 36 bytes to mincreate.test -``` +* [Basic usage](/docs/basic_usage.md) +* [Fuzzing](/docs/fuzzing.md) +* [Swarm testing](/docs/swarm_testing.md) -You can use `--which_test ` to specify which test to -run, as with the `--input_which_test` options to test replay. 
If you -find that test reduction is taking too long, you can try the `--fast` -option to get a quick-and-dirty reduction, and later use the default -settings, or even `--slowest` setting to try to reduce it further. - -Test case reduction should work on any OS. - - -## Swarm Testing - - [Swarm testing](https://agroce.github.io/issta12.pdf) is an approach - to test generation that [modifies the distributions of finite choices](https://blog.regehr.org/archives/591) - (e.g., string generation and `OneOf` choices of which functions to - call). It has a long history of improving compiler testing, and - usually (but not always) API testing. The Hypothesis Python testing - tool - [recently added swarm to its' stable of heuristics](https://github.com/HypothesisWorks/hypothesis/pull/2238). - -The basic idea is simple. Let's say we are generating tests of a -stack that overflows when a 64th item is pushed on the stack, due to a -typo in the overflow check. Our tests are -256 calls to push/pop/top/clear. Obviously the odds of getting 64 -pushes in a row, without popping or clearing, are very low (for a dumb -fuzzer, the odds are astronomically low). -Coverage-feedback and various byte-copying heuristics in AFL and -libFuzzer etc. can sometimes work around such problems, but in other, -more complex cases, they are stumped. Swarm testing "flips a coin" -before each test, and only includes API calls in the test if the coin -came up heads for that test. That means we just need some test to run -with heads for push and tails for pop and clear. - -DeepState supports fully automated swarm testing. Just compile your -harness with `-DDEEPSTATE_PURE_SWARM` and all your `OneOf`s _and_ -DeepState string generation functions will use swarm testing. This is -a huge help for the built-in fuzzer (for example, it more than doubles -the fault detection rate for the `Runlen` example above). 
Eclipser -can get "stuck" with swarm testing, but AFL and libFuzzer can -certainly sometimes benefit from swarm testing. There is also an option -`-DDEEPSTATE_MIXED_SWARM` that mixes swarm and regular generation. It -flips an additional coin for each potentially swarmable thing, and -decides to use swarm or not for that test. This can produce a mix of -swarm and regular generation that is unique to DeepState. If you -aren't finding any bugs using a harness that involves `OneOf` or -generating strings, it's a good idea to try both swarm methods before -declaring the code bug-free! There is another, more experimental, -swarm-like method, `-DDEEPSTATE_PROB_SWARM`, that is of possible interest. -Instead of pure binary inclusion/exclusion of choices, this varies the -actual distribution of choices. However, because this often ends up behaving -more like a non-swarm selection, it may not be as good at ferreting out -unusual behaviors due to extreme imbalance of choices. - -Note that tests produced under a particular swarm option are _not_ -binary compatible with other settings for swarm, due to the added coin flips. ## Contributing diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 00000000..307891b8 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,5 @@ +# Documentation + +* [Basic usage](/docs/basic_usage.md) +* [Fuzzing](/docs/fuzzing.md) +* [Swarm testing](/docs/swarm_testing.md) \ No newline at end of file diff --git a/docs/basic_usage.md b/docs/basic_usage.md new file mode 100644 index 00000000..6ac2f1a1 --- /dev/null +++ b/docs/basic_usage.md @@ -0,0 +1,237 @@ +# Basic usage + +DeepState consists of a static library, used to write test harnesses, +and command-line _executors_ written in Python. At this time, the best +documentation is in the [examples](/examples) and in our +[paper](https://agroce.github.io/bar18.pdf). 
A more extensive +example, using DeepState and libFuzzer to test a user-mode file +system, is available [here](https://github.com/agroce/testfs); in +particular the +[Tests.cpp](https://github.com/agroce/testfs/blob/master/Tests.cpp) +file and CMakeLists.txt show DeepState usage. Another extensive +example is a [differential tester that compares Google's leveldb and +Facebook's rocksdb](https://github.com/agroce/testleveldb). + +## Writing test harness + +```cpp +#include <deepstate/DeepState.hpp> + +using namespace deepstate; + +/* Simple, buggy, run-length encoding that creates "human readable" + * encodings by adding 'A'-1 to the count, and splitting at 26. + * e.g., encode("aaabbbbbc") = "aCbEcA" since C=3 and E=5 */ + +char* encode(const char* input) { + unsigned int len = strlen(input); + char* encoded = (char*)malloc((len*2)+1); + int pos = 0; + if (len > 0) { + unsigned char last = input[0]; + int count = 1; + for (int i = 1; i < len; i++) { + if (((unsigned char)input[i] == last) && (count < 26)) + count++; + else { + encoded[pos++] = last; + encoded[pos++] = 64 + count; + last = (unsigned char)input[i]; + count = 1; + } + } + encoded[pos++] = last; + encoded[pos++] = 65; // Should be 64 + count + } + encoded[pos] = '\0'; + return encoded; +} + +char* decode(const char* output) { + unsigned int len = strlen(output); + char* decoded = (char*)malloc((len/2)*26); + int pos = 0; + for (int i = 0; i < len; i += 2) { + for (int j = 0; j < (output[i+1] - 64); j++) { + decoded[pos++] = output[i]; + } + } + decoded[pos] = '\0'; + return decoded; +} + +// Can be (much) higher (e.g., > 1024) if we're using fuzzing, not symbolic execution +#define MAX_STR_LEN 6 + +TEST(Runlength, BoringUnitTest) { + ASSERT_EQ(strcmp(encode(""), ""), 0); + ASSERT_EQ(strcmp(encode("a"), "aA"), 0); + ASSERT_EQ(strcmp(encode("aaabbbbbc"), "aCbEcA"), 0); +} + +TEST(Runlength, EncodeDecode) { + char* original = DeepState_CStrUpToLen(MAX_STR_LEN, "abcdef0123456789"); + char* encoded = encode(original); +
ASSERT_LE(strlen(encoded), strlen(original)*2) << "Encoding is > length*2!"; + char* roundtrip = decode(encoded); + ASSERT_EQ(strncmp(roundtrip, original, MAX_STR_LEN), 0) << + "ORIGINAL: '" << original << "', ENCODED: '" << encoded << + "', ROUNDTRIP: '" << roundtrip << "'"; +} +``` + +The code above (which can be found +[here](https://github.com/trailofbits/deepstate/blob/master/examples/Runlen.cpp)) +shows an example of a DeepState test harness. Most of the code is +just the functions to be tested. Using DeepState to test them requires: + +- Including the DeepState C++ header and using the DeepState namespace + +- Defining at least one TEST, with names + +- Calling some DeepState APIs that produce data + - In this example, we see the `DeepState_CStrUpToLen` call tells + DeepState to produce a string that has up to `MAX_STR_LEN` + characters, chosen from those present in hex strings. + +- Optionally making some assertions about the correctness of the +results + - In `Runlen.cpp` this is the `ASSERT_LE` and `ASSERT_EQ` checks. + - In the absence of any properties to check, DeepState can still + look for memory safety violations, crashes, and other general + categories of undesirable behavior, like any fuzzer. + + +## Running the test + +``` +~/deepstate/build/examples$ ./Runlen +TRACE: Running: Runlength_EncodeDecode from /Users/alex/deepstate/examples/Runlen.cpp(55) +TRACE: Passed: Runlength_EncodeDecode +TRACE: Running: Runlength_BoringUnitTest from /Users/alex/deepstate/examples/Runlen.cpp(49) +TRACE: Passed: Runlength_BoringUnitTest +``` + +Executing DeepState executable will run the "BoringUnitTest" and "EncodeDecode". +The first one is like a traditional hand-written unit test and simply tests +fixed inputs devised by a programmer. The second one uses default (all zero bytes) +values. These inputs do not expose the bug in `encode`. + +Using DeepState, however, it is easy to find the bug. 
Just try: + +```shell +deepstate-angr ./Runlen --output_test_dir out +``` + +or + +```shell +./Runlen --fuzz --exit_on_fail --output_test_dir out +``` + +The fuzzer will output something like: + +``` +INFO: Starting fuzzing +WARNING: No seed provided; using 1546631311 +WARNING: No test specified, defaulting to last test defined (Runlength_EncodeDecode) +CRITICAL: /Users/alex/deepstate/examples/Runlen.cpp(60): ORIGINAL: '91c499', ENCODED: '9A1AcA4A9A', ROUNDTRIP: '91c49' +ERROR: Failed: Runlength_EncodeDecode +``` + + +## Tests replay + +To run saved inputs against some test, just run the executable with appropriate arguments: +```shell +./Runlen --input_test_dir ./out +``` + +## Test case reduction + +While tests generated by symbolic execution are likely to be highly +concise already, fuzzer-generated tests may be much larger than they +need to be. + +DeepState provides a test case reducer to shrink tests intelligently, +aware of the structure of a DeepState test. For example, if your +executable is named `TestFileSystem` and the test you want to reduce +is named `create.test`, you would use it like this: + +```shell +deepstate-reduce ./TestFileSystem create.test mincreate.test +``` + +In many cases, this will result in finding a different failure or +crash that allows smaller test cases, so you can also provide a string +that controls the criterion for which test outputs are considered valid +reductions (by default, the reducer looks for any test that fails or +crashes).
Only outputs containing the `--criterion` string are considered to +be valid reductions (`--regexpCriterion` lets you use a Python regexp +for more complex checks): + +```shell +deepstate-reduce ./TestFileSystem create.test mincreate.test --criterion "Assertion failed: ((testfs_inode_get_type(in) == I_FILE)" +``` + +The output will look something like: + +``` +Original test has 8192 bytes +Applied 128 range conversions +Last byte read: 527 +Shrinking to ignore unread bytes +Writing reduced test with 528 bytes to rnew +================================================================================ +Iteration #1 0.39 secs / 2 execs / 0.0% reduction +Structured deletion reduced test to 520 bytes +Writing reduced test with 520 bytes to rnew +0.77 secs / 3 execs / 1.52% reduction + +... + +Structured swap: PASS FINISHED IN 0.01 SECONDS, RUN: 5.1 secs / 151 execs / 97.54% reduction +Reduced byte 12 from 4 to 1 +Writing reduced test with 13 bytes to rnew +5.35 secs / 169 execs / 97.54% reduction +================================================================================ +Byte reduce: PASS FINISHED IN 0.5 SECONDS, RUN: 5.6 secs / 186 execs / 97.54% reduction +================================================================================ +Iteration #2 5.6 secs / 186 execs / 97.54% reduction +Structured deletion: PASS FINISHED IN 0.03 SECONDS, RUN: 5.62 secs / 188 execs / 97.54% reduction +Structured edge deletion: PASS FINISHED IN 0.03 SECONDS, RUN: 5.65 secs / 190 execs / 97.54% reduction +1-byte chunk removal: PASS FINISHED IN 0.19 SECONDS, RUN: 5.84 secs / 203 execs / 97.54% reduction +4-byte chunk removal: PASS FINISHED IN 0.19 SECONDS, RUN: 6.03 secs / 216 execs / 97.54% reduction +8-byte chunk removal: PASS FINISHED IN 0.19 SECONDS, RUN: 6.22 secs / 229 execs / 97.54% reduction +1-byte reduce and delete: PASS FINISHED IN 0.04 SECONDS, RUN: 6.26 secs / 232 execs / 97.54% reduction +4-byte reduce and delete: PASS FINISHED IN 0.03 SECONDS, RUN: 6.29 secs / 234 execs /
97.54% reduction +8-byte reduce and delete: PASS FINISHED IN 0.01 SECONDS, RUN: 6.31 secs / 235 execs / 97.54% reduction +Byte range removal: PASS FINISHED IN 0.76 SECONDS, RUN: 7.06 secs / 287 execs / 97.54% reduction +Structured swap: PASS FINISHED IN 0.01 SECONDS, RUN: 7.08 secs / 288 execs / 97.54% reduction +================================================================================ +Completed 2 iterations: 7.08 secs / 288 execs / 97.54% reduction +Padding test with 23 zeroes +Writing reduced test with 36 bytes to mincreate.test +``` + +You can use `--which_test <testname>` to specify which test to +run, as with the `--input_which_test` option for test replay. If you +find that test reduction is taking too long, you can try the `--fast` +option to get a quick-and-dirty reduction, and later use the default +settings, or even the `--slowest` setting to try to reduce it further. + +Test case reduction should work on any OS. + + +## Log Levels + +By default, DeepState is not very verbose about testing activity, +other than failing tests. The `DEEPSTATE_LOG` environment variable +or the `--min_log_level` argument lowers the threshold for output, +with 0 = `DEBUG`, 1 = `TRACE` (output from the tests, including from `printf`), +2 = `INFO` (DeepState messages, the default), 3 = `WARNING`, +4 = `ERROR`, 5 = `EXTERNAL` (output from other programs such as +libFuzzer), and 6 = `CRITICAL` messages. Lowering the `min_log_level` can be very +useful for understanding what a DeepState harness is actually doing; +often, setting `--min_log_level 1` in either fuzzing or symbolic +execution will give sufficient information to debug your test harness. \ No newline at end of file diff --git a/docs/fuzzing.md b/docs/fuzzing.md new file mode 100644 index 00000000..971b3fbc --- /dev/null +++ b/docs/fuzzing.md @@ -0,0 +1,290 @@ +# Built-In Fuzzer + +Every DeepState executable provides a simple built-in fuzzer that +generates tests using completely random data.
Using this fuzzer is as +simple as calling the native executable with the `--fuzz` argument. +The fuzzer also takes a `seed` and `timeout` (default of two minutes) +to control the fuzzing. By default fuzzing saves +only failing and crashing tests, and these only when given an output +directory. If you want to actually save the test cases +generated, you need to add a `--output_test_dir` argument to tell +DeepState where to put the generated tests, and if you want the +(totally random and unlikely to be high-quality) passing tests, you +need to add `--fuzz_save_passing`. + +Note that while symbolic execution only works on Linux, without a +fairly complex cross-compilation process, the brute force fuzzer works +on macOS or (as far as we know) any Unix-like system. + +## A Note on MacOS and Forking + +Normally, when running a test for replay or fuzzing, DeepState forks +in order to cleanly handle crashes of a test. Unfortunately, `fork()` +on macOS is _extremely_ slow. When using the built-in fuzzer or +replaying more than a few tests, it is highly recommended to add the `--no_fork` +option on macOS, unless you need the added crash handling (that is, +only when things aren't working without that option). + +# External fuzzers + +DeepState currently supports five external fuzzers: +[libFuzzer](https://llvm.org/docs/LibFuzzer.html), +[AFL](http://lcamtuf.coredump.cx/afl), +[HonggFuzz](https://github.com/google/honggfuzz), +[Eclipser](https://github.com/SoftSec-KAIST/Eclipser) and +[Angora](https://github.com/AngoraFuzzer/Angora). + +To use one of them as a DeepState backend, you need to: +* install it +* compile DeepState with it +* compile the target test with it +* run the executor with the location of the installed files provided + +To install the fuzzer, follow the instructions on the appropriate webpage. + +To compile DeepState with the fuzzer, run `cmake` with the +`-DDEEPSTATE_FUZZERNAME=on` (like `-DDEEPSTATE_AFL=on`) option and the +`CC/CXX` variables set to the fuzzer's compiler.
This will produce +library called `libdeepstate_FUZZERNAME.a`, which you may put to +standard location (`/usr/local/lib/`). + +To compile target test, use fuzzer's compiler and link with appropriate +DeepState library (`-ldeepstate_FUZZERNAME`). + +To provide location of fuzzer's executables to python executor you may: +* put the executables to some $PATH location +* export `FUZZERNAME_HOME` environment variable (like `ANGORA_HOME`) +with value set to the location of fuzzer's executables +* specify `--home_path` argument when running the executor + +All that, rather complicated setup may be simplified with Docker. +Just build the image (changing OS in `./docker/base/Dockerfile` if needed) +and use it with your project. All the fuzzers and environment variables will be there. + +## Fuzzer executors usage + +Fuzzer executors (`deepstate-honggfuzz` etc.) are meant to be as uniform +as possible, thus making it easy to compile and run tests. + +Compilation: `deepstate-afl --compile_test ./SimpleCrash.cpp --out_test_name SimpleCrash` + +Run: `mkdir out && deepstate-afl --output_test_dir out ./SimpleCrash.afl` + +The only required arguments are location of output directory and the test.
+Optional arguments: +``` +--input_seeds - location of directory with initial inputs +--max_input_size - maximal length of inputs +--exec_timeout - timeout for run on one input file +--timeout - timeout for whole fuzzing process +--fuzzer_out - use fuzzer output rather than deepstate (uniform) one +--mem_limit - memory limit for the fuzzer +--min_log_level - how much to log (0=DEBUG, 6=CRITICAL) +--blackbox - fuzz not-instrumented binary +--dictionary - file with words that may enhance fuzzing (fuzzer dependent format) +``` + +Each fuzzer creates following files/directories under output directory: +``` +* deepstate-stats.txt - some statistic parsed by executor +* fuzzer-output.txt - all stdout/stderr from the fuzzer +* PUSH_DIR - fuzzer will take (synchronize) additional inputs from here +* PULL_DIR - fuzzer will save produced inputs here (may be the same as PUSH_DIR) +* CRASH_DIR - fuzzer will save crashes here +``` + +Failed tests are treated as crashes when using fuzzer executors +(because of `--abort_on_fail` flag). + +Note that some fuzzers (notably AFL) require input seeds. When not provided, +executor will create a dumb one, which may be not very efficient for fuzzing. + +Input files need to be smaller than the DeepState input size limit (8192 bytes), +which is the default limit in executors. But not all fuzzers support file size +limitation, so if your test cases grow too large, you may need to stop fuzzing +and minimize them. + +Also, there should not be crash-producing files inside input seeds directory. + +Because AFL and other file-based fuzzers only rely on the DeepState +native test executable, they should (like DeepState's built-in simple +fuzzer) work fine on macOS and other Unix-like OSes.
On macOS, you +will want to consider doing the work to use [persistent mode](http://lcamtuf.blogspot.com/2015/06/new-in-afl-persistent-mode.html), or even +running inside a VM, due to AFL (unless in persistent mode) relying +extensively on forks, which are very slow on macOS. + +### AFL + +```bash +$ cd ./deepstate +$ mkdir -p build_afl && cd build_afl +$ export AFL_HOME="/afl-2.52b" +$ CXX="$AFL_HOME/afl-clang++" CC="$AFL_HOME/afl-clang" cmake -DDEEPSTATE_AFL=ON ../ +$ make -j4 +$ sudo cp ./libdeepstate_AFL.a /usr/local/lib/ +``` + +Dirs: +* PUSH_DIR - out/sync_dir/queue +* PULL_DIR - out/the_fuzzer/queue +* CRASH_DIR - out/the_fuzzer/crashes + + +### libFuzzer + +It is bundled into newer clang compilers. + +```bash +$ cd ./deepstate +$ mkdir -p build_libfuzzer && cd build_libfuzzer +$ CXX=clang++ CC=clang cmake -DDEEPSTATE_LIBFUZZER=ON ../ +$ make -j4 +$ sudo cp ./libdeepstate_LF.a /usr/local/lib/ +``` + +Dirs: +* PUSH_DIR - out/sync_dir/queue +* PULL_DIR - out/sync_dir/queue +* CRASH_DIR - out/the_fuzzer/crashes + +Use the `LIBFUZZER_WHICH_TEST` +environment variable to control which test libFuzzer runs, using a +fully qualified name (e.g., +`Arithmetic_InvertibleMultiplication_CanFail`). By default, you get +the first test defined (which works fine if there is only one test). + +One hint when using libFuzzer is to avoid dynamically allocating +memory during a test, if that memory would not be freed on a test +failure. This will leak memory and libFuzzer will run out of memory +very quickly in each fuzzing session. Using libFuzzer on macOS +requires compiling DeepState and your program with a clang that +supports libFuzzer (which the Apple built-in probably won't); this can be as simple as doing: + +```shell +brew install llvm@7 +CC=/usr/local/opt/llvm\@7/bin/clang CXX=/usr/local/opt/llvm\@7/bin/clang++ DEEPSTATE_LIBFUZZER=TRUE cmake .. +make install +``` + +Other ways of getting an appropriate LLVM may also work. 
+ +On macOS, libFuzzer's normal output is not visible. Because libFuzzer +does not fork to execute tests, there is no issue with fork speed on +macOS for this kind of fuzzing. + +On any platform, +you can see more about what DeepState under libFuzzer is doing by +setting the `LIBFUZZER_LOUD` environment variable, and tell libFuzzer +to stop upon finding a failing test using `LIBFUZZER_EXIT_ON_FAIL`. + +### HonggFuzz + +```bash +$ cd ./deepstate +$ mkdir -p build_honggfuzz && cd build_honggfuzz +$ export HONGGFUZZ_HOME="/honggfuzz" +$ CXX="$HONGGFUZZ_HOME/hfuzz_cc/hfuzz-clang++" CC="$HONGGFUZZ_HOME/hfuzz_cc/hfuzz-clang" cmake -DDEEPSTATE_HONGGFUZZ=ON ../ +$ make -j4 +$ sudo cp ./libdeepstate_HFUZZ.a /usr/local/lib/ +``` + +Dirs: +* PUSH_DIR - out/sync_dir/queue +* PULL_DIR - out/sync_dir/queue +* CRASH_DIR - out/the_fuzzer/crashes + + +### Eclipser + +Eclipser uses QEMU instrumentation and therefore doesn't require +special DeepState compilation. You should just use `libdeepstate.a` +(QEMU doesn't like special instrumentation). + +Dirs: +* PUSH_DIR - out/sync_dir/queue +* PULL_DIR - out/sync_dir/queue +* CRASH_DIR - out/the_fuzzer/crashes + + +### Angora + +Angora uses two binaries for fuzzing, one with taint tracking information +and one without. So we need two deepstate libraries and will need to +compile each test two times. + +Angora also requires old version of llvm/clang (between 4.0.0 and 7.1.0). +Executor will need to find it, so you may want to put it under `$ANGORA_HOME/clang+llvm/`. 
+ +```bash +# for deepstate compilation only +$ export PATH="/clang+llvm/bin:$PATH" +$ export LD_LIBRARY_PATH="/clang+llvm/lib:$LD_LIBRARY_PATH" + +$ cd ./deepstate +$ export ANGORA_HOME="/angora" +$ mkdir -p build_angora_taint && cd build_angora_taint +$ export USE_TRACK=1 +$ CXX="$ANGORA_HOME/bin/angora-clang++" CC="$ANGORA_HOME/bin/angora-clang" cmake -DDEEPSTATE_ANGORA=ON ../ +$ make -j4 -i # ignore errors, because Angora doesn't support 32bit builds \ +$ sudo cp ./libdeepstate_taint.a /usr/local/lib/ +$ cd ../ + +$ mkdir -p build_angora_fast && cd build_angora_fast +$ export USE_FAST=1 +$ CXX="$ANGORA_HOME/bin/angora-clang++" CC="$ANGORA_HOME/bin/angora-clang" cmake -DDEEPSTATE_ANGORA=ON ../ +$ make -j4 -i +$ sudo cp ./libdeepstate_fast.a /usr/local/lib/ +``` + +```bash +$ mv /clang+llvm $ANGORA_HOME/ +$ mkdir out +$ deepstate-angora --compile_test ./SimpleCrash.cpp --out_test_name SimpleCrash +$ deepstate-angora -o out ./SimpleCrash.taint.angora ./SimpleCrash.fast.angora +``` + +Dirs: +* PUSH_DIR - out/sync_dir/queue +* PULL_DIR - out/angora/queue +* CRASH_DIR - out/angora/crashes + + +## Replay + +To run saved inputs against some test, just run it with appropriate arguments: + +``` +./Runlen --abort_on_fail --input_test_files_dir ./out/output_afl/the_fuzzer/queue +``` + +No need to use fuzzer specific compilation (so don't use `SimpleCrash_AFL` etc. +They are slower due to instrumentation). + + +## Ensembler (fuzzers synchronization) + +You may run as many executors as you want (and have resources). But to synchronize +them, you need to specify `--sync_dir` option pointing to some shared directory. + +Each fuzzer will push produced test cases to that directory and pull from it as needed. 
+ +Currently, there are some limitations in synchronization for the following fuzzers: +* Eclipser - needs to be restarted to use pulled test cases +* HonggFuzz - same as above +* Angora - pulled files need to have correct, AFL format (`id:00003`) and the id must +be greater than the biggest in Angora's local (pull) directory +* libFuzzer - stops fuzzing after first crash found, so there should be no crashes in `sync_dir` + + +## Which Fuzzer Should I Use? + +In fact, since DeepState supports libFuzzer, AFL, HonggFuzz, Angora and Eclipser, +a natural question is "which is the best fuzzer?" In +general, it depends! We suggest using them all, which DeepState makes +easy. libFuzzer is very fast, and sometimes the CMP breakdown it +provides is very useful; however, it's often bad at finding longer +paths where just covering nodes isn't helpful. AFL is still an +excellent general-purpose fuzzer, and often beats "improved" versions +over a range of programs. Finally, Eclipser has some tricks that let +it get traction in some cases where you might think only symbolic +execution (which wouldn't scale) could help. diff --git a/docs/swarm_testing.md b/docs/swarm_testing.md new file mode 100644 index 00000000..df7be322 --- /dev/null +++ b/docs/swarm_testing.md @@ -0,0 +1,45 @@ +# Swarm Testing + +[Swarm testing](https://agroce.github.io/issta12.pdf) is an approach +to test generation that [modifies the distributions of finite choices](https://blog.regehr.org/archives/591) +(e.g., string generation and `OneOf` choices of which functions to +call). It has a long history of improving compiler testing, and +usually (but not always) API testing. The Hypothesis Python testing +tool +[recently added swarm to its stable of heuristics](https://github.com/HypothesisWorks/hypothesis/pull/2238). + +The basic idea is simple. Let's say we are generating tests of a +stack that overflows when a 64th item is pushed on the stack, due to a +typo in the overflow check.
Our tests are +256 calls to push/pop/top/clear. Obviously the odds of getting 64 +pushes in a row, without popping or clearing, are very low (for a dumb +fuzzer, the odds are astronomically low). +Coverage-feedback and various byte-copying heuristics in AFL and +libFuzzer etc. can sometimes work around such problems, but in other, +more complex cases, they are stumped. Swarm testing "flips a coin" +before each test, and only includes API calls in the test if the coin +came up heads for that test. That means we just need some test to run +with heads for push and tails for pop and clear. + +DeepState supports fully automated swarm testing. Just compile your +harness with `-DDEEPSTATE_PURE_SWARM` and all your `OneOf`s _and_ +DeepState string generation functions will use swarm testing. This is +a huge help for the built-in fuzzer (for example, it more than doubles +the fault detection rate for the `Runlen` example above). Eclipser +can get "stuck" with swarm testing, but AFL and libFuzzer can +certainly sometimes benefit from swarm testing. There is also an option +`-DDEEPSTATE_MIXED_SWARM` that mixes swarm and regular generation. It +flips an additional coin for each potentially swarmable thing, and +decides to use swarm or not for that test. This can produce a mix of +swarm and regular generation that is unique to DeepState. If you +aren't finding any bugs using a harness that involves `OneOf` or +generating strings, it's a good idea to try both swarm methods before +declaring the code bug-free! There is another, more experimental, +swarm-like method, `-DDEEPSTATE_PROB_SWARM`, that is of possible interest. +Instead of pure binary inclusion/exclusion of choices, this varies the +actual distribution of choices. However, because this often ends up behaving +more like a non-swarm selection, it may not be as good at ferreting out +unusual behaviors due to extreme imbalance of choices. 
+ +Note that tests produced under a particular swarm option are _not_ +binary compatible with other settings for swarm, due to the added coin flips. diff --git a/docs/symbolic_execution.md b/docs/symbolic_execution.md new file mode 100644 index 00000000..8bd254d9 --- /dev/null +++ b/docs/symbolic_execution.md @@ -0,0 +1,6 @@ +# Symbolic execution + +TODO: +- something general about SE +- something about angr and manticore +- how DeepState integrates SE (simplified stuff from the paper) diff --git a/docs/test_harness.md b/docs/test_harness.md new file mode 100644 index 00000000..6f15dcee --- /dev/null +++ b/docs/test_harness.md @@ -0,0 +1,5 @@ +# Test harness + +TODO: +- how it works +- API - what methods can be used in the harness From e8940a4f554c2b65d4544264f526244bfc7ae8e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Mon, 17 Feb 2020 12:31:08 +0100 Subject: [PATCH 33/46] revert CI --- .github/workflows/ci.yml | 48 ++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9ddae741..2217be29 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -66,32 +66,28 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - name: docker without cache - run: | - docker build -t deepstate-base -f docker/base/Dockerfile docker/base - docker build -t deepstate --build-arg make_j=2 -f ./docker/Dockerfile . 
- # - uses: whoan/docker-build-with-cache-action@v3 - # with: - # username: ${{ secrets.DOCKER_USERNAME }} - # password: ${{ secrets.DOCKER_PASSWORD }} - # registry: docker.pkg.github.com - # image_name: trailofbits/deepstate/deepstate-base - # image_tag: latest - # context: docker/base - # dockerfile: Dockerfile - # push_image_and_stages: true # because we run workflow on PRs - # build_extra_args: "--tag deepstate-base" - # - uses: whoan/docker-build-with-cache-action@v3 - # with: - # username: ${{ secrets.DOCKER_USERNAME }} - # password: ${{ secrets.DOCKER_PASSWORD }} - # registry: docker.pkg.github.com - # image_name: trailofbits/deepstate/deepstate - # image_tag: latest - # context: . - # dockerfile: docker/Dockerfile - # push_image_and_stages: true # because we run workflow on PRs - # build_extra_args: "--cache-from=deepstate-base" + - uses: whoan/docker-build-with-cache-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + registry: docker.pkg.github.com + image_name: trailofbits/deepstate/deepstate-base + image_tag: latest + context: docker/base + dockerfile: Dockerfile + push_image_and_stages: true # because we run workflow on PRs + build_extra_args: "--tag deepstate-base" + - uses: whoan/docker-build-with-cache-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + registry: docker.pkg.github.com + image_name: trailofbits/deepstate/deepstate + image_tag: latest + context: . 
+ dockerfile: docker/Dockerfile + push_image_and_stages: true # because we run workflow on PRs + build_extra_args: "--cache-from=deepstate-base --build-arg=make_j=2" - name: Test fuzzers run: | docker run -it deepstate bash -c 'sudo pip3 install nose && nosetests tests/test_fuzzers.py' From 286e4da5f1330c6465ab281e1e432b889d34f067 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Mon, 17 Feb 2020 20:45:59 +0100 Subject: [PATCH 34/46] fix for symex str formatting --- bin/deepstate/core/symex.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bin/deepstate/core/symex.py b/bin/deepstate/core/symex.py index e17c51cc..bbfea5ab 100644 --- a/bin/deepstate/core/symex.py +++ b/bin/deepstate/core/symex.py @@ -296,6 +296,9 @@ def _stream_to_message(self, stream): data = struct.pack('BBBBBBBB', *val_bytes) val = struct.unpack(unpack_str, data[:struct.calcsize(unpack_str)])[0] + if type(val) == bytes: + val = val.decode('unicode_escape') + # Remove length specifiers that are not supported. 
format_str = format_str.replace('l', '') format_str = format_str.replace('h', '') From e8d271ece8e4bc98f2eaa9353bf28c8073a3e182 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Mon, 17 Feb 2020 20:46:19 +0100 Subject: [PATCH 35/46] fix for CI fuzz test --- .github/workflows/ci.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2217be29..2aa9e50f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -90,4 +90,7 @@ jobs: build_extra_args: "--cache-from=deepstate-base --build-arg=make_j=2" - name: Test fuzzers run: | - docker run -it deepstate bash -c 'sudo pip3 install nose && nosetests tests/test_fuzzers.py' + docker run -it deepstate bash -c 'sudo pip3 install nose && \ + echo core | sudo tee /proc/sys/kernel/core_pattern && \ + echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor && \ + nosetests tests/test_fuzzers.py' From fab17579b82bbabb476503fc9d93bea8e4843335 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Tue, 18 Feb 2020 04:58:18 +0100 Subject: [PATCH 36/46] fixes in docker build --- docker/Dockerfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 70fe83ce..576eaa82 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -50,6 +50,7 @@ RUN sudo chown user:user -R ./deepstate WORKDIR ./deepstate # Angora part 2 +# ignore errors in `make`, because Angora doesn't support 32bit builds RUN echo 'Building deepstate with Angora - taint' \ && mkdir -p build_angora_taint && cd build_angora_taint \ && export PATH="$DEPS_DIR/angora/clang+llvm/bin:$PATH" \ @@ -57,7 +58,7 @@ RUN echo 'Building deepstate with Angora - taint' \ && export USE_TRACK=1 \ && export ANGORA_HOME="$DEPS_DIR/angora" \ && CXX="$ANGORA_HOME/bin/angora-clang++" CC="$ANGORA_HOME/bin/angora-clang" cmake -DDEEPSTATE_ANGORA=ON ../ \ - && make -j $make_j -i # 
ignore errors, because Angora doesn't support 32bit builds \ + && make -j $make_j -i \ && sudo cp ./libdeepstate_taint.a /usr/local/lib/ RUN echo 'Building deepstate with Angora - fast' \ @@ -95,6 +96,7 @@ RUN echo 'Building deepstate with AFL' \ # Honggfuzz COPY --from=Honggfuzz /home/user/honggfuzz $DEPS_DIR/honggfuzz +RUN sudo apt-get -y install libunwind-dev RUN echo 'Building deepstate with Honggfuzz' \ && mkdir -p build_honggfuzz && cd build_honggfuzz \ && export HONGGFUZZ_HOME="$DEPS_DIR/honggfuzz" \ From 5d5f83a9bd0d3ca18d063022d63f1dfaad4a2cba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Tue, 18 Feb 2020 04:58:30 +0100 Subject: [PATCH 37/46] fixes in docs --- docs/basic_usage.md | 7 ++++++- docs/fuzzing.md | 5 +++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/docs/basic_usage.md b/docs/basic_usage.md index 6ac2f1a1..99c604a8 100644 --- a/docs/basic_usage.md +++ b/docs/basic_usage.md @@ -142,9 +142,14 @@ ERROR: Failed: Runlength_EncodeDecode ## Tests replay -To run saved inputs against some test, just run the executable with appropriate arguments: +To run saved inputs against the test, just run the executable with appropriate arguments: ```shell ./Runlen --input_test_dir ./out +INFO: Ran 0 tests for Runlength_BoringUnitTest; 0 tests failed +CRITICAL: /home/gros/studia/mgr/fuzzing/tools/deepstate/examples/Runlen.cpp(60): ORIGINAL: 'abbbbb', ENCODED: 'aAbA', ROUNDTRIP: 'ab' +ERROR: Failed: Runlength_EncodeDecode +... +INFO: Ran 64 tests for Runlength_EncodeDecode; 31 tests failed ``` ## Test case reduction diff --git a/docs/fuzzing.md b/docs/fuzzing.md index 971b3fbc..bea46bf7 100644 --- a/docs/fuzzing.md +++ b/docs/fuzzing.md @@ -40,7 +40,8 @@ To use one of them as DeepState backend, you need to: * compile target test with it * run executor with location of installed files provided -To install the fuzzer follow instructions on appropriate webpage. 
+To install the fuzzer follow instructions on its website or +run Deepstate via Docker, as described in [README.md](/README.md) To compile DeepState with the fuzzer, run `cmake` with `-DDEEPSTATE_FUZZERNAME=on` (like `-DDEEPSTATE_AFL=on`) option and @@ -249,7 +250,7 @@ Dirs: * CRASH_DIR - out/angora/crashes -## Replay +## Tests replay To run saved inputs against some test, just run it with appropriate arguments: From b9700595b933f7fce171f29976eb18ed52fc0022 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Tue, 18 Feb 2020 05:03:28 +0100 Subject: [PATCH 38/46] fix afl on MacOS --- bin/deepstate/executors/fuzz/afl.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/bin/deepstate/executors/fuzz/afl.py b/bin/deepstate/executors/fuzz/afl.py index 5747496a..96da023a 100644 --- a/bin/deepstate/executors/fuzz/afl.py +++ b/bin/deepstate/executors/fuzz/afl.py @@ -70,17 +70,19 @@ def pre_exec(self): super().pre_exec() # check if core dump pattern is set as `core` - with open("/proc/sys/kernel/core_pattern") as f: - if not "core" in f.read(): - raise FuzzFrontendError("No core dump pattern set. Execute 'echo core | sudo tee /proc/sys/kernel/core_pattern'") + if os.path.isfile("/proc/sys/kernel/core_pattern"): + with open("/proc/sys/kernel/core_pattern") as f: + if not "core" in f.read(): + raise FuzzFrontendError("No core dump pattern set. Execute 'echo core | sudo tee /proc/sys/kernel/core_pattern'") # check if CPU scaling governor is set to `performance` - with open("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor") as f: - if not "perf" in f.read(4): - with open("/sys/devices/system/cpu/cpu0/cpufreq/scaling_min_freq") as f_min: - with open("/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq") as f_max: - if f_min.read() != f_max.read(): - raise FuzzFrontendError("Suboptimal CPU scaling governor. 
Execute 'echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor'") + if os.path.isfile("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor"): + with open("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor") as f: + if not "perf" in f.read(4): + with open("/sys/devices/system/cpu/cpu0/cpufreq/scaling_min_freq") as f_min: + with open("/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq") as f_max: + if f_min.read() != f_max.read(): + raise FuzzFrontendError("Suboptimal CPU scaling governor. Execute 'echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor'") # if we are in dumb mode and we are not using crash mode if 'n' in self.fuzzer_args and 'C' not in self.fuzzer_args: From e3b7690bba571b18e70cd287d027e8fbe86f0331 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Tue, 18 Feb 2020 11:38:22 +0100 Subject: [PATCH 39/46] clear registry cache --- .github/workflows/ci.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2aa9e50f..ef53892f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -65,6 +65,14 @@ jobs: container: runs-on: ubuntu-latest steps: + - name: Clear cache + run: | + docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} docker.pkg.github.com + docker images + docker rmi -f trailofbits/deepstate/deepstate-base + docker rmi -f trailofbits/deepstate/deepstate + docker images | grep trailofbits/deepstate/deepstate-base | tr -s ' ' | cut -d ' ' -f 2 | xargs -I {} docker rmi trailofbits/deepstate/deepstate-base:{} + docker images | grep trailofbits/deepstate/deepstate | tr -s ' ' | cut -d ' ' -f 2 | xargs -I {} docker rmi trailofbits/deepstate/deepstate:{} - uses: actions/checkout@v2 - uses: whoan/docker-build-with-cache-action@v3 with: From 3bf101da36c75cc0774a9b774683fc8421e37f6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Tue, 18 
Feb 2020 11:52:27 +0100 Subject: [PATCH 40/46] clear registry cache2 --- .github/workflows/ci.yml | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ef53892f..f20cc580 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -65,16 +65,8 @@ jobs: container: runs-on: ubuntu-latest steps: - - name: Clear cache - run: | - docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} docker.pkg.github.com - docker images - docker rmi -f trailofbits/deepstate/deepstate-base - docker rmi -f trailofbits/deepstate/deepstate - docker images | grep trailofbits/deepstate/deepstate-base | tr -s ' ' | cut -d ' ' -f 2 | xargs -I {} docker rmi trailofbits/deepstate/deepstate-base:{} - docker images | grep trailofbits/deepstate/deepstate | tr -s ' ' | cut -d ' ' -f 2 | xargs -I {} docker rmi trailofbits/deepstate/deepstate:{} - uses: actions/checkout@v2 - - uses: whoan/docker-build-with-cache-action@v3 + - uses: GrosQuildu/docker-build-with-cache-action@master with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} @@ -85,7 +77,7 @@ jobs: dockerfile: Dockerfile push_image_and_stages: true # because we run workflow on PRs build_extra_args: "--tag deepstate-base" - - uses: whoan/docker-build-with-cache-action@v3 + - uses: GrosQuildu/docker-build-with-cache-action@master with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} @@ -94,6 +86,7 @@ jobs: image_tag: latest context: . 
dockerfile: docker/Dockerfile + pull_image_and_stages: false push_image_and_stages: true # because we run workflow on PRs build_extra_args: "--cache-from=deepstate-base --build-arg=make_j=2" - name: Test fuzzers From 0c14227348dc6acb3dbd5cfe8ef26ae5d7534073 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Tue, 18 Feb 2020 20:05:11 +0100 Subject: [PATCH 41/46] fix docker angora --- docker/install_angora.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/install_angora.sh b/docker/install_angora.sh index dbe426cc..74ceb50f 100644 --- a/docker/install_angora.sh +++ b/docker/install_angora.sh @@ -6,6 +6,7 @@ sudo sed -i -- 's/#deb-src/deb-src/g' /etc/apt/sources.list sudo sed -i -- 's/# deb-src/deb-src/g' /etc/apt/sources.list # Install dependencies +sudo apt-get update sudo apt-get install -y rustc \ cargo libstdc++-7-dev zlib1g-dev \ && sudo rm -rf /var/lib/apt/lists/* From 18a134cbbb7967f119e94548d2ccbb432e41335d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Tue, 18 Feb 2020 23:40:10 +0100 Subject: [PATCH 42/46] fix CI fuzz tests --- .github/workflows/ci.yml | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f20cc580..379ffdfb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -66,7 +66,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - uses: GrosQuildu/docker-build-with-cache-action@master + - uses: whoan/docker-build-with-cache-action@v3 with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} @@ -77,7 +77,7 @@ jobs: dockerfile: Dockerfile push_image_and_stages: true # because we run workflow on PRs build_extra_args: "--tag deepstate-base" - - uses: GrosQuildu/docker-build-with-cache-action@master + - uses: whoan/docker-build-with-cache-action@v3 with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} @@ -86,12 
+86,11 @@ jobs: image_tag: latest context: . dockerfile: docker/Dockerfile - pull_image_and_stages: false + # pull_image_and_stages: false # if OOM error push_image_and_stages: true # because we run workflow on PRs build_extra_args: "--cache-from=deepstate-base --build-arg=make_j=2" - name: Test fuzzers run: | - docker run -it deepstate bash -c 'sudo pip3 install nose && \ - echo core | sudo tee /proc/sys/kernel/core_pattern && \ - echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor && \ - nosetests tests/test_fuzzers.py' + echo core | sudo tee /proc/sys/kernel/core_pattern + echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor + docker run deepstate sh -c 'sudo pip3 install nose && nosetests tests/test_fuzzers.py' From 40879e1372d9573f38ef3fb9f229ad66a7848b9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Tue, 18 Feb 2020 23:52:45 +0100 Subject: [PATCH 43/46] fix CI fuzz tests2 --- .github/workflows/ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 379ffdfb..d85611ac 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -92,5 +92,4 @@ jobs: - name: Test fuzzers run: | echo core | sudo tee /proc/sys/kernel/core_pattern - echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor docker run deepstate sh -c 'sudo pip3 install nose && nosetests tests/test_fuzzers.py' From 7e193e92c812321b1bc9e9cf15721d1ce283eccc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Wed, 19 Feb 2020 00:08:29 +0100 Subject: [PATCH 44/46] fix CI fuzz tests3 --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d85611ac..e581a663 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -92,4 +92,4 @@ jobs: - name: Test fuzzers run: | echo core | sudo tee 
/proc/sys/kernel/core_pattern - docker run deepstate sh -c 'sudo pip3 install nose && nosetests tests/test_fuzzers.py' + docker run trailofbits/deepstate/deepstate sh -c 'sudo pip3 install nose && nosetests tests/test_fuzzers.py' From 6e656cec8f46b11a7b17814f8e693b3408a37510 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Wed, 19 Feb 2020 09:22:01 +0100 Subject: [PATCH 45/46] fix CI fuzz tests4 --- .github/workflows/ci.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e581a663..9887a2fe 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -92,4 +92,9 @@ jobs: - name: Test fuzzers run: | echo core | sudo tee /proc/sys/kernel/core_pattern + docker image ls + docker images ls + docker container ls + echo "${{ secrets.DOCKER_PASSWORD }}" | docker login --username ${{ secrets.DOCKER_USERNAME }} --password-stdin docker.pkg.github.com docker run trailofbits/deepstate/deepstate sh -c 'sudo pip3 install nose && nosetests tests/test_fuzzers.py' + docker logout docker.pkg.github.com From eb7c9e65e8de1c24bfdaa46e8a2dfc5e125641c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C5=82atek?= Date: Wed, 19 Feb 2020 09:38:18 +0100 Subject: [PATCH 46/46] fix CI fuzz tests5 --- .github/workflows/ci.yml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9887a2fe..7c766d8c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -92,9 +92,4 @@ jobs: - name: Test fuzzers run: | echo core | sudo tee /proc/sys/kernel/core_pattern - docker image ls - docker images ls - docker container ls - echo "${{ secrets.DOCKER_PASSWORD }}" | docker login --username ${{ secrets.DOCKER_USERNAME }} --password-stdin docker.pkg.github.com - docker run trailofbits/deepstate/deepstate sh -c 'sudo pip3 install nose && nosetests tests/test_fuzzers.py' - docker logout docker.pkg.github.com + docker run 
docker.pkg.github.com/trailofbits/deepstate/deepstate sh -c 'sudo pip3 install nose && nosetests tests/test_fuzzers.py'