diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3703bfa4..7c766d8c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -86,5 +86,10 @@ jobs: image_tag: latest context: . dockerfile: docker/Dockerfile + # pull_image_and_stages: false # if OOM error push_image_and_stages: true # because we run workflow on PRs - build_extra_args: "--cache-from=deepstate-base" + build_extra_args: "--cache-from=deepstate-base --build-arg=make_j=2" + - name: Test fuzzers + run: | + echo core | sudo tee /proc/sys/kernel/core_pattern + docker run docker.pkg.github.com/trailofbits/deepstate/deepstate sh -c 'sudo pip3 install nose && nosetests tests/test_fuzzers.py' diff --git a/README.md b/README.md index aeac8d92..ea14b71f 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ Runtime: - Python 3.6 (or newer) - Z3 (for the Manticore backend) -## Building on Ubuntu 16.04 (Xenial) +## Building on Ubuntu 18.04 (Bionic) First make sure you install [Python 3.6 or greater](https://askubuntu.com/a/865569). Then use this command line to install additional requirements and compile DeepState: @@ -78,6 +78,7 @@ git clone https://github.com/trailofbits/deepstate deepstate mkdir deepstate/build && cd deepstate/build cmake ../ make +sudo make install ``` ## Installing @@ -94,6 +95,9 @@ The `virtualenv`-enabled `$PATH` should now include two executables: `deepstate` If you try using Manticore, and it doesn't work, but you definitely have the latest Manticore installed, check the `.travis.yml` file. If that grabs a Manticore other than the master version, you can try using the version of Manticore we use in our CI tests. Sometimes Manticore makes a breaking change, and we are behind for a short time. + +## Installation testing + You can check your build using the test binaries that were (by default) built and emitted to `deepstate/build/examples`. 
For example, to use angr to symbolically execute the `IntegerOverflow` test harness with 4 workers, saving generated test cases in a directory called `out`, you would invoke: ```shell @@ -136,505 +140,38 @@ In the absence of an `--input_which_test` argument, DeepState defaults to the first-defined test. Run the native executable with the `--help` argument to see all DeepState options. -If you want to use DeepState in C/C++ code, you will likely want to run `sudo make install` from the `$DEEPSTATE/build` directory as well. The examples mentioned below (file system, databases) assume this has already been done. ### Docker You can also try out Deepstate with Docker, which is the easiest way to get all the fuzzers and tools up and running on any system. +The build may take about 40 minutes, because some fuzzers require +building huge projects like QEMU or LLVM. + ```bash -$ docker build -t deepstate . -f docker/Dockerfile +$ docker build -t deepstate-base -f docker/base/Dockerfile docker/base +$ docker build -t deepstate --build-arg make_j=6 -f ./docker/Dockerfile . $ docker run -it deepstate bash -user@0f7cccd70f7b:~/deepstate/build/examples$ cd deepstate/build/examples -user@0f7cccd70f7b:~/deepstate/build/examples$ deepstate-angr ./Runlen -user@0f7cccd70f7b:~/deepstate/build/examples$ deepstate-eclipser ./Runlen --timeout 30 -user@0f7cccd70f7b:~/deepstate/build/examples$ ./Runlen_LF -max_total_time=30 -user@0f7cccd70f7b:~/deepstate/build/examples$ mkdir foo; echo foo > foo/foo -user@0f7cccd70f7b:~/deepstate/build/examples$ afl-fuzz -i foo -o afl_Runlen -- ./Runlen_AFL --input_test_file @@ --no_fork --abort_on_fail -``` - -## Usage - -DeepState consists of a static library, used to write test harnesses, -and command-line _executors_ written in Python. At this time, the best -documentation is in the [examples](/examples) and in our -[paper](https://agroce.github.io/bar18.pdf). 
A more extensive -example, using DeepState and libFuzzer to test a user-mode file -system, is available [here](https://github.com/agroce/testfs); in -particular the -[Tests.cpp](https://github.com/agroce/testfs/blob/master/Tests.cpp) -file and CMakeLists.txt show DeepState usage. Another extensive -example is a [differential tester that compares Google's leveldb and -Facebook's rocksdb](https://github.com/agroce/testleveldb). - -## Example Code - -```cpp -#include - -using namespace deepstate; - -/* Simple, buggy, run-length encoding that creates "human readable" - * encodings by adding 'A'-1 to the count, and splitting at 26. - * e.g., encode("aaabbbbbc") = "aCbEcA" since C=3 and E=5 */ - -char* encode(const char* input) { - unsigned int len = strlen(input); - char* encoded = (char*)malloc((len*2)+1); - int pos = 0; - if (len > 0) { - unsigned char last = input[0]; - int count = 1; - for (int i = 1; i < len; i++) { - if (((unsigned char)input[i] == last) && (count < 26)) - count++; - else { - encoded[pos++] = last; - encoded[pos++] = 64 + count; - last = (unsigned char)input[i]; - count = 1; - } - } - encoded[pos++] = last; - encoded[pos++] = 65; // Should be 64 + count - } - encoded[pos] = '\0'; - return encoded; -} - -char* decode(const char* output) { - unsigned int len = strlen(output); - char* decoded = (char*)malloc((len/2)*26); - int pos = 0; - for (int i = 0; i < len; i += 2) { - for (int j = 0; j < (output[i+1] - 64); j++) { - decoded[pos++] = output[i]; - } - } - decoded[pos] = '\0'; - return decoded; -} - -// Can be (much) higher (e.g., > 1024) if we're using fuzzing, not symbolic execution -#define MAX_STR_LEN 6 - -TEST(Runlength, BoringUnitTest) { - ASSERT_EQ(strcmp(encode(""), ""), 0); - ASSERT_EQ(strcmp(encode("a"), "aA"), 0); - ASSERT_EQ(strcmp(encode("aaabbbbbc"), "aCbEcA"), 0); -} - -TEST(Runlength, EncodeDecode) { - char* original = DeepState_CStrUpToLen(MAX_STR_LEN, "abcdef0123456789"); - char* encoded = encode(original); - 
ASSERT_LE(strlen(encoded), strlen(original)*2) << "Encoding is > length*2!"; - char* roundtrip = decode(encoded); - ASSERT_EQ(strncmp(roundtrip, original, MAX_STR_LEN), 0) << - "ORIGINAL: '" << original << "', ENCODED: '" << encoded << - "', ROUNDTRIP: '" << roundtrip << "'"; -} -``` - -The code above (which can be found -[here](https://github.com/trailofbits/deepstate/blob/master/examples/Runlen.cpp)) -shows an example of a DeepState test harness. Most of the code is -just the functions to be tested. Using DeepState to test them requires: - -- Including the DeepState C++ header and using the DeepState namespace - -- Defining at least one TEST, with names - -- Calling some DeepState APIs that produce data - - In this example, we see the `DeepState_CStrUpToLen` call tells - DeepState to produce a string that has up to `MAX_STR_LEN` - characters, chosen from those present in hex strings. - -- Optionally making some assertions about the correctness of the -results - - In `Runlen.cpp` this is the `ASSERT_LE` and `ASSERT_EQ` checks. - - In the absence of any properties to check, DeepState can still - look for memory safety violations, crashes, and other general - categories of undesirable behavior, like any fuzzer. - -DeepState will also run the "BoringUnitTest," but it (like a -traditional hand-written unit test) is simply a test of fixed inputs -devised by a programmer. These inputs do not expose the bug in -`encode`. Nor do the default values (all zero bytes) for the DeepState test: - -``` -~/deepstate/build/examples$ ./Runlen -TRACE: Running: Runlength_EncodeDecode from /Users/alex/deepstate/examples/Runlen.cpp(55) -TRACE: Passed: Runlength_EncodeDecode -TRACE: Running: Runlength_BoringUnitTest from /Users/alex/deepstate/examples/Runlen.cpp(49) -TRACE: Passed: Runlength_BoringUnitTest -``` - -Using DeepState, however, it is easy to find the bug. 
Just -go into the `$DEEPSTATE/build/examples` directory and try: - -```shell -deepstate-angr ./Runlen +user@a17bc44fd259:~/deepstate$ cd build/examples +user@a17bc44fd259:~/deepstate/build/examples$ deepstate-angr ./Runlen +user@a17bc44fd259:~/deepstate/build/examples$ mkdir tmp && deepstate-eclipser ./Runlen -o tmp --timeout 30 +user@a17bc44fd259:~/deepstate/build/examples$ cd ../../build_libfuzzer/examples +user@a17bc44fd259:~/deepstate/build_libfuzzer/examples$ ./Runlen_LF -max_total_time=30 +user@a17bc44fd259:~/deepstate/build_libfuzzer/examples$ cd ../../build_afl/examples +user@a17bc44fd259:~/deepstate/build_afl/examples$ mkdir foo && echo x > foo/x && mkdir afl_Runlen2 +user@a17bc44fd259:~/deepstate/build_afl/examples$ $AFL_HOME/afl-fuzz -i foo -o afl_Runlen -- ./Runlen_AFL --input_test_file @@ --no_fork --abort_on_fail +user@a17bc44fd259:~/deepstate/build_afl/examples$ deepstate-afl -o afl_Runlen2 ./Runlen_AFL --fuzzer_out ``` -or - -```shell -./Runlen --fuzz --exit_on_fail -``` - -The fuzzer will output something like: - -``` -INFO: Starting fuzzing -WARNING: No seed provided; using 1546631311 -WARNING: No test specified, defaulting to last test defined (Runlength_EncodeDecode) -CRITICAL: /Users/alex/deepstate/examples/Runlen.cpp(60): ORIGINAL: '91c499', ENCODED: '9A1AcA4A9A', ROUNDTRIP: '91c49' -ERROR: Failed: Runlength_EncodeDecode -``` - -If you're using the DeepState docker, it's easy to also try libFuzzer -and AFL on the Runlen example: - -```shell -mkdir libfuzzer_runlen -./Runlen_LF libfuzzer_runlen -max_total_time=30 -./Runlen --input_test_files_dir libfuzzer_runlen -``` - -And you'll see a number of failures, e.g.: -``` -WARNING: No test specified, defaulting to last test defined (Runlength_EncodeDecode) -CRITICAL: /home/user/deepstate/examples/Runlen.cpp(60): ORIGINAL: '4af4aa', ENCODED: '4AaAfA4AaA', ROUNDTRIP: '4af4a' -ERROR: Failed: Runlength_EncodeDecode -ERROR: Test case libfuzzer_runlen//9e266f6cb627ce3bb7d717a6e569ade6b3633f23 failed 
-CRITICAL: /home/user/deepstate/examples/Runlen.cpp(60): ORIGINAL: 'aaaaaa', ENCODED: 'aA', ROUNDTRIP: 'a' -ERROR: Failed: Runlength_EncodeDecode -ERROR: Test case libfuzzer_runlen//d8fc60ccdd8f555c1858b9f0820f263e3d2b58ec failed -CRITICAL: /home/user/deepstate/examples/Runlen.cpp(60): ORIGINAL: '4aaa', ENCODED: '4AaA', ROUNDTRIP: '4a' -ERROR: Failed: Runlength_EncodeDecode -ERROR: Test case libfuzzer_runlen//3177c75208f2d35399842196dc8093243d5a8243 failed -CRITICAL: /home/user/deepstate/examples/Runlen.cpp(60): ORIGINAL: 'aaa', ENCODED: 'aA', ROUNDTRIP: 'a' -ERROR: Failed: Runlength_EncodeDecode -ERROR: Test case libfuzzer_runlen//9842926af7ca0a8cca12604f945414f07b01e13d failed -CRITICAL: /home/user/deepstate/examples/Runlen.cpp(60): ORIGINAL: 'aaa', ENCODED: 'aA', ROUNDTRIP: 'a' -ERROR: Failed: Runlength_EncodeDecode -ERROR: Test case libfuzzer_runlen//85e53271e14006f0265921d02d4d736cdc580b0b failed -CRITICAL: /home/user/deepstate/examples/Runlen.cpp(60): ORIGINAL: 'aaaaa', ENCODED: 'aA', ROUNDTRIP: 'a' -ERROR: Failed: Runlength_EncodeDecode -ERROR: Test case libfuzzer_runlen//241cbd6dfb6e53c43c73b62f9384359091dcbf56 failed -CRITICAL: /home/user/deepstate/examples/Runlen.cpp(60): ORIGINAL: 'aa', ENCODED: 'aA', ROUNDTRIP: 'a' -ERROR: Failed: Runlength_EncodeDecode -ERROR: Test case libfuzzer_runlen//05a79f06cf3f67f726dae68d18a2290f6c9a50c9 failed -CRITICAL: /home/user/deepstate/examples/Runlen.cpp(60): ORIGINAL: '25aaaa', ENCODED: '2A5AaA', ROUNDTRIP: '25a' -ERROR: Failed: Runlength_EncodeDecode -ERROR: Test case libfuzzer_runlen//419c3b754bacd6fc14ff9a932c5e2089d6dfcab5 failed -CRITICAL: /home/user/deepstate/examples/Runlen.cpp(60): ORIGINAL: 'aaaa', ENCODED: 'aA', ROUNDTRIP: 'a' -ERROR: Failed: Runlength_EncodeDecode -ERROR: Test case libfuzzer_runlen//bb589d0621e5472f470fa3425a234c74b1e202e8 failed -CRITICAL: /home/user/deepstate/examples/Runlen.cpp(60): ORIGINAL: '97aa', ENCODED: '9A7AaA', ROUNDTRIP: '97a' -ERROR: Failed: Runlength_EncodeDecode -ERROR: Test 
case libfuzzer_runlen//ca61c43b0e3ff0a8eccf3136996c9f1d9bfd627c failed -INFO: Ran 16 tests; 10 tests failed -``` - -Using AFL is similarly easy: - -```shell -mkdir afl_seeds -echo "ok" >& seeds/seed -afl-fuzz -i seeds -o afl_runlen -- ./Runlen_AFL --input_test_file @@ --no_fork --abort_on_fail -``` +### Documentation -You'll have to stop this with Ctrl-C. The `afl_runlen/crashes` -directory will contain crashing inputs AFL found. - -## Log Levels - -By default, DeepState is not very verbose about testing activity, -other than failing tests. The `--min_log_level` argument lowers the -threshold for output, with 0 = `DEBUG`, 1 = `TRACE` (output from the -tests, including from `printf`), 2 = INFO (DeepState messages, the default), 3 = `WARNING`, -4 = `ERROR`, 5 = `EXTERNAL` (output from other programs such as -libFuzzer), and 6 = `CRITICAL` messages. Lowering the `min_log_level` can be very -useful for understanding what a DeepState harness is actually doing; -often, setting `--min_log_level 1` in either fuzzing or symbolic -execution will give sufficient information to debug your test harness. - - -## Built-In Fuzzer - -Every DeepState executable provides a simple built-in fuzzer that -generates tests using completely random data. Using this fuzzer is as -simple as calling the native executable with the `--fuzz` argument. -The fuzzer also takes a `seed` and `timeout` (default of two minutes) -to control the fuzzing. By default fuzzing saves -only failing and crashing tests, and these only when given an output -directory. If you want to actually save the test cases -generated, you need to add a `--output_test_dir` argument to tell -DeepState where to put the generated tests, and if you want the -(totally random and unlikely to be high-quality) passing tests, you -need to add `--fuzz_save_passing`. 
- -Note that while symbolic execution only works on Linux, without a -fairly complex cross-compilation process, the brute force fuzzer works -on macOS or (as far as we know) any Unix-like system. - -## A Note on MacOS and Forking - -Normally, when running a test for replay or fuzzing, DeepState forks -in order to cleanly handle crashes of a test. Unfortunately, `fork()` -on macOS is _extremely_ slow. When using the built-in fuzzer or -replaying more than a few tests, it is highly recommended to add the `--no_fork` -option on macOS, unless you need the added crash handling (that is, -only when things aren't working without that option). - -## Fuzzing with libFuzzer - -If you install clang 6.0 or later, and run `cmake` when you install -with the `DEEPSTATE_LIBFUZZER` environment variable defined, you can -generate tests using libFuzzer. Because both DeepState and libFuzzer -want to be `main`, this requires building a different executable for -libFuzzer. The `examples` directory shows how this can be done: just -compile with a libFuzzer-supporting clang, and add `-fsanitize=fuzzer` -as an option, and link to the right DeepState library -(`-ldeepstate_LF`). The -libFuzzer executable thus produced works like any other libFuzzer executable, and -the tests produced can be replayed using the normal DeepState executable. -For example, generating some tests of the `OneOf` example (up to 5,000 -runs), then running those tests to examine the results, would look -like: +Check out the [docs](/docs) folder: -```shell -mkdir OneOf_libFuzzer_corpus -./OneOf_LF -runs=5000 OneOf_libFuzzer_corpus -./OneOf --input_test_files_dir OneOf_libFuzzer_corpus -``` - -Use the `LIBFUZZER_WHICH_TEST` -environment variable to control which test libFuzzer runs, using a -fully qualified name (e.g., -`Arithmetic_InvertibleMultiplication_CanFail`). By default, you get -the first test defined (which works fine if there is only one test). 
-Obviously, libFuzzer may work better if you provide a non-empty -corpus, but fuzzing will work even without an initial corpus, unlike AFL. - -One hint when using libFuzzer is to avoid dynamically allocating -memory during a test, if that memory would not be freed on a test -failure. This will leak memory and libFuzzer will run out of memory -very quickly in each fuzzing session. Using libFuzzer on macOS -requires compiling DeepState and your program with a clang that -supports libFuzzer (which the Apple built-in probably won't); this can be as simple as doing: - -```shell -brew install llvm@7 -CC=/usr/local/opt/llvm\@7/bin/clang CXX=/usr/local/opt/llvm\@7/bin/clang++ DEEPSTATE_LIBFUZZER=TRUE cmake .. -make install -``` - -Other ways of getting an appropriate LLVM may also work. - -On macOS, libFuzzer's normal output is not visible. Because libFuzzer -does not fork to execute tests, there is no issue with fork speed on -macOS for this kind of fuzzing. - -On any platform, -you can see more about what DeepState under libFuzzer is doing by -setting the `LIBFUZZER_LOUD` environment variable, and tell libFuzzer -to stop upon finding a failing test using `LIBFUZZER_EXIT_ON_FAIL`. - -## Test case reduction - -While tests generated by symbolic execution are likely to be highly -concise already, fuzzer-generated tests may be much larger than they -need to be. - -DeepState provides a test case reducer to shrink tests intelligently, -aware of the structure of a DeepState test. 
For example, if your -executable is named `TestFileSystem` and the test you want to reduce -is named `rmdirfail.test` you would use it like this: - -```shell -deepstate-reduce ./TestFileSystem create.test mincreate.test -``` - -In many cases, this will result in finding a different failure or -crash that allows smaller test cases, so you can also provide a string -that controls the criterion for which test outputs are considered valid -reductions (by default, the reducer looks for any test that fails or -crashes). Only outputs containing the `--criterion` are considered to -be valid reductions (`--regexpCriterion` lets you use a Python regexp -for more complex checks): - -```shell -deepstate-reduce ./TestFileSystem create.test mincreate.test --criteria "Assertion failed: ((testfs_inode_get_type(in) == I_FILE)" -``` - -The output will look something like: - -``` -Original test has 8192 bytes -Applied 128 range conversions -Last byte read: 527 -Shrinking to ignore unread bytes -Writing reduced test with 528 bytes to rnew -================================================================================ -Iteration #1 0.39 secs / 2 execs / 0.0% reduction -Structured deletion reduced test to 520 bytes -Writing reduced test with 520 bytes to rnew -0.77 secs / 3 execs / 1.52% reduction - -... 
- -Structured swap: PASS FINISHED IN 0.01 SECONDS, RUN: 5.1 secs / 151 execs / 97.54% reduction -Reduced byte 12 from 4 to 1 -Writing reduced test with 13 bytes to rnew -5.35 secs / 169 execs / 97.54% reduction -================================================================================ -Byte reduce: PASS FINISHED IN 0.5 SECONDS, RUN: 5.6 secs / 186 execs / 97.54% reduction -================================================================================ -Iteration #2 5.6 secs / 186 execs / 97.54% reduction -Structured deletion: PASS FINISHED IN 0.03 SECONDS, RUN: 5.62 secs / 188 execs / 97.54% reduction -Structured edge deletion: PASS FINISHED IN 0.03 SECONDS, RUN: 5.65 secs / 190 execs / 97.54% reduction -1-byte chunk removal: PASS FINISHED IN 0.19 SECONDS, RUN: 5.84 secs / 203 execs / 97.54% reduction -4-byte chunk removal: PASS FINISHED IN 0.19 SECONDS, RUN: 6.03 secs / 216 execs / 97.54% reduction -8-byte chunk removal: PASS FINISHED IN 0.19 SECONDS, RUN: 6.22 secs / 229 execs / 97.54% reduction -1-byte reduce and delete: PASS FINISHED IN 0.04 SECONDS, RUN: 6.26 secs / 232 execs / 97.54% reduction -4-byte reduce and delete: PASS FINISHED IN 0.03 SECONDS, RUN: 6.29 secs / 234 execs / 97.54% reduction -8-byte reduce and delete: PASS FINISHED IN 0.01 SECONDS, RUN: 6.31 secs / 235 execs / 97.54% reduction -Byte range removal: PASS FINISHED IN 0.76 SECONDS, RUN: 7.06 secs / 287 execs / 97.54% reduction -Structured swap: PASS FINISHED IN 0.01 SECONDS, RUN: 7.08 secs / 288 execs / 97.54% reduction -================================================================================ -Completed 2 iterations: 7.08 secs / 288 execs / 97.54% reduction -Padding test with 23 zeroes -Writing reduced test with 36 bytes to mincreate.test -``` - -You can use `--which_test ` to specify which test to -run, as with the `--input_which_test` options to test replay. 
If you -find that test reduction is taking too long, you can try the `--fast` -option to get a quick-and-dirty reduction, and later use the default -settings, or even `--slowest` setting to try to reduce it further. - -Test case reduction should work on any OS. - -## Fuzzing with AFL - -DeepState can also be used with a file-based fuzzer (e.g. AFL). If -you compile using `afl-clang++` and `afl-clang`, and link with -`-ldeepstate_AFL` when working with AFL. `deepstate-afl` then gives -you an easy front-end for running AFL. - -For example, to fuzz the `OneOf` -example, if we were in the `deepstate/build/examples` directory (and had -built an AFL executable for it), you -would do something like: - -```shell -deepstate-afl ./OneOf_afl -i corpus --output_test_dir afl_OneOf_out -``` - -where `corpus` contains at least one file to start fuzzing from. The -file needs to be smaller than the DeepState input size limit, but has -few other limitations (for AFL it should also not cause test -failure). The `abort_on_fail` flag makes DeepState crashes and failed -tests appear as crashes to the fuzzer. -To replay the tests from AFL: - -```shell -./OneOf --input_test_files_dir afl_OneOf_out/crashes -./OneOf --input_test_files_dir afl_OneOf_out/queue -``` +* [Basic usage](/docs/basic_usage.md) +* [Fuzzing](/docs/fuzzing.md) +* [Swarm testing](/docs/swarm_testing.md) -Finally, if an example has more than one test, you need to specify, -with a fully qualified name (e.g., -`Arithmetic_InvertibleMultiplication_CanFail`), which test to run, -using the `--input_which_test` flag. By -default, DeepState will run the first test defined. - -Because AFL and other file-based fuzzers only rely on the DeepState -native test executable, they should (like DeepState's built-in simple -fuzzer) work fine on macOS and other Unix-like OSes. 
On macOS, you -will want to consider doing the work to use [persistent mode](http://lcamtuf.blogspot.com/2015/06/new-in-afl-persistent-mode.html), or even -running inside a VM, due to AFL (unless in persistent mode) relying -extensively on -forks, which are very slow on macOS. - -## Fuzzing with Eclipser - -[Eclipser](https://github.com/SoftSec-KAIST/Eclipser) is a powerful new fuzzer/grey-box concolic tool -with some of the advantages of symbolic execution, but with more scalability. DeepState supports Eclipser out of the box. To use it, you just need to - -- Install Eclipser as instructed at https://github.com/SoftSec-KAIST/Eclipser (you'll need to be on Linux) -- Set the `ECLIPSER_HOME` environment variable to where-ever you installed Eclipser (the root, above `build`) -- Make sure you compile your DeepState native without any sanitizers (QEMU, used by Eclipser, doesn't like them) - -After that, you can use Eclipser like this: - -`deepstate-eclipser --timeout --output_test_dir ` - -In our experience, Eclipser is quite effective, often better than -libFuzzer and sometimes better than AFL, despite having a much slower -test throughput than either. - -## Which Fuzzer Should I Use? - -In fact, since DeepState supports libFuzzer, AFL, and Eclipser (and -others), a natural question is "which is the best fuzzer?" In -general, it depends! We suggest using them all, which DeepState makes -easy. libFuzzer is very fast, and sometimes the CMP breakdown it -provides is very useful; however, it's often bad at finding longer -paths where just covering nodes isn't helpful. AFL is still an -excellent general-purpose fuzzer, and often beats "improved" versions -over a range of programs. Finally, Eclipser has some tricks that let -it get traction in some cases where you might think only symbolic -execution (which wouldn't scale) could help. 
- -## Swarm Testing - - [Swarm testing](https://agroce.github.io/issta12.pdf) is an approach - to test generation that [modifies the distributions of finite choices](https://blog.regehr.org/archives/591) - (e.g., string generation and `OneOf` choices of which functions to - call). It has a long history of improving compiler testing, and - usually (but not always) API testing. The Hypothesis Python testing - tool - [recently added swarm to its' stable of heuristics](https://github.com/HypothesisWorks/hypothesis/pull/2238). - -The basic idea is simple. Let's say we are generating tests of a -stack that overflows when a 64th item is pushed on the stack, due to a -typo in the overflow check. Our tests are -256 calls to push/pop/top/clear. Obviously the odds of getting 64 -pushes in a row, without popping or clearing, are very low (for a dumb -fuzzer, the odds are astronomically low). -Coverage-feedback and various byte-copying heuristics in AFL and -libFuzzer etc. can sometimes work around such problems, but in other, -more complex cases, they are stumped. Swarm testing "flips a coin" -before each test, and only includes API calls in the test if the coin -came up heads for that test. That means we just need some test to run -with heads for push and tails for pop and clear. - -DeepState supports fully automated swarm testing. Just compile your -harness with `-DDEEPSTATE_PURE_SWARM` and all your `OneOf`s _and_ -DeepState string generation functions will use swarm testing. This is -a huge help for the built-in fuzzer (for example, it more than doubles -the fault detection rate for the `Runlen` example above). Eclipser -can get "stuck" with swarm testing, but AFL and libFuzzer can -certainly sometimes benefit from swarm testing. There is also an option -`-DDEEPSTATE_MIXED_SWARM` that mixes swarm and regular generation. It -flips an additional coin for each potentially swarmable thing, and -decides to use swarm or not for that test. 
This can produce a mix of -swarm and regular generation that is unique to DeepState. If you -aren't finding any bugs using a harness that involves `OneOf` or -generating strings, it's a good idea to try both swarm methods before -declaring the code bug-free! There is another, more experimental, -swarm-like method, `-DDEEPSTATE_PROB_SWARM`, that is of possible interest. -Instead of pure binary inclusion/exclusion of choices, this varies the -actual distribution of choices. However, because this often ends up behaving -more like a non-swarm selection, it may not be as good at ferreting out -unusual behaviors due to extreme imbalance of choices. - -Note that tests produced under a particular swarm option are _not_ -binary compatible with other settings for swarm, due to the added coin flips. ## Contributing diff --git a/bin/deepstate/__init__.py b/bin/deepstate/__init__.py index 87f43acd..7016b47d 100644 --- a/bin/deepstate/__init__.py +++ b/bin/deepstate/__init__.py @@ -9,11 +9,13 @@ def __init__(self, name: str) -> None: logging.Logger.__init__(self, name=name) self.trace = functools.partial(self.log, 15) # type: ignore self.external = functools.partial(self.log, 45) # type: ignore + self.fuzz_stats = functools.partial(self.log, 46) # type: ignore logging.basicConfig() logging.addLevelName(15, "TRACE") logging.addLevelName(45, "EXTERNAL") +logging.addLevelName(46, "FUZZ_STATS") logging.setLoggerClass(DeepStateLogger) logger = logging.getLogger(__name__) @@ -48,7 +50,9 @@ def __init__(self, name: str) -> None: log_level_from_env: str = os.environ.get("DEEPSTATE_LOG", "2") try: - logger.setLevel(LOG_LEVEL_INT_TO_STR[int(log_level_from_env)]) + log_level_from_env_int: int = int(log_level_from_env) + logger.setLevel(LOG_LEVEL_INT_TO_STR[log_level_from_env_int]) + logger.info("Setting log level from DEEPSTATE_LOG: %d", log_level_from_env_int) except ValueError: print("$DEEPSTATE_LOG contains invalid value `%s`, " "should be int in 0-6 (debug, trace, info, warning, error, 
external, critical).", diff --git a/bin/deepstate/core/base.py b/bin/deepstate/core/base.py index 900db753..b60ac80c 100644 --- a/bin/deepstate/core/base.py +++ b/bin/deepstate/core/base.py @@ -74,10 +74,9 @@ def __init__(self): AnalysisBackend.compiler_exe = self.EXECUTABLES.pop("COMPILER", None) # parsed argument attributes - self.binary: str = None - self.output_test_dir: str = f"{self}_out" + self.binary: Optional[str] = None + self.output_test_dir: str self.timeout: int = 0 - self.num_workers: int = 1 self.mem_limit: int = 50 self.min_log_level: int = 2 @@ -124,7 +123,8 @@ def parse_args(cls) -> Optional[argparse.Namespace]: help="Linker flags (space seperated) to include for external libraries.") compile_group.add_argument("--out_test_name", type=str, - help="Set name of generated instrumented binary.") + help=("Set name of generated instrumented binary. Default is `out`. " + "Automatically adds `.frontend_name_lowercase` suffix.")) compile_group.add_argument("--no_exit_compile", action="store_true", help="Continue execution after compiling a harness (set as default if `--config` is set).") @@ -135,8 +135,8 @@ def parse_args(cls) -> Optional[argparse.Namespace]: # Analysis-related configurations parser.add_argument( - "-o", "--output_test_dir", type=str, default="out", - help="Output directory where tests will be saved (default is `out`).") + "-o", "--output_test_dir", type=str, + help="Output directory where tests will be saved. Required. 
If not empty, will try to resume.") parser.add_argument( "-c", "--config", type=str, @@ -146,10 +146,6 @@ def parse_args(cls) -> Optional[argparse.Namespace]: "-t", "--timeout", default=0, type=int, help="Time to kill analysis worker processes, in seconds (default is 0 for none).") - parser.add_argument( - "-w", "--num_workers", default=1, type=int, - help="Number of worker jobs to spawn for analysis (default is 1).") - parser.add_argument("--mem_limit", type=int, default=50, help="Child process memory limit in MiB (default is 50). 0 for unlimited.") @@ -183,6 +179,7 @@ def parse_args(cls) -> Optional[argparse.Namespace]: target_args_parsed.append((key, val)) _args['target_args'] = target_args_parsed + # if configuration is specified, parse and replace argument instantiations if args.config: _args.update(cls.build_from_config(args.config)) # type: ignore @@ -193,15 +190,16 @@ def parse_args(cls) -> Optional[argparse.Namespace]: del _args["config"] # log level fixing - if os.environ.get("DEEPSTATE_LOG", None) is None: + if not os.environ.get("DEEPSTATE_LOG"): if _args["min_log_level"] < 0 or _args["min_log_level"] > 6: raise AnalysisBackendError(f"`--min_log_level` is in invalid range, should be in 0-6 " "(debug, trace, info, warning, error, external, critical).") + L.info("Setting log level from --min_log_level: %d", _args["min_log_level"]) logger = logging.getLogger("deepstate") logger.setLevel(LOG_LEVEL_INT_TO_STR[_args["min_log_level"]]) else: - L.info("Using log level from $DEEPSTATE_LOG.") + L.debug("Using log level from $DEEPSTATE_LOG.") cls._ARGS = args return cls._ARGS diff --git a/bin/deepstate/core/fuzz.py b/bin/deepstate/core/fuzz.py index 3f759cfa..096cea71 100644 --- a/bin/deepstate/core/fuzz.py +++ b/bin/deepstate/core/fuzz.py @@ -19,11 +19,14 @@ import time import sys import subprocess +import psutil # type: ignore import argparse import shutil -import multiprocessing as mp +import traceback -from multiprocessing.pool import ApplyResult +from 
tempfile import mkdtemp +from time import sleep +from pathlib import Path from typing import Optional, Dict, List, Any, Tuple from deepstate.core.base import AnalysisBackend, AnalysisBackendError @@ -44,12 +47,27 @@ class FuzzerFrontend(AnalysisBackend): Defines a base front-end object for using DeepState to interact with fuzzers. """ + REQUIRE_SEEDS: bool = False + + PUSH_DIR: str + PULL_DIR: str + CRASH_DIR: str + def __init__(self, envvar: str) -> None: """ Create and store variables: - fuzzer_exe (fuzzer executable file) - env (environment variable name) - search_dirs (directories inside fuzzer home dir where to look for executables) + - require_seeds + - stats (dict that frontend should populate in populate_stats method) + - stats_file (file where to put stats from fuzzer in common format) + - output_file (file where stdout of fuzzer will be redirected) + - proc (handler to fuzzer process) + + - push_dir (push testcases from external sources here) + - pull_dir (pull new testcases from this dir) + - crash_dir (crashes will be in this dir) Inherits: - name (name for pretty printing) @@ -76,6 +94,44 @@ def __init__(self, envvar: str) -> None: # flag to ensure fuzzer processes do not persist self._on: bool = False + self.proc: subprocess.Popen[bytes] + self.require_seeds: bool = False + self.stats_file: str = "deepstate-stats.txt" + self.output_file: str = "fuzzer-output.txt" + + # same as AFL's (https://github.com/google/AFL/blob/master/docs/status_screen.txt) + self.stats: Dict[str, Optional[str]] = { + # guaranteed + "unique_crashes": None, + "fuzzer_pid": None, + "start_time": None, + "sync_dir_size": None, + + # not guaranteed + "execs_done": None, + "execs_per_sec": None, + "last_update": None, + "cycles_done": None, + "paths_total": None, + "paths_favored": None, + "paths_found": None, + "paths_imported": None, + "max_depth": None, + "cur_path": None, + "pending_favs": None, + "pending_total": None, + "variable_paths": None, + "stability": None, + 
"bitmap_cvg": None, + "unique_hangs": None, + "last_path": None, + "last_crash": None, + "last_hang": None, + "execs_since_crash": None, + "slowest_exec_ms": None, + "peak_rss_mb": None, + } + # parsed argument attributes self.input_seeds: Optional[str] = None self.max_input_size: int = 8192 @@ -83,13 +139,16 @@ def __init__(self, envvar: str) -> None: self.exec_timeout: Optional[int] = None self.blackbox: Optional[bool] = None self.fuzzer_args: List[Any] = [] + self.fuzzer_out: bool = False - self.enable_sync: bool = False self.sync_cycle: int = 5 self.sync_out: bool = True - self.sync_dir: str = "out_sync" + self.sync_dir: Optional[str] = None + + self.push_dir: str = '' + self.pull_dir: str = '' + self.crash_dir: str = '' - self.post_stats: bool = False self.home_path: Optional[str] = None @@ -111,7 +170,6 @@ def parse_args(cls) -> Optional[argparse.Namespace]: - max_input_size (default: 8192B) - fuzzer_args (default: {}) - blackbox (default: False) - - post_stats (default: False) Optional arguments (may be None): - input_seeds @@ -158,6 +216,10 @@ def parse_args(cls) -> Optional[argparse.Namespace]: "--blackbox", action="store_true", help="Black-box fuzzing without compile-time instrumentation.") + parser.add_argument( + "--fuzzer_out", action="store_true", + help="Show fuzzer-specific output (graphical interface) instead of deepstate one.") + parser.add_argument( "--fuzzer_args", default=[], nargs='*', help="Flags to pass to the fuzzer. 
Format: `a arg1=val` -> `-a --arg val`.") @@ -166,28 +228,13 @@ def parse_args(cls) -> Optional[argparse.Namespace]: # Parallel / Ensemble Fuzzing ensemble_group = parser.add_argument_group("Parallel/Ensemble Fuzzing") ensemble_group.add_argument( - "--enable_sync", action="store_true", - help="Enable seed synchronization to another seed queue directory.") - - ensemble_group.add_argument( - "--sync_out", action="store_true", - help="When set, output individual fuzzer stat summary, instead of a global summary from the ensembler") - - ensemble_group.add_argument( - "--sync_dir", type=str, default="out_sync", - help="Directory representing seed queue for synchronization between local queue.") + "--sync_dir", type=str, + help="Directory representing seed queue for synchronization between fuzzers.") ensemble_group.add_argument( "--sync_cycle", type=int, default=5, help="Time in seconds the executor should sync to sync directory (default is 5 seconds).") - - # Post-processing - post_group = parser.add_argument_group("Execution Post-processing") - post_group.add_argument("--post_stats", action="store_true", - help="Output post-fuzzing statistics to user (if any).") - - # Miscellaneous options parser.add_argument( "--fuzzer_help", action="store_true", @@ -243,14 +290,14 @@ def _search_for_executable(self, exe_name): if self.env: for one_env_path in self.env.split(":"): for search_dir in [""] + self.search_dirs: - exe_path: Optional[str] = shutil.which(exe_name, path=os.path.join(one_env_path, search_dir)) + exe_path: Optional[str] = shutil.which(exe_name, mode=os.F_OK, path=os.path.join(one_env_path, search_dir)) if exe_path is not None: return exe_path # search in current dir and $PATH where_to_search = ['.', None] for search_env in where_to_search: - exe_path: Optional[str] = shutil.which(exe_name, path=search_env) + exe_path: Optional[str] = shutil.which(exe_name, mode=os.F_OK, path=search_env) if exe_path is not None: return exe_path @@ -310,6 +357,8 @@ def 
compile(self, lib_path: str, flags: List[str], _out_bin: str, env = os.envir :param env: optional envvars to set during compilation """ + _out_bin += f".{self.NAME.lower()}" + if self.compiler_exe is None: raise FuzzFrontendError(f"No compiler specified for compile-time instrumentation.") @@ -331,11 +380,8 @@ def compile(self, lib_path: str, flags: List[str], _out_bin: str, env = os.envir L.debug("Compilation command: %s", compile_cmd) # call compiler, and deal with exceptions accordingly - L.info("Compiling test harness `%s` with %s", self.compile_test, self.compiler_exe) - try: - subprocess.Popen(compile_cmd, env=env).communicate() - except BaseException as e: - raise FuzzFrontendError(f"{self.compiler_exe} interrupted due to exception:", e) + L.info("Compiling test harness `%s`", compile_cmd) + subprocess.Popen(compile_cmd, env=env).communicate() # extra check if target binary was successfully compiled, and set that as target binary out_bin = os.path.join(os.getcwd(), _out_bin) @@ -343,23 +389,57 @@ def compile(self, lib_path: str, flags: List[str], _out_bin: str, env = os.envir self.binary = out_bin + def create_fake_seeds(self): + if not self.input_seeds: + self.input_seeds = mkdtemp(prefix="deepstate_fake_seed") + with open(os.path.join(self.input_seeds, "fake_seed"), 'wb') as f: + f.write(b'X') + L.info("Creating fake input seed file in directory `%s`", self.input_seeds) + + + def check_required_directories(self, required_dirs): + for required_dir in required_dirs: + if not os.path.isdir(required_dir): + raise FuzzFrontendError(f"Can't resume with output directory `{self.output_test_dir}`. 
" + f"No `{required_dir}` directory inside.") + + + def setup_new_session(self, dirs_to_create=[]): + for dir_to_create in dirs_to_create: + Path(dir_to_create).mkdir(parents=True, exist_ok=True) + L.debug(f"Creating directory {dir_to_create}.") + + if self.require_seeds is True and not self.input_seeds: + self.create_fake_seeds() + + def pre_exec(self): """ Called before fuzzer execution in order to perform sanity checks. Base method contains default argument checks. Users should implement inherited method for any other environment checks or initializations before execution. + + Do: + - search for executables (update self.EXECUTABLES) + - may print fuzzer help (and exit) + - may compile + - check for targets (self.binary) + - may check for input_seeds + - check for output directory + - check for sync_dir + - update stats_file path """ if self.parser is None: raise FuzzFrontendError("No arguments parsed yet. Call parse_args() before pre_exec().") + # search for executables and set proper variables + self._set_executables() + if self.fuzzer_help: self.print_help() sys.exit(0) - # search for executables and set proper variables - self._set_executables() - # if compile_test is set, call compile for user if self.compile_test: self.compile() @@ -384,18 +464,51 @@ def pre_exec(self): raise FuzzFrontendError(f"Binary {self.binary} doesn't exists.") L.debug("Target binary: %s", self.binary) - # no sanity check, since some fuzzers require optional input seeds + # if input_seeds is provided run checks if self.input_seeds: L.debug("Input seeds directory: %s", self.input_seeds) + if not os.path.exists(self.input_seeds): + raise FuzzFrontendError(f"Input seeds dir (`{self.input_seeds}`) doesn't exist.") + + if not os.path.isdir(self.input_seeds): + raise FuzzFrontendError(f"Input seeds dir (`{self.input_seeds}`) is not a directory.") + + if len(os.listdir(self.input_seeds)) == 0: + raise FuzzFrontendError(f"No seeds present in directory `{self.input_seeds}`.") + + # require 
output directory L.debug("Output directory: %s", self.output_test_dir) + if not self.output_test_dir: + raise FuzzFrontendError("Must provide -o/--output_test_dir.") + + if not os.path.exists(self.output_test_dir): + raise FuzzFrontendError(f"Output test dir (`{self.output_test_dir}`) doesn't exist.") + + if not os.path.isdir(self.output_test_dir): + raise FuzzFrontendError(f"Output test dir (`{self.output_test_dir}`) is not a directory.") + + # update stats and output file + self.stats_file = os.path.join(self.output_test_dir, self.stats_file) + self.output_file = os.path.join(self.output_test_dir, self.output_file) + + # require seeds flag + self.require_seeds = self.REQUIRE_SEEDS + + # push/pull/crash paths + self.push_dir = os.path.join(self.output_test_dir, self.PUSH_DIR) + self.pull_dir = os.path.join(self.output_test_dir, self.PULL_DIR) + self.crash_dir = os.path.join(self.output_test_dir, self.CRASH_DIR) # check if we enabled seed synchronization, and initialize directory - if self.enable_sync: + if self.sync_dir: + if not os.path.exists(self.sync_dir): + raise FuzzFrontendError(f"Seed synchronization dir (`{self.sync_dir}`) doesn't exist.") + if not os.path.isdir(self.sync_dir): - L.info("Initializing sync directory for ensembling seeds.") - os.mkdir(self.sync_dir) - L.debug("Sync directory: %s", self.sync_dir) + raise FuzzFrontendError(f"Seed synchronization dir (`{self.sync_dir}`) is not a directory.") + + L.info("Will synchronize seed using `%s` directory.", self.sync_dir) ################################## @@ -467,12 +580,55 @@ def main(self): # Fuzzer process execution methods ############################################## - def run(self, compiler: Optional[str] = None, no_exec: bool = False): + + def manage(self): + # print and save statistics + self.populate_stats() + self.save_stats() + if not self.fuzzer_out: + self.print_stats() + + # invoke ensemble if sync_dir is provided + if self.sync_dir: + L.info("%s - Performing sync cycle %s", self.name, 
self.sync_count) + self.ensemble() + self.sync_count += 1 + + + def cleanup(self): + if not self.proc: + return + + L.info(f"Killing process {self.proc.pid} and childs.") + + # terminate + try: + for some_proc in psutil.Process(self.proc.pid).children(recursive=True) + [self.proc]: + some_proc.terminate() + except psutil.NoSuchProcess: + self.proc = None + return + + # hard kill + for some_proc in psutil.Process(self.proc.pid).children(recursive=True) + [self.proc]: + try: + some_proc.communicate(timeout=1) + L.info("Fuzzer subprocess (PID %d) exited with `%d`", some_proc.pid, some_proc.returncode) + except subprocess.TimeoutExpired: + L.warning("Subprocess (PID %d) could not terminate in time, killing.", some_proc.pid) + some_proc.kill() + except psutil.NoSuchProcess: + self.proc = None + return + + self.proc = None + + + def run(self, runner: Optional[str] = None, no_exec: bool = False): """ - Interface for spawning and executing fuzzer jobs. Uses the configured `num_workers` in order to - create a multiprocessing pool to parallelize fuzzers for execution in self._run. + Interface for spawning and executing fuzzer job. 
- :param compiler: if necessary, a compiler that is invoked before fuzzer executable (ie `dotnet`) + :param runner: if necessary, a runner that is invoked before fuzzer executable (ie `dotnet`) :param no_exec: skips pre- and post-processing steps during execution """ @@ -488,153 +644,150 @@ def run(self, compiler: Optional[str] = None, no_exec: bool = False): # initialize cmd from property command = [self.fuzzer_exe] + self.cmd # type: ignore - # prepend compiler that invokes fuzzer - if compiler: - command.insert(0, compiler) - - results: List[ApplyResult[int]] - results_outputs: List[int] - mp.set_start_method('fork') - with mp.Pool(processes=self.num_workers) as pool: - results = [pool.apply_async(self._run, args=(command,)) for _ in range(self.num_workers)] - results_outputs = [result.get() for result in results] - - L.debug(results_outputs) - - # TODO: check results for failures - - # do post-fuzz operations - if not no_exec: - if callable(getattr(self, "post_exec")): - L.info("Calling post-exec for fuzzer post-processing") - self.post_exec() - - - def _run(self, command: List[str]) -> int: - """ - Spawns a singular fuzzer process for execution with proper error-handling and foreground STDOUT output. - Also supports rsync-style seed synchronization if configured to share seeds between a global queue. - - :param command: list of arguments representing fuzzer command to execute. 
- """ + # prepend runner that invokes fuzzer + if runner: + command.insert(0, runner) L.info("Executing command `%s`", command) + self.start_time: int = int(time.time()) + self.command: str = ' '.join(command) + self.sync_count = 0 + + total_execution_time: int = 0 + wait_time: int = self.sync_cycle + run_fuzzer: bool = True + prev_log_level = L.level + + # for fuzzer output + if not self.fuzzer_out: + fuzzer_out_file = open(self.output_file, "wb") + + # run or resume fuzzer process as long as it is needed + # may create new processes continuously + while run_fuzzer: + run_one_fuzzer_process: bool = False + try: + if self.fuzzer_out: + # disable deepstate output + L.info("Using fuzzer output.") + L.setLevel("ERROR") + self.proc = subprocess.Popen(command) - self._on = True - self._start_time = int(time.time()) - - try: - - # if we are syncing seeds, we background the process and all of the output generated - if self.enable_sync or self.num_workers > 1: - self.proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - L.info("Starting fuzzer with seed synchronization with PID `%d`", self.proc.pid) - else: - self.proc = subprocess.Popen(command) - L.info("Starting fuzzer with PID `%d`", self.proc.pid) - - L.info("Fuzzer start time: %s", self._start_time) - - # while fuzzers may configure timeout, subprocess can ensure exit and is useful when parallelizing - # processes or doing ensemble-based testing. - stdout, stderr = self.proc.communicate(timeout=self.timeout if self.timeout != 0 else None) - if self.proc.returncode != 0: - self._kill() - if self.enable_sync: - err = stdout if stderr is None else stderr - raise FuzzFrontendError(f"{self.name} run interrupted with non-zero return status. Message: {err.decode('utf-8')}") else: - raise FuzzFrontendError(f"{self.name} run interrupted with non-zero return status. 
Error code: {self.proc.returncode}") - - # invoke ensemble if seed synchronization option is set - if self.enable_sync: - - # do not ensemble as fuzzer initializes - time.sleep(5) - self.sync_count = 0 - - # ensemble "event" loop - while self._is_alive(): - - L.debug("%s - Performing sync cycle %s", self.name, self.sync_count) - - # sleep for execution cycle - time.sleep(self.sync_cycle) - - # call ensemble to perform seed synchronization - self.ensemble() - - # if sync_out argument set, output individual fuzzer statistics - # rather than have our ensembler report global stats - if self.sync_out: - print(f"\n{self.name} Fuzzer Stats\n") - for head, stat in self.reporter().items(): - print(f"{head}\t:\t{stat}") - - self.sync_count += 1 - - - # any OS-specific errors encountered - except OSError as e: - self._kill() - raise FuzzFrontendError(f"{self.name} run interrupted due to exception {e}.") - - # SIGINT stops fuzzer, but continues execution - except KeyboardInterrupt: - print(f"Killing fuzzer {self.name} with PID {self.proc.pid}") - self._kill() - return 1 - - except AnalysisBackendError as e: - raise e + L.info("Using DeepState output.") + # TODO: frontends uses blocking read in `populate_stats`, + # we may replace PIPE with normal file and do reads non-blocking + self.proc = subprocess.Popen(command, stdout=fuzzer_out_file, stderr=fuzzer_out_file) + + run_one_fuzzer_process = True + L.info("Started fuzzer process with PID %d.", self.proc.pid) + + except (OSError, ValueError): + L.setLevel(prev_log_level) + L.error(traceback.format_exc()) + raise FuzzFrontendError("Exception during fuzzer startup.") + + # run-manage loop, until somethings happens (error, interrupt, fuzzer exits) + # use only one process + while run_one_fuzzer_process: + # general timeout + time_left = float('inf') + total_execution_time = int(time.time() - self.start_time) + if self.timeout != 0: + time_left = self.timeout - total_execution_time + if time_left < 0: + run_one_fuzzer_process = 
False + run_fuzzer = False + wait_time = 0 + L.info("Timeout") + + try: + # sleep/communicate for `self.sync_cycle` time + timeout_one_cycle: int = wait_time + if wait_time > time_left: + timeout_one_cycle = int(time_left) + + L.debug("One cycle `communicate` with timeout %d.", timeout_one_cycle) + stdout, stderr = self.proc.communicate(timeout=timeout_one_cycle) + + # fuzzer process exited + # it's fine if returncode is 0 or 1 for libfuzzer + if self.proc.returncode == 0 or \ + (self.proc.returncode == 1 and self.name == "libFuzzer"): + L.info("Fuzzer %s (PID %d) exited with return code %d.", + self.name, self.proc.pid, self.proc.returncode) + run_one_fuzzer_process = False + + else: + if stdout: + L.error(stdout.decode('utf8')) + if stderr: + L.error(stderr.decode('utf8')) + raise FuzzFrontendError(f"Fuzzer {self.name} (PID {self.proc.pid}) exited " + f"with return code {self.proc.returncode}.") + + # Timeout, just continue to management step + except subprocess.TimeoutExpired: + L.debug("One cycle timeout.") + + # Any OS-specific errors encountered + except OSError as e: + L.error("%s run interrupted due to OSError: %s.", self.name, e) + run_one_fuzzer_process = False + + # SIGINT stops fuzzer, but continues frontend execution + except KeyboardInterrupt: + L.info("Stopped the %s fuzzer.", self.name) + run_one_fuzzer_process = False + run_fuzzer = False + + # bad things happed, inform user and exit + except Exception: + L.error(traceback.format_exc()) + L.error("Exception during fuzzer %s run.", self.name) + run_one_fuzzer_process = False + run_fuzzer = False + + # manage + try: + L.debug("Management cycle starts after %ss.", total_execution_time) + self.manage() + + # error in management, exit + except Exception: + L.error(traceback.format_exc()) + L.error("Exception during fuzzer %s run.", self.name) + run_one_fuzzer_process = False + run_fuzzer = False + + if self.do_restart(): + L.info(f"Restarting fuzzer {self.name}.") + run_one_fuzzer_process = False + + # 
cleanup + try: + self.cleanup() + sleep(10) # wait so all fuzzer processes are killed + except: + pass + + if run_fuzzer: + self.post_exec() - except Exception: - import traceback - L.error(traceback.format_exc()) + # and... maybe loop again! - finally: - self._kill() + if not self.fuzzer_out: + fuzzer_out_file.close() + L.setLevel(prev_log_level) # calculate total execution time - exec_time: float = round(time.time() - self._start_time, 2) + exec_time: float = round(time.time() - self.start_time, 2) L.info("Fuzzer exec time: %ss", exec_time) - return 0 - - - def _is_alive(self) -> bool: - """ - Checks to see if fuzzer PID is running, but tossing SIGT (0) to see if we can - interact. Ideally used in an event loop during a running process. - """ - - if self._on: - return True - - try: - os.kill(self.proc.pid, 0) - except (OSError, ProcessLookupError): - return False - - return True - - - def _kill(self) -> None: - """ - Kills running fuzzer process. Can be used forcefully if - KeyboardInterrupt signal falls through and process continues execution. - """ - if not hasattr(self, "proc"): - raise FuzzFrontendError("Attempted to kill non-running PID.") - - self.proc.terminate() - try: - self.proc.wait(timeout=0.5) - L.info("Fuzzer subprocess exited with `%d`", self.proc.returncode) - except subprocess.TimeoutExpired: - raise FuzzFrontendError("Subprocess could not terminate in time") - - self._on = False + # do post-fuzz operations + if not no_exec: + L.info("Calling post-exec for fuzzer post-processing") + self.post_exec() ############################################ @@ -651,13 +804,52 @@ def reporter(self): return NotImplementedError("Must implement in frontend subclass.") - @property - def stats(self): + def do_restart(self): + """ + Some fuzzers need restart to use seeds from external sources + (can't pull seeds in runtime). + This function should determine if the fuzzer should be restarted too look + for new seeds. 
+ This may be based on time of last new path discovered or whatever. + + Should return False if self.sync_dir is None. + """ + if not self.sync_dir: + return False + + # if time.time() - self.start_time > 20: + # return True + return False + + + def populate_stats(self): """ Parses out stats generated by fuzzer output. Should be implemented by user, and can return custom feedback. """ - raise NotImplementedError("Must implement in frontend subclass.") + crashes: int = len(os.listdir(self.crash_dir)) + if os.path.isfile(os.path.join(self.crash_dir, "README.txt")): + crashes -= 1 + self.stats["unique_crashes"] = str(crashes) + self.stats["start_time"] = str(int(self.start_time)) + if self.proc: + self.stats["fuzzer_pid"] = str(self.proc.pid) + if self.sync_dir: + self.stats["sync_dir_size"] = str(len(os.listdir(self.sync_dir))) + + + def print_stats(self): + for key, value in self.stats.items(): + if value: + L.fuzz_stats("%s:%s", key, value) + L.fuzz_stats("-"*30) + + + def save_stats(self): + with open(self.stats_file, 'w') as f: + for key, value in self.stats.items(): + if value: + f.write(f"{key}:{value}\n") def post_exec(self): @@ -666,7 +858,8 @@ def post_exec(self): things like crash triaging, testcase minimization (ie with `deepstate-reduce`), or any other manipulations with produced testcases. """ - raise NotImplementedError("Must implement in frontend subclass.") + # make sure that child processes are killed + self.cleanup() ################################### @@ -674,51 +867,42 @@ def post_exec(self): ################################### - def _sync_seeds(self, mode: str, src: str, dest: str, excludes: List[str] = []) -> None: + def _sync_seeds(self, src: str, dest: str, excludes: List[str] = []) -> None: """ Helper that invokes rsync for convenient file syncing between two files. TODO(alan): implement functionality for syncing across servers. TODO(alan): consider implementing "native" syncing alongside current "rsync mode". 
- :param mode: str representing mode (either 'GET' or 'PUSH') :param src: path to source queue :param dest: path to destination queue :param excludes: list of string patterns for paths to ignore when rsync-ing """ - if not mode in ["GET", "PUSH"]: - raise FuzzFrontendError(f"Unknown mode for seed syncing: `{mode}`") - - rsync_cmd: List[str] = ["rsync", "-racz", "--ignore-existing"] + rsync_cmd: List[str] = [ + "rsync", + "--recursive", + "--archive", + "--checksum", + "--compress", + "--ignore-existing" + ] # subclass should invoke with list of pattern ignores if len(excludes) > 0: rsync_cmd += [f"--exclude={e}" for e in excludes] - if mode == "GET": - rsync_cmd += [dest, src] - elif mode == "PUSH": - rsync_cmd += [src, dest] + rsync_cmd += [ + os.path.join(src, ""), # append trailing / + dest + ] - L.debug("rsync command: %s", rsync_cmd) + # L.debug("rsync command: %s", rsync_cmd) + L.debug("rsync %s: from `%s` to `%s`.", self.name, src, dest) try: subprocess.Popen(rsync_cmd) except subprocess.CalledProcessError as e: - raise FuzzFrontendError(f"{self.name} run interrupted due to exception {e}.") - - - @staticmethod - def _queue_len(queue_path: str) -> int: - """ - Helper that checks the number of seeds in queue, returns 0 if path doesn't - exist yet. - - :param queue_path: path to queue (ie AFL_out/queue/) - """ - if not os.path.exists(queue_path): - return 0 - return len([path for path in os.listdir(queue_path)]) + raise FuzzFrontendError(f"{self.name} rsync interrupted due to exception {e}.") def ensemble(self, local_queue: Optional[str] = None, global_queue: Optional[str] = None): @@ -727,26 +911,29 @@ def ensemble(self, local_queue: Optional[str] = None, global_queue: Optional[str implement any additional logic for determining whether to sync/get seeds as if in event loop. 
""" - if global_queue is None: - global_queue = self.sync_dir + "/" + if not self.sync_dir: + L.warning("Called `ensemble`, but `--sync_dir` not provided.") + return - global_len: int = self._queue_len(global_queue) - L.debug("Global seed queue: %s with %d files", global_queue, global_len) + global_queue = os.path.join(self.sync_dir, "queue") + global_crashes = os.path.join(self.sync_dir, "crashes") + local_queue = self.push_dir + local_crashes = self.crash_dir - if local_queue is None: - local_queue = self.output_test_dir + "/queue/" + # check global queue + global_len: int = len(os.listdir(self.crash_dir)) + L.debug("Global seed queue: `%s` with %d files", global_queue, global_len) - local_len: int = self._queue_len(local_queue) - L.debug("Fuzzer local seed queue: %s with %d files", local_queue, local_len) + # update local queue with new findings + self._sync_seeds(src=self.pull_dir, dest=self.push_dir) - # sanity check: if global queue is empty, populate from local queue - if (global_len == 0) and (local_len > 0): - L.info("Nothing in global queue, pushing seeds from local queue") - self._sync_seeds("PUSH", local_queue, global_queue) - return + # check local queue + local_len: int = len(os.listdir(self.push_dir)) + L.debug("Fuzzer local seed queue: `%s` with %d files", local_queue, local_len) # get seeds from local to global queue, rsync will deal with duplicates - self._sync_seeds("GET", global_queue, local_queue) + self._sync_seeds(src=local_queue, dest=global_queue) + self._sync_seeds(src=local_crashes, dest=global_crashes) # push seeds from global queue to local, rsync will deal with duplicates - self._sync_seeds("PUSH", global_queue, local_queue) + self._sync_seeds(src=global_queue, dest=local_queue) diff --git a/bin/deepstate/core/symex.py b/bin/deepstate/core/symex.py index c889206b..bbfea5ab 100644 --- a/bin/deepstate/core/symex.py +++ b/bin/deepstate/core/symex.py @@ -19,12 +19,11 @@ import argparse import hashlib -from deepstate import 
(DeepStateLogger, LOG_LEVEL_INT_TO_LOGGER, +from deepstate import (LOG_LEVEL_INT_TO_LOGGER, LOG_LEVEL_TRACE, LOG_LEVEL_ERROR, LOG_LEVEL_CRITICAL) from deepstate.core.base import AnalysisBackend -logging.setLoggerClass(DeepStateLogger) # fails without it, don't know why LOGGER = logging.getLogger(__name__) @@ -47,7 +46,7 @@ class SymexFrontend(AnalysisBackend): """Wrapper around a symbolic executor for making it easy to do common DeepState- specific things.""" def __init__(self): - pass + self.num_workers: int = 1 def get_context(self): raise NotImplementedError("Must be implemented by engine.") @@ -113,6 +112,10 @@ def parse_args(cls): "--verbosity", default=1, type=int, help="Verbosity level for symbolic execution tool (default: 1, lower means less output).") + parser.add_argument( + "-w", "--num_workers", default=1, type=int, + help="Number of worker jobs to spawn for analysis (default is 1).") + cls.parser = parser return super(SymexFrontend, cls).parse_args() @@ -230,7 +233,7 @@ def begin_test(self, info): # Create the output directory for this test case. args = self.parse_args() - if args.output_test_dir is not None: + if args.output_test_dir: test_dir = os.path.join(args.output_test_dir, os.path.basename(info.file_name), info.name) @@ -243,6 +246,8 @@ def begin_test(self, info): LOGGER.critical("Cannot create test output directory: %s", test_dir) self.context['test_dir'] = test_dir + else: + LOGGER.warning("Argument `--output_test_dir` not given, will not save test cases.") def log_message(self, level, message): """Add `message` to the `level`-specific log as a `Stream` object for @@ -291,6 +296,9 @@ def _stream_to_message(self, stream): data = struct.pack('BBBBBBBB', *val_bytes) val = struct.unpack(unpack_str, data[:struct.calcsize(unpack_str)])[0] + if type(val) == bytes: + val = val.decode('unicode_escape') + # Remove length specifiers that are not supported. 
format_str = format_str.replace('l', '') format_str = format_str.replace('h', '') diff --git a/bin/deepstate/executors/fuzz/afl.py b/bin/deepstate/executors/fuzz/afl.py index adb72b7f..96da023a 100644 --- a/bin/deepstate/executors/fuzz/afl.py +++ b/bin/deepstate/executors/fuzz/afl.py @@ -16,7 +16,6 @@ import os import logging import argparse -import shutil from typing import List, Dict, Optional @@ -34,6 +33,12 @@ class AFL(FuzzerFrontend): "COMPILER": "afl-clang++" } + REQUIRE_SEEDS = True + + PUSH_DIR = os.path.join("sync_dir", "queue") + PULL_DIR = os.path.join("the_fuzzer", "queue") + CRASH_DIR = os.path.join("the_fuzzer", "crashes") + @classmethod def parse_args(cls) -> None: parser: argparse.ArgumentParser = argparse.ArgumentParser( @@ -51,55 +56,45 @@ def compile(self) -> None: # type: ignore flags += [arg for arg in self.compiler_args.split(" ")] flags.append("-ldeepstate_AFL") - super().compile(lib_path, flags, self.out_test_name + ".afl") + super().compile(lib_path, flags, self.out_test_name) def pre_exec(self): """ Perform argparse and environment-related sanity checks. """ + # check for afl-qemu-trace if in QEMU mode + if 'Q' in self.fuzzer_args or self.blackbox == True: + self.EXECUTABLES["AFL-QEMU-TRACE"] = "afl-qemu-trace" + super().pre_exec() # check if core dump pattern is set as `core` - with open("/proc/sys/kernel/core_pattern") as f: - if not "core" in f.read(): - raise FuzzFrontendError("No core dump pattern set. Execute 'echo core | sudo tee /proc/sys/kernel/core_pattern'") + if os.path.isfile("/proc/sys/kernel/core_pattern"): + with open("/proc/sys/kernel/core_pattern") as f: + if not "core" in f.read(): + raise FuzzFrontendError("No core dump pattern set. 
Execute 'echo core | sudo tee /proc/sys/kernel/core_pattern'") # check if CPU scaling governor is set to `performance` - with open("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor") as f: - if not "perf" in f.read(4): - with open("/sys/devices/system/cpu/cpu0/cpufreq/scaling_min_freq") as f_min: - with open("/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq") as f_max: - if f_min.read() != f_max.read(): - raise FuzzFrontendError("Suboptimal CPU scaling governor. Execute 'echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor'") - - # require output directory - if not self.output_test_dir: - raise FuzzFrontendError("Must provide -o/--output_test_dir.") - - if not os.path.exists(self.output_test_dir): - raise FuzzFrontendError(f"Output test dir (`{self.output_test_dir}`) doesn't exist.") - - if not os.path.isdir(self.output_test_dir): - raise FuzzFrontendError(f"Output test dir (`{self.output_test_dir}`) is not a directory.") - - # check for afl-qemu if in QEMU mode - if 'Q' in self.fuzzer_args or self.blackbox == True: - if not shutil.which('afl-qemu-trace'): - raise FuzzFrontendError("Must provide `afl-qemu-trace` executable in PATH") - - # require input seeds if we aren't in dumb mode, or we are using crash mode - if 'n' not in self.fuzzer_args or 'C' in self.fuzzer_args: - if self.input_seeds is None: - raise FuzzFrontendError(f"Must provide -i/--input_seeds option for {self.name}.") - - # AFL uses "-" to tell it to resume fuzzing, don't treat as a real seed dir - if self.input_seeds != "-": - if not os.path.exists(self.input_seeds): - raise FuzzFrontendError(f"Input seeds dir (`{self.input_seeds}`) doesn't exist.") - - if len(os.listdir(self.input_seeds)) == 0: - raise FuzzFrontendError(f"No seeds present in directory `{self.input_seeds}`.") + if os.path.isfile("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor"): + with open("/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor") as f: + if not "perf" in f.read(4): + 
with open("/sys/devices/system/cpu/cpu0/cpufreq/scaling_min_freq") as f_min: + with open("/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq") as f_max: + if f_min.read() != f_max.read(): + raise FuzzFrontendError("Suboptimal CPU scaling governor. Execute 'echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor'") + + # if we are in dumb mode and we are not using crash mode + if 'n' in self.fuzzer_args and 'C' not in self.fuzzer_args: + self.require_seeds = False + + # resume fuzzing + if len(os.listdir(self.output_test_dir)) > 1: + self.check_required_directories([self.push_dir, self.pull_dir, self.crash_dir]) + self.input_seeds = '-' + L.info(f"Resuming fuzzing using seeds from {self.pull_dir} (skipping --input_seeds option).") + else: + self.setup_new_session([self.push_dir]) @property @@ -107,7 +102,10 @@ def cmd(self): cmd_list: List[str] = list() # guaranteed arguments - cmd_list.extend(["-o", self.output_test_dir]) # auto-create, reusable + cmd_list.extend([ + "-o", self.output_test_dir, # auto-create, reusable + "-M", "the_fuzzer" # TODO, detect when to use -S + ]) if self.mem_limit == 0: cmd_list.extend(["-m", "1099511627776"]) # use 1TiB as unlimited @@ -140,56 +138,27 @@ def cmd(self): return self.build_cmd(cmd_list) - @property - def stats(self) -> Dict[str, Optional[str]]: + def populate_stats(self): """ Retrieves and parses the stats file produced by AFL """ - stat_file: str = self.output_test_dir + "/fuzzer_stats" - with open(stat_file, "r") as sf: - lines = sf.readlines() - - stats: Dict[str, Optional[str]] = { - "last_update": None, - "start_time": None, - "fuzzer_pid": None, - "cycles_done": None, - "execs_done": None, - "execs_per_sec": None, - "paths_total": None, - "paths_favored": None, - "paths_found": None, - "paths_imported": None, - "max_depth": None, - "cur_path": None, - "pending_favs": None, - "pending_total": None, - "variable_paths": None, - "stability": None, - "bitmap_cvg": None, - "unique_crashes": 
None, - "unique_hangs": None, - "last_path": None, - "last_crash": None, - "last_hang": None, - "execs_since_crash": None, - "exec_timeout": None, - "afl_banner": None, - "afl_version": None, - "command_line": None - } - - for l in lines: - for k in stats.keys(): - if k in l: - stats[k] = l[19:].strip(": %\r\n") - return stats + stat_file_path: str = os.path.join(self.output_test_dir, "the_fuzzer", "fuzzer_stats") + # with open(stat_file_path, "r") as stat_file: + # for line in stat_file: + lines = open(stat_file_path, "r").readlines() + for line in lines: + key = line.split(":", 1)[0].strip() + value = line.split(":", 1)[1].strip() + if key in self.stats: + self.stats[key] = value + super().populate_stats() def reporter(self) -> Dict[str, Optional[str]]: """ Report a summarized version of statistics, ideal for ensembler output. """ + self.populate_stats() return dict({ "Execs Done": self.stats["execs_done"], "Cycle Completed": self.stats["cycles_done"], @@ -198,8 +167,9 @@ def reporter(self) -> Dict[str, Optional[str]]: }) - def _sync_seeds(self, mode, src, dest, excludes=["*.cur_input"]) -> None: - super()._sync_seeds(mode, src, dest, excludes=excludes) + def _sync_seeds(self, src, dest, excludes=[]) -> None: + excludes += ["*.cur_input", ".state"] + super()._sync_seeds(src, dest, excludes=excludes) def post_exec(self) -> None: @@ -208,11 +178,8 @@ def post_exec(self) -> None: and (TODO) performs crash triaging with seeds from both sync_dir and local queue. 
""" - if self.post_stats: - print(f"\n{self.name} RUN STATS:\n") - for stat, val in self.stats.items(): - fstat: str = stat.replace("_", " ").upper() - print(f"{fstat}:\t\t\t{val}") + # TODO: merge output_test_dir/the_fuzzer/crashes* into one dir + super().post_exec() def main(): diff --git a/bin/deepstate/executors/fuzz/angora.py b/bin/deepstate/executors/fuzz/angora.py index a61c0383..be44bd0a 100644 --- a/bin/deepstate/executors/fuzz/angora.py +++ b/bin/deepstate/executors/fuzz/angora.py @@ -18,6 +18,7 @@ import logging import argparse import subprocess +import time from typing import List, Dict, Optional, Any @@ -31,12 +32,19 @@ class Angora(FuzzerFrontend): # these classvars are set under the assumption that $ANGORA_PATH is set to the built source NAME = "Angora" - SEARCH_DIRS = ["bin", "clang+llvm", "tools"] + SEARCH_DIRS = ["clang+llvm/bin", "bin", "tools"] EXECUTABLES = {"FUZZER": "angora_fuzzer", "COMPILER": "angora-clang++", - "GEN_LIB_ABILIST": "gen_library_abilist.sh" + "GEN_LIB_ABILIST": "gen_library_abilist.sh", + "CLANG_COMPILER": "clang++" } + REQUIRE_SEEDS = True + + PUSH_DIR = os.path.join("sync_dir", "queue") + PULL_DIR = os.path.join("angora", "queue") + CRASH_DIR = os.path.join("angora", "crashes") + @classmethod def parse_args(cls) -> None: @@ -86,7 +94,6 @@ def compile(self) -> None: # type: ignore out: bytes = subprocess.check_output(cmd) ignore_bufs += [out] - # write all to final out_file with open(out_file, "wb") as f: for buf in ignore_bufs: @@ -95,8 +102,26 @@ def compile(self) -> None: # type: ignore # set envvar for fuzzer compilers env["ANGORA_TAINT_RULE_LIST"] = os.path.abspath(out_file) + # make a binary with taint tracking information + # env["USE_PIN"] = "1" # TODO, add pin support + env["USE_TRACK"] = "1" + + taint_path: str = "/usr/local/lib/libdeepstate_taint.a" + L.debug("Static library path: %s", taint_path) + + taint_flags: List[str] = ["-ldeepstate_taint"] + if self.compiler_args: + taint_flags += [arg for arg in 
self.compiler_args.split(' ')] + L.info("Compiling %s for %s with taint tracking", self.compile_test, self.name) + super().compile(taint_path, taint_flags, self.out_test_name + ".taint", env=env) + + self.taint_binary = self.binary + self.binary = None + env.pop("USE_TRACK") # make a binary with light instrumentation + env["USE_FAST"] = "1" + fast_path: str = "/usr/local/lib/libdeepstate_fast.a" L.debug("Static library path: %s", fast_path) @@ -106,24 +131,14 @@ def compile(self) -> None: # type: ignore L.info("Compiling %s for %s with light instrumentation.", self.compile_test, self.name) super().compile(fast_path, fast_flags, self.out_test_name + ".fast", env=env) - # initialize envvar for instrumentation framework - if self.mode == "pin": # type: ignore - env["USE_PIN"] = "1" - else: - env["USE_TRACK"] = "1" - - # make a binary with taint tracking information - taint_path: str = "/usr/local/lib/libdeepstate_taint.a" - L.debug("Static library path: %s", taint_path) - - taint_flags: List[str] = ["-ldeepstate_taint"] - if self.compiler_args: - taint_flags += [arg for arg in self.compiler_args.split(' ')] - L.info("Compiling %s for %s with taint tracking", self.compile_test, self.name) - super().compile(taint_path, taint_flags, self.out_test_name + ".taint", env=env) - def pre_exec(self): + # correct version of clang is required + self._set_executables() + clang_for_angora_path = os.path.dirname(self.EXECUTABLES["CLANG_COMPILER"]) + os.environ["PATH"] = ":".join((clang_for_angora_path, os.environ.get("PATH", ""))) + L.info(f"Adding `{clang_for_angora_path}` to $PATH.") + super().pre_exec() # since base method checks for self.binary by default @@ -134,14 +149,13 @@ def pre_exec(self): if not os.path.exists(self.taint_binary): raise FuzzFrontendError("Taint binary doesn't exist") - if not self.input_seeds: - raise FuzzFrontendError(f"Must provide -i/--input_seeds option for {self.name}.") - - if not os.path.exists(self.input_seeds): - raise FuzzFrontendError(f"Input 
seeds dir (`{self.input_seeds}`) doesn't exist.") - - if len(os.listdir(self.input_seeds)) == 0: - raise FuzzFrontendError(f"No seeds present in directory `{self.input_seeds}`.") + # resume fuzzing + if len(os.listdir(self.output_test_dir)) > 1: + self.check_required_directories([self.push_dir, self.pull_dir, self.crash_dir]) + self.input_seeds = '-' + L.info(f"Resuming fuzzing using seeds from {self.pull_dir} (skipping --input_seeds option).") + else: + self.setup_new_session([self.push_dir]) if self.blackbox is True: raise FuzzFrontendError(f"Blackbox fuzzing is not supported by {self.name}.") @@ -149,13 +163,6 @@ def pre_exec(self): if self.dictionary: L.error("%s can't use dictionaries.", self.name) - # require output directory - if not self.output_test_dir: - raise FuzzFrontendError("Must provide -o/--output_test_dir.") - - if os.path.exists(self.output_test_dir): - raise FuzzFrontendError(f"Remove previous output directory (`{self.output_test_dir}`) before running {self.name}.") - @property def cmd(self): @@ -166,7 +173,8 @@ def cmd(self): "--mode", "llvm", # TODO, add pin support "--track", os.path.abspath(self.taint_binary), "--memory_limit", str(self.mem_limit), - "--output", self.output_test_dir # auto-create, not reusable + "--output", self.output_test_dir, # auto-create, not reusable + "--sync_afl" ]) for key, val in self.fuzzer_args: @@ -191,26 +199,63 @@ def cmd(self): return self.build_cmd(cmd_list) - @property - def stats(self) -> Optional[Dict[str, str]]: + def populate_stats(self): """ Parses Angora output JSON config to dict for reporting. 
""" - stat_file: str = self.output_test_dir + "/chart_stat.json" + super().populate_stats() - if not hasattr(self, "prev_stats"): - self.prev_stats: Optional[Dict[str, str]] = None + stat_file_path: str = os.path.join(self.output_test_dir, "angora", "fuzzer_stats") + try: + with open(stat_file_path, "r") as stat_file: + self.stats["fuzzer_pid"] = stat_file.read().split(":", 1)[1].strip() + except: + pass + stat_file_path = os.path.join(self.output_test_dir, "angora", "chart_stat.json") + new_stats: Dict[str, str] = {} try: - with open(stat_file, "r") as handle: - stats: Optional[Dict[str, str]] = json.loads(handle.read()) - self.prev_stats = stats + with open(stat_file_path, "r") as stat_file: + new_stats = json.loads(stat_file.read()) + except json.decoder.JSONDecodeError as e: + L.error(f"Error parsing {stat_file_path}: {e}.") + except: + return + + # previous_stats = self.stats.copy() + + if new_stats.get("init_time"): + self.stats["start_time"] = str(int(time.time() - int(new_stats.get("init_time")))) + elif self.proc: + self.stats["start_time"] = str(int(self.start_time)) + + self.stats["last_update"] = str(int(os.path.getmtime(stat_file_path))) - # fallback on initially parsed stats if failed to decode - except json.decoder.JSONDecodeError: - stats = self.prev_stats + self.stats["execs_done"] = new_stats.get("num_exec", 0) + self.stats["execs_per_sec"] = new_stats.get("speed", [0])[0] + self.stats["paths_total"] = new_stats.get("num_inputs", 0) - return stats + if new_stats.get("num_crashes"): + self.stats["unique_crashes"] = new_stats.get("num_crashes") + self.stats["unique_hangs"] = new_stats.get("num_hangs", 0) + + # all_fuzz = [] + # for one_fuzz in new_stats.get("fuzz", []): + # time_key = one_fuzz.pop("time", {}) + # s = time_key.get("secs", 0) + # ns = time_key.get("nanos", 0) + # t = float('{}.{:09d}'.format(s, ns)) + # all_fuzz.append((t, one_fuzz)) + # all_fuzz = sorted(all_fuzz, key=operator.itemgetter(0), reverse=True) + + # if len(all_fuzz) >= 2: 
+ # last_crash_execs = 0 + # for one_fuzz in all_fuzz: + # if one_fuzz.get("num_crashes") < self.stats["unique_crashes"]: + # last_crash_execs = one_fuzz["num_exec"] + # self.stats["execs_since_crash"] = self.stats["execs_done"] - last_crash_execs + + # self.stats["command_line"] = self.command def reporter(self) -> Optional[Dict[str, Any]]: @@ -226,6 +271,10 @@ def reporter(self) -> Optional[Dict[str, Any]]: }) + def post_exec(self): + pass + + def main(): fuzzer = Angora(envvar="ANGORA_HOME") return fuzzer.main() diff --git a/bin/deepstate/executors/fuzz/eclipser.py b/bin/deepstate/executors/fuzz/eclipser.py index 202bbd44..5c42c34e 100644 --- a/bin/deepstate/executors/fuzz/eclipser.py +++ b/bin/deepstate/executors/fuzz/eclipser.py @@ -36,12 +36,19 @@ class Eclipser(FuzzerFrontend): NAME = "Eclipser" SEARCH_DIRS = ["build"] EXECUTABLES = {"FUZZER": "Eclipser.dll", - "COMPILER": "clang++" # for regular compilation + "COMPILER": "clang++", # for regular compilation + "RUNNER": "dotnet" } + REQUIRE_SEEDS = False + + PUSH_DIR = os.path.join("sync_dir", "queue") + PULL_DIR = os.path.join("sync_dir", "queue") + CRASH_DIR = os.path.join("the_fuzzer", "crashes") + def print_help(self): - subprocess.call(["dotnet", self.fuzzer_exe, "fuzz", "--help"]) + subprocess.call([self.EXECUTABLES["RUNNER"], self.fuzzer_exe, "fuzz", "--help"]) def compile(self) -> None: # type: ignore @@ -54,7 +61,7 @@ def compile(self) -> None: # type: ignore flags: List[str] = ["-ldeepstate"] if self.compiler_args: flags += [arg for arg in self.compiler_args.split(" ")] - super().compile(lib_path, flags, self.out_test_name + ".eclipser") + super().compile(lib_path, flags, self.out_test_name) def pre_exec(self) -> None: @@ -63,26 +70,24 @@ def pre_exec(self) -> None: # TODO handle that somehow L.warning("Eclipser doesn't limit child processes memory.") + self.encoded_testcases_dir: str = os.path.join(self.output_test_dir, "the_fuzzer", "testcase") + self.encoded_crash_dir: str = 
os.path.join(self.output_test_dir, "the_fuzzer", "crash") + + # resume fuzzing + if len(os.listdir(self.output_test_dir)) > 1: + self.check_required_directories([self.push_dir, self.crash_dir, + self.encoded_crash_dir, self.encoded_testcases_dir]) + L.info(f"Resuming fuzzing using seeds from {self.pull_dir} (skipping --input_seeds option).") + self.decode_testcases() + self.input_seeds = self.push_dir + else: + self.setup_new_session([self.crash_dir, self.push_dir]) + if self.blackbox == True: L.info("Blackbox option is redundant. Eclipser works on non-instrumented binaries using QEMU by default.") if self.dictionary: - L.error("Angora can't use dictionaries.") - - # require output directory - if not self.output_test_dir: - raise FuzzFrontendError("Must provide -o/--output_test_dir.") - - if os.path.exists(self.output_test_dir): - if not os.path.isdir(self.output_test_dir): - raise FuzzFrontendError(f"Output test dir (`{self.output_test_dir}`) is not a directory.") - - if self.input_seeds: - if not os.path.exists(self.input_seeds): - raise FuzzFrontendError(f"Input seeds dir (`{self.input_seeds}`) doesn't exist.") - - if len(os.listdir(self.input_seeds)) == 0: - raise FuzzFrontendError(f"No seeds present in directory `{self.input_seeds}`.") + L.error("Eclipser can't use dictionaries.") @property @@ -102,7 +107,7 @@ def cmd(self): "--src", "file", "--fixfilepath", "eclipser.input", "--initarg", " ".join(deepstate_args), - "--outputdir", self.output_test_dir, # auto-create, reusable + "--outputdir", os.path.join(self.output_test_dir, "the_fuzzer"), # auto-create, reusable ]) if self.max_input_size == 0: @@ -145,18 +150,39 @@ def ensemble(self) -> None: # type: ignore super().ensemble(local_queue) + def decode_testcases(self): + L.info("Performing decoding on testcases and crashes") + decoded_path: str = os.path.join(self.output_test_dir, "decoded") + + subprocess.call([self.EXECUTABLES["RUNNER"], self.fuzzer_exe, "decode", + "-i", self.encoded_crash_dir, "-o", 
decoded_path], + stdout=subprocess.PIPE) + for f in glob.glob(os.path.join(decoded_path, "decoded_files", "*")): + shutil.copy(f, self.crash_dir) + shutil.rmtree(decoded_path) + + subprocess.call([self.EXECUTABLES["RUNNER"], self.fuzzer_exe, "decode", + "-i", self.encoded_testcases_dir, "-o", decoded_path], + stdout=subprocess.PIPE) + for f in glob.glob(os.path.join(decoded_path, "decoded_files", "*")): + shutil.copy(f, self.pull_dir) + shutil.rmtree(decoded_path) + + + def manage(self): + self.decode_testcases() + super().manage() + + def post_exec(self) -> None: """ Decode and minimize testcases after fuzzing. """ - out: str = self.output_test_dir + self.decode_testcases() + - L.info("Performing post-processing decoding on testcases and crashes") - subprocess.call(["dotnet", self.fuzzer_exe, "decode", "-i", out + "/testcase", "-o", out + "/decoded"]) - subprocess.call(["dotnet", self.fuzzer_exe, "decode", "-i", out + "/crash", "-o", out + "/decoded"]) - for f in glob.glob(out + "/decoded/decoded_files/*"): - shutil.copy(f, out) - shutil.rmtree(out + "/decoded") + def populate_stats(self): + super().populate_stats() def reporter(self) -> Dict[str, int]: @@ -175,7 +201,7 @@ def main(): try: fuzzer = Eclipser(envvar="ECLIPSER_HOME") fuzzer.parse_args() - fuzzer.run(compiler="dotnet") + fuzzer.run(runner=fuzzer.EXECUTABLES["RUNNER"]) return 0 except FuzzFrontendError as e: L.error(e) diff --git a/bin/deepstate/executors/fuzz/honggfuzz.py b/bin/deepstate/executors/fuzz/honggfuzz.py index b4599fe2..08ea285a 100644 --- a/bin/deepstate/executors/fuzz/honggfuzz.py +++ b/bin/deepstate/executors/fuzz/honggfuzz.py @@ -18,7 +18,7 @@ from typing import List, Dict, Optional -from deepstate.core import FuzzerFrontend, FuzzFrontendError +from deepstate.core import FuzzerFrontend L = logging.getLogger(__name__) @@ -31,6 +31,12 @@ class Honggfuzz(FuzzerFrontend): "COMPILER": "hfuzz-clang++" } + REQUIRE_SEEDS = True + + PUSH_DIR = os.path.join("sync_dir", "queue") + PULL_DIR = 
os.path.join("sync_dir", "queue") + CRASH_DIR = os.path.join("the_fuzzer", "crashes") + @classmethod def parse_args(cls) -> None: @@ -52,25 +58,19 @@ def compile(self) -> None: # type: ignore if self.compiler_args: flags += [arg for arg in self.compiler_args.split(" ")] - super().compile(lib_path, flags, self.out_test_name + ".hfuzz") + super().compile(lib_path, flags, self.out_test_name) def pre_exec(self): super().pre_exec() - # require output directory - if not self.output_test_dir: - raise FuzzFrontendError("Must provide -o/--output_test_dir.") - - if os.path.exists(self.output_test_dir): - if not os.path.isdir(self.output_test_dir): - raise FuzzFrontendError(f"Output test dir (`{self.output_test_dir}`) is not a directory.") - - if not self.input_seeds: - raise FuzzFrontendError(f"Must provide -i/--input_seeds option for {self.name}.") - - if not os.path.exists(self.input_seeds): - raise FuzzFrontendError(f"Input seeds dir (`{self.input_seeds}`) doesn't exist.") + # resume fuzzing + if len(os.listdir(self.output_test_dir)) > 1: + self.check_required_directories([self.push_dir, self.pull_dir, self.crash_dir]) + self.input_seeds = self.push_dir + L.info(f"Resuming fuzzing using seeds from {self.push_dir} (skipping --input_seeds option).") + else: + self.setup_new_session([self.pull_dir, self.crash_dir]) @property @@ -79,9 +79,13 @@ def cmd(self): # guaranteed arguments cmd_list.extend([ - "--output", self.output_test_dir, # auto-create, reusable "--workspace", self.output_test_dir, + "--output", self.push_dir, # auto-create, reusable + "--crashdir", self.crash_dir, + # "--logfile", os.path.join(self.output_test_dir, "hfuzz_log.txt"), + # "--verbose", "--rlimit_rss", str(self.mem_limit), + "--threads", "1" ]) if self.max_input_size == 0: @@ -118,54 +122,11 @@ def cmd(self): return self.build_cmd(cmd_list, input_symbol="___FILE___") - @property - def stats(self) -> Dict[str, Optional[str]]: + def populate_stats(self): """ Retrieves and parses the stats file 
produced by Honggfuzz """ - out_dir: str = os.path.abspath(self.output_test_dir) - report_file: str = "HONGGFUZZ.REPORT.TXT" - - # read report file generated by honggfuzz - stat_file: str = os.path.join(out_dir + report_file) - with open(stat_file, "r") as sf: - lines = sf.readlines() - - stats: Dict[str, Optional[str]] = { - "mutationsPerRun": None, - "externalCmd": None, - "fuzzStdin": None, - "timeout": None, - "ignoreAddr": None, - "ASLimit": None, - "RSSLimit": None, - "DATALimit": None, - "wordlistFile": None, - "fuzzTarget": None, - "ORIG_FNAME": None, - "FUZZ_FNAME": None, - "PID": None, - "SIGNAL": None, - "FAULT ADDRESS": None, - "INSTRUCTION": None, - "STACK HASH": None, - } - - # strip first 4 and last 5 lines to make a parseable file - lines = lines[4:][:-5] - - for l in lines: - for k in stats.keys(): - if k in l: - stats[k] = l.split(":")[1].strip() - - # add crash metrics - crashes: int = len([name for name in os.listdir(out_dir) if name != report_file]) - stats.update({ - "CRASHES": str(crashes) - }) - - return stats + super().populate_stats() def reporter(self) -> Dict[str, Optional[str]]: @@ -179,10 +140,7 @@ def reporter(self) -> Dict[str, Optional[str]]: def post_exec(self) -> None: - if self.post_stats: - print("\n") - for k, v in self.stats.items(): - print(f"{k} : {v}") + super().post_exec() def main(): diff --git a/bin/deepstate/executors/fuzz/libfuzzer.py b/bin/deepstate/executors/fuzz/libfuzzer.py index b5355eff..bbbbefdc 100644 --- a/bin/deepstate/executors/fuzz/libfuzzer.py +++ b/bin/deepstate/executors/fuzz/libfuzzer.py @@ -30,6 +30,12 @@ class LibFuzzer(FuzzerFrontend): "COMPILER": "clang++" } + REQUIRE_SEEDS = False + + PUSH_DIR = os.path.join("sync_dir", "queue") + PULL_DIR = os.path.join("sync_dir", "queue") + CRASH_DIR = os.path.join("the_fuzzer", "crashes") + @classmethod def parse_args(cls) -> None: parser: argparse.ArgumentParser = argparse.ArgumentParser( @@ -45,7 +51,7 @@ def compile(self) -> None: # type: ignore flags: 
List[str] = ["-ldeepstate_LF", "-fsanitize=fuzzer,undefined"] if self.compiler_args: flags += [arg for arg in self.compiler_args.split(" ")] - super().compile(lib_path, flags, self.out_test_name + ".lfuzz") + super().compile(lib_path, flags, self.out_test_name) def pre_exec(self) -> None: @@ -60,25 +66,21 @@ def pre_exec(self) -> None: super().pre_exec() # again, because we may had run compiler + if not self.binary: + raise FuzzFrontendError("Binary not set.") self.binary = os.path.abspath(self.binary) self.fuzzer_exe = self.binary # type: ignore - # require output directory - if not self.output_test_dir: - raise FuzzFrontendError("Must provide -o/--output_test_dir.") - - if not os.path.exists(self.output_test_dir): - raise FuzzFrontendError(f"Output test dir (`{self.output_test_dir}`) doesn't exist.") - - if not os.path.isdir(self.output_test_dir): - raise FuzzFrontendError(f"Output test dir (`{self.output_test_dir}`) is not a directory.") - if self.blackbox is True: raise FuzzFrontendError("Blackbox fuzzing is not supported by libFuzzer.") - if self.input_seeds: - if not os.path.exists(self.input_seeds): - raise FuzzFrontendError(f"Input seeds dir (`{self.input_seeds}`) doesn't exist.") + # resuming fuzzing + if len(os.listdir(self.output_test_dir)) > 0: + self.check_required_directories([self.push_dir, self.pull_dir, self.crash_dir]) + self.input_seeds = None + L.info(f"Resuming fuzzing using seeds from {self.push_dir} (skipping --input_seeds option).") + else: + self.setup_new_session([self.pull_dir, self.crash_dir]) @property @@ -92,7 +94,14 @@ def cmd(self): # guaranteed arguments cmd_list.extend([ "-rss_limit_mb={}".format(self.mem_limit), - "-max_len={}".format(self.max_input_size) + "-max_len={}".format(self.max_input_size), + "-artifact_prefix={}".format(self.crash_dir + "/"), + # "-jobs={}".format(0), + # "-workers={}".format(1), + # "-fork=1", + "-reload=1", + "-runs=-1", + "-print_final_stats=1" ]) for key, val in self.fuzzer_args: @@ -108,13 +117,8 @@ 
def cmd(self): if self.exec_timeout: cmd_list.append("-timeout={}".format(self.exec_timeout / 1000)) - if self.post_stats: - cmd_list.append("-print_final_stats={}".format(1)) - - cmd_list.append("-artifact_prefix={}".format("deepstate_")) - # must be here, this are positional args - cmd_list.append(self.output_test_dir) # no auto-create, reusable + cmd_list.append(self.push_dir) # no auto-create, reusable # not required, if provided: not auto-create and not require any files inside if self.input_seeds: @@ -123,6 +127,44 @@ def cmd(self): return cmd_list + def populate_stats(self): + super().populate_stats() + + if not os.path.isfile(self.output_file): + return + + with open(self.output_file, "rb") as f: + for line in f: + # libFuzzer under DeepState have broken output + # splitted into multiple lines, preceeded with "EXTERNAL:" + if line.startswith(b"EXTERNAL: "): + line = line.split(b":", 1)[1].strip() + if line.startswith(b"#"): + # new event code + self.stats["execs_done"] = line.split()[0].strip(b"#").decode() + + elif b":" in line: + line = line.split(b":", 1)[1].strip() + if b":" in line: + key, value = line.split(b":", 1) + if key == b"exec/s": + self.stats["execs_per_sec"] = value.strip().decode() + elif key == b"units": + self.stats["paths_total"] = value.strip().decode() + elif key == b"cov": + self.stats["bitmap_cvg"] = value.strip().decode() + + + def _sync_seeds(self, src, dest, excludes=[]) -> None: + excludes += ["*.cur_input", ".state"] + super()._sync_seeds(src, dest, excludes=excludes) + + + def post_exec(self): + # TODO: remove crashes from seeds dir and from sync_dir + pass + + def main(): fuzzer = LibFuzzer(envvar="LIBFUZZER_HOME") return fuzzer.main() diff --git a/bin/deepstate/executors/symex/__init__.py b/bin/deepstate/executors/symex/__init__.py index e69de29b..2fcc3d5b 100644 --- a/bin/deepstate/executors/symex/__init__.py +++ b/bin/deepstate/executors/symex/__init__.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3.6 +# Copyright (c) 2019 
Trail of Bits, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import pkgutil +import importlib + + +def import_fuzzers(pkg_name): + """ + dynamically load fuzzer frontends using importlib + """ + package = sys.modules[pkg_name] + return [ + importlib.import_module(pkg_name + '.' + submod) + for _, submod, _ in pkgutil.walk_packages(package.__path__) + ] + +__all__ = import_fuzzers(__name__) diff --git a/docker/Dockerfile b/docker/Dockerfile index 84033642..576eaa82 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -27,8 +27,12 @@ FROM deepstate-base ENV DEPS_DIR /home/user/deps ARG make_j +# Angr, Manticore +RUN echo 'Installing angr and manticore' \ + && pip3 install z3-solver angr git+git://github.com/trailofbits/manticore.git --user + # Eclipser - not deepstate dependent -COPY --from=Eclipser /home/user/Eclipser $DEPS_DIR/eclipser +COPY --from=Eclipser /home/user/Eclipser/build $DEPS_DIR/eclipser RUN echo 'Eclipser - installing dotnet' \ && wget -q https://packages.microsoft.com/config/ubuntu/18.04/packages-microsoft-prod.deb \ && sudo dpkg -i packages-microsoft-prod.deb \ @@ -46,6 +50,7 @@ RUN sudo chown user:user -R ./deepstate WORKDIR ./deepstate # Angora part 2 +# ignore errors in `make`, because Angora doesn't support 32bit builds RUN echo 'Building deepstate with Angora - taint' \ && mkdir -p build_angora_taint && cd build_angora_taint \ && export PATH="$DEPS_DIR/angora/clang+llvm/bin:$PATH" \ @@ -53,7 +58,7 @@ RUN echo 'Building 
deepstate with Angora - taint' \ && export USE_TRACK=1 \ && export ANGORA_HOME="$DEPS_DIR/angora" \ && CXX="$ANGORA_HOME/bin/angora-clang++" CC="$ANGORA_HOME/bin/angora-clang" cmake -DDEEPSTATE_ANGORA=ON ../ \ - && make -j $make_j -i # ignore errors, because Angora doesn't support 32bit builds \ + && make -j $make_j -i \ && sudo cp ./libdeepstate_taint.a /usr/local/lib/ RUN echo 'Building deepstate with Angora - fast' \ @@ -91,6 +96,7 @@ RUN echo 'Building deepstate with AFL' \ # Honggfuzz COPY --from=Honggfuzz /home/user/honggfuzz $DEPS_DIR/honggfuzz +RUN sudo apt-get -y install libunwind-dev RUN echo 'Building deepstate with Honggfuzz' \ && mkdir -p build_honggfuzz && cd build_honggfuzz \ && export HONGGFUZZ_HOME="$DEPS_DIR/honggfuzz" \ @@ -98,10 +104,8 @@ RUN echo 'Building deepstate with Honggfuzz' \ && make -j $make_j \ && sudo cp ./libdeepstate_HFUZZ.a /usr/local/lib/ -# Angr, Manticore -RUN echo 'Installing angr and manticore' \ - && pip3 install z3-solver angr git+git://github.com/trailofbits/manticore.git --user - ENV CXX=clang++ CC=clang +ENV AFL_HOME="$DEPS_DIR/afl" HONGGFUZZ_HOME="$DEPS_DIR/honggfuzz" \ + ANGORA_HOME="$DEPS_DIR/angora" ECLIPSER_HOME="$DEPS_DIR/eclipser" CMD ["/bin/bash"] \ No newline at end of file diff --git a/docker/install_angora.sh b/docker/install_angora.sh index dbe426cc..74ceb50f 100644 --- a/docker/install_angora.sh +++ b/docker/install_angora.sh @@ -6,6 +6,7 @@ sudo sed -i -- 's/#deb-src/deb-src/g' /etc/apt/sources.list sudo sed -i -- 's/# deb-src/deb-src/g' /etc/apt/sources.list # Install dependencies +sudo apt-get update sudo apt-get install -y rustc \ cargo libstdc++-7-dev zlib1g-dev \ && sudo rm -rf /var/lib/apt/lists/* diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 00000000..307891b8 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,5 @@ +# Documentation + +* [Basic usage](/docs/basic_usage.md) +* [Fuzzing](/docs/fuzzing.md) +* [Swarm testing](/docs/swarm_testing.md) \ No newline at end of file diff 
--git a/docs/basic_usage.md b/docs/basic_usage.md new file mode 100644 index 00000000..99c604a8 --- /dev/null +++ b/docs/basic_usage.md @@ -0,0 +1,242 @@ +# Basic usage + +DeepState consists of a static library, used to write test harnesses, +and command-line _executors_ written in Python. At this time, the best +documentation is in the [examples](/examples) and in our +[paper](https://agroce.github.io/bar18.pdf). A more extensive +example, using DeepState and libFuzzer to test a user-mode file +system, is available [here](https://github.com/agroce/testfs); in +particular the +[Tests.cpp](https://github.com/agroce/testfs/blob/master/Tests.cpp) +file and CMakeLists.txt show DeepState usage. Another extensive +example is a [differential tester that compares Google's leveldb and +Facebook's rocksdb](https://github.com/agroce/testleveldb). + +## Writing test harness + +```cpp +#include <deepstate/DeepState.hpp> + +using namespace deepstate; + +/* Simple, buggy, run-length encoding that creates "human readable" + * encodings by adding 'A'-1 to the count, and splitting at 26. 
+ * e.g., encode("aaabbbbbc") = "aCbEcA" since C=3 and E=5 */ + +char* encode(const char* input) { + unsigned int len = strlen(input); + char* encoded = (char*)malloc((len*2)+1); + int pos = 0; + if (len > 0) { + unsigned char last = input[0]; + int count = 1; + for (int i = 1; i < len; i++) { + if (((unsigned char)input[i] == last) && (count < 26)) + count++; + else { + encoded[pos++] = last; + encoded[pos++] = 64 + count; + last = (unsigned char)input[i]; + count = 1; + } + } + encoded[pos++] = last; + encoded[pos++] = 65; // Should be 64 + count + } + encoded[pos] = '\0'; + return encoded; +} + +char* decode(const char* output) { + unsigned int len = strlen(output); + char* decoded = (char*)malloc((len/2)*26); + int pos = 0; + for (int i = 0; i < len; i += 2) { + for (int j = 0; j < (output[i+1] - 64); j++) { + decoded[pos++] = output[i]; + } + } + decoded[pos] = '\0'; + return decoded; +} + +// Can be (much) higher (e.g., > 1024) if we're using fuzzing, not symbolic execution +#define MAX_STR_LEN 6 + +TEST(Runlength, BoringUnitTest) { + ASSERT_EQ(strcmp(encode(""), ""), 0); + ASSERT_EQ(strcmp(encode("a"), "aA"), 0); + ASSERT_EQ(strcmp(encode("aaabbbbbc"), "aCbEcA"), 0); +} + +TEST(Runlength, EncodeDecode) { + char* original = DeepState_CStrUpToLen(MAX_STR_LEN, "abcdef0123456789"); + char* encoded = encode(original); + ASSERT_LE(strlen(encoded), strlen(original)*2) << "Encoding is > length*2!"; + char* roundtrip = decode(encoded); + ASSERT_EQ(strncmp(roundtrip, original, MAX_STR_LEN), 0) << + "ORIGINAL: '" << original << "', ENCODED: '" << encoded << + "', ROUNDTRIP: '" << roundtrip << "'"; +} +``` + +The code above (which can be found +[here](https://github.com/trailofbits/deepstate/blob/master/examples/Runlen.cpp)) +shows an example of a DeepState test harness. Most of the code is +just the functions to be tested. 
Using DeepState to test them requires: + +- Including the DeepState C++ header and using the DeepState namespace + +- Defining at least one TEST, with names + +- Calling some DeepState APIs that produce data + - In this example, we see the `DeepState_CStrUpToLen` call tells + DeepState to produce a string that has up to `MAX_STR_LEN` + characters, chosen from those present in hex strings. + +- Optionally making some assertions about the correctness of the +results + - In `Runlen.cpp` this is the `ASSERT_LE` and `ASSERT_EQ` checks. + - In the absence of any properties to check, DeepState can still + look for memory safety violations, crashes, and other general + categories of undesirable behavior, like any fuzzer. + + +## Running the test + +``` +~/deepstate/build/examples$ ./Runlen +TRACE: Running: Runlength_EncodeDecode from /Users/alex/deepstate/examples/Runlen.cpp(55) +TRACE: Passed: Runlength_EncodeDecode +TRACE: Running: Runlength_BoringUnitTest from /Users/alex/deepstate/examples/Runlen.cpp(49) +TRACE: Passed: Runlength_BoringUnitTest +``` + +Executing the DeepState executable will run the "BoringUnitTest" and "EncodeDecode" tests. +The first one is like a traditional hand-written unit test and simply tests +fixed inputs devised by a programmer. The second one uses default (all zero bytes) +values. These inputs do not expose the bug in `encode`. + +Using DeepState, however, it is easy to find the bug. 
Just try: + +```shell +deepstate-angr ./Runlen --output_test_dir out +``` + +or + +```shell +./Runlen --fuzz --exit_on_fail --output_test_dir out +``` + +The fuzzer will output something like: + +``` +INFO: Starting fuzzing +WARNING: No seed provided; using 1546631311 +WARNING: No test specified, defaulting to last test defined (Runlength_EncodeDecode) +CRITICAL: /Users/alex/deepstate/examples/Runlen.cpp(60): ORIGINAL: '91c499', ENCODED: '9A1AcA4A9A', ROUNDTRIP: '91c49' +ERROR: Failed: Runlength_EncodeDecode +``` + + +## Tests replay + +To run saved inputs against the test, just run the executable with appropriate arguments: +```shell +./Runlen --input_test_dir ./out +INFO: Ran 0 tests for Runlength_BoringUnitTest; 0 tests failed +CRITICAL: /home/gros/studia/mgr/fuzzing/tools/deepstate/examples/Runlen.cpp(60): ORIGINAL: 'abbbbb', ENCODED: 'aAbA', ROUNDTRIP: 'ab' +ERROR: Failed: Runlength_EncodeDecode +... +INFO: Ran 64 tests for Runlength_EncodeDecode; 31 tests failed +``` + +## Test case reduction + +While tests generated by symbolic execution are likely to be highly +concise already, fuzzer-generated tests may be much larger than they +need to be. + +DeepState provides a test case reducer to shrink tests intelligently, +aware of the structure of a DeepState test. For example, if your +executable is named `TestFileSystem` and the test you want to reduce +is named `create.test`, you would use it like this: + +```shell +deepstate-reduce ./TestFileSystem create.test mincreate.test +``` + +In many cases, this will result in finding a different failure or +crash that allows smaller test cases, so you can also provide a string +that controls the criterion for which test outputs are considered valid +reductions (by default, the reducer looks for any test that fails or +crashes). 
Only outputs containing the `--criterion` are considered to +be valid reductions (`--regexpCriterion` lets you use a Python regexp +for more complex checks): + +```shell +deepstate-reduce ./TestFileSystem create.test mincreate.test --criteria "Assertion failed: ((testfs_inode_get_type(in) == I_FILE)" +``` + +The output will look something like: + +``` +Original test has 8192 bytes +Applied 128 range conversions +Last byte read: 527 +Shrinking to ignore unread bytes +Writing reduced test with 528 bytes to rnew +================================================================================ +Iteration #1 0.39 secs / 2 execs / 0.0% reduction +Structured deletion reduced test to 520 bytes +Writing reduced test with 520 bytes to rnew +0.77 secs / 3 execs / 1.52% reduction + +... + +Structured swap: PASS FINISHED IN 0.01 SECONDS, RUN: 5.1 secs / 151 execs / 97.54% reduction +Reduced byte 12 from 4 to 1 +Writing reduced test with 13 bytes to rnew +5.35 secs / 169 execs / 97.54% reduction +================================================================================ +Byte reduce: PASS FINISHED IN 0.5 SECONDS, RUN: 5.6 secs / 186 execs / 97.54% reduction +================================================================================ +Iteration #2 5.6 secs / 186 execs / 97.54% reduction +Structured deletion: PASS FINISHED IN 0.03 SECONDS, RUN: 5.62 secs / 188 execs / 97.54% reduction +Structured edge deletion: PASS FINISHED IN 0.03 SECONDS, RUN: 5.65 secs / 190 execs / 97.54% reduction +1-byte chunk removal: PASS FINISHED IN 0.19 SECONDS, RUN: 5.84 secs / 203 execs / 97.54% reduction +4-byte chunk removal: PASS FINISHED IN 0.19 SECONDS, RUN: 6.03 secs / 216 execs / 97.54% reduction +8-byte chunk removal: PASS FINISHED IN 0.19 SECONDS, RUN: 6.22 secs / 229 execs / 97.54% reduction +1-byte reduce and delete: PASS FINISHED IN 0.04 SECONDS, RUN: 6.26 secs / 232 execs / 97.54% reduction +4-byte reduce and delete: PASS FINISHED IN 0.03 SECONDS, RUN: 6.29 secs / 234 execs / 
97.54% reduction +8-byte reduce and delete: PASS FINISHED IN 0.01 SECONDS, RUN: 6.31 secs / 235 execs / 97.54% reduction +Byte range removal: PASS FINISHED IN 0.76 SECONDS, RUN: 7.06 secs / 287 execs / 97.54% reduction +Structured swap: PASS FINISHED IN 0.01 SECONDS, RUN: 7.08 secs / 288 execs / 97.54% reduction +================================================================================ +Completed 2 iterations: 7.08 secs / 288 execs / 97.54% reduction +Padding test with 23 zeroes +Writing reduced test with 36 bytes to mincreate.test +``` + +You can use `--which_test <test name>` to specify which test to +run, as with the `--input_which_test` options to test replay. If you +find that test reduction is taking too long, you can try the `--fast` +option to get a quick-and-dirty reduction, and later use the default +settings, or even `--slowest` setting to try to reduce it further. + +Test case reduction should work on any OS. + + +## Log Levels + +By default, DeepState is not very verbose about testing activity, +other than failing tests. The `DEEPSTATE_LOG` environment variable +or the `--min_log_level` argument lowers the threshold for output, +with 0 = `DEBUG`, 1 = `TRACE` (output from the tests, including from `printf`), +2 = `INFO` (DeepState messages, the default), 3 = `WARNING`, +4 = `ERROR`, 5 = `EXTERNAL` (output from other programs such as +libFuzzer), and 6 = `CRITICAL` messages. Lowering the `min_log_level` can be very +useful for understanding what a DeepState harness is actually doing; +often, setting `--min_log_level 1` in either fuzzing or symbolic +execution will give sufficient information to debug your test harness. \ No newline at end of file diff --git a/docs/fuzzing.md b/docs/fuzzing.md new file mode 100644 index 00000000..bea46bf7 --- /dev/null +++ b/docs/fuzzing.md @@ -0,0 +1,291 @@ +# Built-In Fuzzer + +Every DeepState executable provides a simple built-in fuzzer that +generates tests using completely random data.
Using this fuzzer is as +simple as calling the native executable with the `--fuzz` argument. +The fuzzer also takes a `seed` and `timeout` (default of two minutes) +to control the fuzzing. By default, fuzzing saves +only failing and crashing tests, and these only when given an output +directory. If you want to actually save the test cases +generated, you need to add a `--output_test_dir` argument to tell +DeepState where to put the generated tests, and if you want the +(totally random and unlikely to be high-quality) passing tests, you +need to add `--fuzz_save_passing`. + +Note that while symbolic execution only works on Linux, without a +fairly complex cross-compilation process, the brute force fuzzer works +on macOS or (as far as we know) any Unix-like system. + +## A Note on MacOS and Forking + +Normally, when running a test for replay or fuzzing, DeepState forks +in order to cleanly handle crashes of a test. Unfortunately, `fork()` +on macOS is _extremely_ slow. When using the built-in fuzzer or +replaying more than a few tests, it is highly recommended to add the `--no_fork` +option on macOS, unless you need the added crash handling (that is, +only when things aren't working without that option). + +# External fuzzers + +DeepState currently supports five external fuzzers: +[libFuzzer](https://llvm.org/docs/LibFuzzer.html), +[AFL](http://lcamtuf.coredump.cx/afl), +[HonggFuzz](https://github.com/google/honggfuzz), +[Eclipser](https://github.com/SoftSec-KAIST/Eclipser) and +[Angora](https://github.com/AngoraFuzzer/Angora).
+ +To use one of them as DeepState backend, you need to: +* install it +* compile DeepState with it +* compile target test with it +* run executor with location of installed files provided + +To install the fuzzer, follow instructions on its website or +run DeepState via Docker, as described in [README.md](/README.md). + +To compile DeepState with the fuzzer, run `cmake` with +`-DDEEPSTATE_FUZZERNAME=on` (like `-DDEEPSTATE_AFL=on`) option and +`CC/CXX` variables set to the fuzzer's compiler. This will produce +library called `libdeepstate_FUZZERNAME.a`, which you may put to +standard location (`/usr/local/lib/`). + +To compile target test, use fuzzer's compiler and link with appropriate +DeepState library (`-ldeepstate_FUZZERNAME`). + +To provide location of fuzzer's executables to python executor you may: +* put the executables to some $PATH location +* export `FUZZERNAME_HOME` environment variable (like `ANGORA_HOME`) +with value set to the location of fuzzer's executables +* specify `--home_path` argument when running the executor + +All of this rather complicated setup may be simplified with Docker. +Just build the image (changing OS in `./docker/base/Dockerfile` if needed) +and use it with your project. All the fuzzers and environment variables will be there. + +## Fuzzer executors usage + +Fuzzer executors (`deepstate-honggfuzz` etc.) are meant to be as uniform +as possible, thus making it easy to compile and run tests. + +Compilation: `deepstate-afl --compile_test ./SimpleCrash.cpp --out_test_name SimpleCrash` + +Run: `mkdir out && deepstate-afl --output_test_dir out ./SimpleCrash.afl` + +The only required arguments are location of output directory and the test.
+Optional arguments: +``` +--input_seeds - location of directory with initial inputs +--max_input_size - maximal length of inputs +--exec_timeout - timeout for run on one input file +--timeout - timeout for whole fuzzing process +--fuzzer_out - use fuzzer output rather than deepstate (uniform) one +--mem_limit - memory limit for the fuzzer +--min_log_level - how much to log (0=DEBUG, 6=CRITICAL) +--blackbox - fuzz not-instrumented binary +--dictionary - file with words that may enhance fuzzing (fuzzer dependent format) +``` + +Each fuzzer creates following files/directories under output directory: +``` +* deepstate-stats.txt - some statistics parsed by executor +* fuzzer-output.txt - all stdout/stderr from the fuzzer +* PUSH_DIR - fuzzer will take (synchronize) additional inputs from here +* PULL_DIR - fuzzer will save produced inputs here (may be the same as PUSH_DIR) +* CRASH_DIR - fuzzer will save crashes here +``` + +Failed tests are treated as crashes when using fuzzer executors +(because of `--abort_on_fail` flag). + +Note that some fuzzers (notably AFL) require input seeds. When not provided, +executor will create a dumb one, which may not be very efficient for fuzzing. + +Input files need to be smaller than the DeepState input size limit (8192 bytes), +which is the default limit in executors. But not all fuzzers support file size +limitation, so if your test cases grow too large, you may need to stop fuzzing +and minimize them. + +Also, there should not be crash-producing files inside input seeds directory. + +Because AFL and other file-based fuzzers only rely on the DeepState +native test executable, they should (like DeepState's built-in simple +fuzzer) work fine on macOS and other Unix-like OSes.
On macOS, you +will want to consider doing the work to use [persistent mode](http://lcamtuf.blogspot.com/2015/06/new-in-afl-persistent-mode.html), or even +running inside a VM, due to AFL (unless in persistent mode) relying +extensively on forks, which are very slow on macOS. + +### AFL + +```bash +$ cd ./deepstate +$ mkdir -p build_afl && cd build_afl +$ export AFL_HOME="/afl-2.52b" +$ CXX="$AFL_HOME/afl-clang++" CC="$AFL_HOME/afl-clang" cmake -DDEEPSTATE_AFL=ON ../ +$ make -j4 +$ sudo cp ./libdeepstate_AFL.a /usr/local/lib/ +``` + +Dirs: +* PUSH_DIR - out/sync_dir/queue +* PULL_DIR - out/the_fuzzer/queue +* CRASH_DIR - out/the_fuzzer/crashes + + +### libFuzzer + +It is bundled into newer clang compilers. + +```bash +$ cd ./deepstate +$ mkdir -p build_libfuzzer && cd build_libfuzzer +$ CXX=clang++ CC=clang cmake -DDEEPSTATE_LIBFUZZER=ON ../ +$ make -j4 +$ sudo cp ./libdeepstate_LF.a /usr/local/lib/ +``` + +Dirs: +* PUSH_DIR - out/sync_dir/queue +* PULL_DIR - out/sync_dir/queue +* CRASH_DIR - out/the_fuzzer/crashes + +Use the `LIBFUZZER_WHICH_TEST` +environment variable to control which test libFuzzer runs, using a +fully qualified name (e.g., +`Arithmetic_InvertibleMultiplication_CanFail`). By default, you get +the first test defined (which works fine if there is only one test). + +One hint when using libFuzzer is to avoid dynamically allocating +memory during a test, if that memory would not be freed on a test +failure. This will leak memory and libFuzzer will run out of memory +very quickly in each fuzzing session. Using libFuzzer on macOS +requires compiling DeepState and your program with a clang that +supports libFuzzer (which the Apple built-in probably won't); this can be as simple as doing: + +```shell +brew install llvm@7 +CC=/usr/local/opt/llvm\@7/bin/clang CXX=/usr/local/opt/llvm\@7/bin/clang++ DEEPSTATE_LIBFUZZER=TRUE cmake .. +make install +``` + +Other ways of getting an appropriate LLVM may also work. 
+ +On macOS, libFuzzer's normal output is not visible. Because libFuzzer +does not fork to execute tests, there is no issue with fork speed on +macOS for this kind of fuzzing. + +On any platform, +you can see more about what DeepState under libFuzzer is doing by +setting the `LIBFUZZER_LOUD` environment variable, and tell libFuzzer +to stop upon finding a failing test using `LIBFUZZER_EXIT_ON_FAIL`. + +### HonggFuzz + +```bash +$ cd ./deepstate +$ mkdir -p build_honggfuzz && cd build_honggfuzz +$ export HONGGFUZZ_HOME="/honggfuzz" +$ CXX="$HONGGFUZZ_HOME/hfuzz_cc/hfuzz-clang++" CC="$HONGGFUZZ_HOME/hfuzz_cc/hfuzz-clang" cmake -DDEEPSTATE_HONGGFUZZ=ON ../ +$ make -j4 +$ sudo cp ./libdeepstate_HFUZZ.a /usr/local/lib/ +``` + +Dirs: +* PUSH_DIR - out/sync_dir/queue +* PULL_DIR - out/sync_dir/queue +* CRASH_DIR - out/the_fuzzer/crashes + + +### Eclipser + +Eclipser uses QEMU instrumentation and therefore doesn't require +special DeepState compilation. You should just use `libdeepstate.a` +(QEMU doesn't like special instrumentation). + +Dirs: +* PUSH_DIR - out/sync_dir/queue +* PULL_DIR - out/sync_dir/queue +* CRASH_DIR - out/the_fuzzer/crashes + + +### Angora + +Angora uses two binaries for fuzzing, one with taint tracking information +and one without. So we need two deepstate libraries and will need to +compile each test two times. + +Angora also requires old version of llvm/clang (between 4.0.0 and 7.1.0). +Executor will need to find it, so you may want to put it under `$ANGORA_HOME/clang+llvm/`. 
+ +```bash +# for deepstate compilation only +$ export PATH="/clang+llvm/bin:$PATH" +$ export LD_LIBRARY_PATH="/clang+llvm/lib:$LD_LIBRARY_PATH" + +$ cd ./deepstate +$ export ANGORA_HOME="/angora" +$ mkdir -p build_angora_taint && cd build_angora_taint +$ export USE_TRACK=1 +$ CXX="$ANGORA_HOME/bin/angora-clang++" CC="$ANGORA_HOME/bin/angora-clang" cmake -DDEEPSTATE_ANGORA=ON ../ +$ make -j4 -i # ignore errors, because Angora doesn't support 32bit builds \ +$ sudo cp ./libdeepstate_taint.a /usr/local/lib/ +$ cd ../ + +$ mkdir -p build_angora_fast && cd build_angora_fast +$ export USE_FAST=1 +$ CXX="$ANGORA_HOME/bin/angora-clang++" CC="$ANGORA_HOME/bin/angora-clang" cmake -DDEEPSTATE_ANGORA=ON ../ +$ make -j4 -i +$ sudo cp ./libdeepstate_fast.a /usr/local/lib/ +``` + +```bash +$ mv /clang+llvm $ANGORA_HOME/ +$ mkdir out +$ deepstate-angora --compile_test ./SimpleCrash.cpp --out_test_name SimpleCrash +$ deepstate-angora -o out ./SimpleCrash.taint.angora ./SimpleCrash.fast.angora +``` + +Dirs: +* PUSH_DIR - out/sync_dir/queue +* PULL_DIR - out/angora/queue +* CRASH_DIR - out/angora/crashes + + +## Tests replay + +To run saved inputs against some test, just run it with appropriate arguments: + +``` +./Runlen --abort_on_fail --input_test_files_dir ./out/output_afl/the_fuzzer/queue +``` + +No need to use fuzzer specific compilation (so don't use `SimpleCrash_AFL` etc. +They are slower due to instrumentation). + + +## Ensembler (fuzzers synchronization) + +You may run as many executors as you want (and have resources). But to synchronize +them, you need to specify `--sync_dir` option pointing to some shared directory. + +Each fuzzer will push produced test cases to that directory and pull from it as needed. 
+ +Currently, there are some limitations in synchronization for the following fuzzers: +* Eclipser - needs to be restarted to use pulled test cases +* HonggFuzz - same as above +* Angora - pulled files need to have the correct AFL format (`id:00003`) and the id must +be greater than the biggest in Angora's local (pull) directory +* libFuzzer - stops fuzzing after first crash found, so there should be no crashes in `sync_dir` + + +## Which Fuzzer Should I Use? + +In fact, since DeepState supports libFuzzer, AFL, HonggFuzz, Angora and Eclipser, +a natural question is "which is the best fuzzer?" In +general, it depends! We suggest using them all, which DeepState makes +easy. libFuzzer is very fast, and sometimes the CMP breakdown it +provides is very useful; however, it's often bad at finding longer +paths where just covering nodes isn't helpful. AFL is still an +excellent general-purpose fuzzer, and often beats "improved" versions +over a range of programs. Finally, Eclipser has some tricks that let +it get traction in some cases where you might think only symbolic +execution (which wouldn't scale) could help. diff --git a/docs/swarm_testing.md b/docs/swarm_testing.md new file mode 100644 index 00000000..df7be322 --- /dev/null +++ b/docs/swarm_testing.md @@ -0,0 +1,45 @@ +# Swarm Testing + +[Swarm testing](https://agroce.github.io/issta12.pdf) is an approach +to test generation that [modifies the distributions of finite choices](https://blog.regehr.org/archives/591) +(e.g., string generation and `OneOf` choices of which functions to +call). It has a long history of improving compiler testing, and +usually (but not always) API testing. The Hypothesis Python testing +tool +[recently added swarm to its stable of heuristics](https://github.com/HypothesisWorks/hypothesis/pull/2238). + +The basic idea is simple. Let's say we are generating tests of a +stack that overflows when a 64th item is pushed on the stack, due to a +typo in the overflow check.
Our tests are +256 calls to push/pop/top/clear. Obviously the odds of getting 64 +pushes in a row, without popping or clearing, are very low (for a dumb +fuzzer, the odds are astronomically low). +Coverage-feedback and various byte-copying heuristics in AFL and +libFuzzer etc. can sometimes work around such problems, but in other, +more complex cases, they are stumped. Swarm testing "flips a coin" +before each test, and only includes API calls in the test if the coin +came up heads for that test. That means we just need some test to run +with heads for push and tails for pop and clear. + +DeepState supports fully automated swarm testing. Just compile your +harness with `-DDEEPSTATE_PURE_SWARM` and all your `OneOf`s _and_ +DeepState string generation functions will use swarm testing. This is +a huge help for the built-in fuzzer (for example, it more than doubles +the fault detection rate for the `Runlen` example above). Eclipser +can get "stuck" with swarm testing, but AFL and libFuzzer can +certainly sometimes benefit from swarm testing. There is also an option +`-DDEEPSTATE_MIXED_SWARM` that mixes swarm and regular generation. It +flips an additional coin for each potentially swarmable thing, and +decides to use swarm or not for that test. This can produce a mix of +swarm and regular generation that is unique to DeepState. If you +aren't finding any bugs using a harness that involves `OneOf` or +generating strings, it's a good idea to try both swarm methods before +declaring the code bug-free! There is another, more experimental, +swarm-like method, `-DDEEPSTATE_PROB_SWARM`, that is of possible interest. +Instead of pure binary inclusion/exclusion of choices, this varies the +actual distribution of choices. However, because this often ends up behaving +more like a non-swarm selection, it may not be as good at ferreting out +unusual behaviors due to extreme imbalance of choices. 
+ +Note that tests produced under a particular swarm option are _not_ +binary compatible with other settings for swarm, due to the added coin flips. diff --git a/docs/symbolic_execution.md b/docs/symbolic_execution.md new file mode 100644 index 00000000..8bd254d9 --- /dev/null +++ b/docs/symbolic_execution.md @@ -0,0 +1,6 @@ +# Symbolic execution + +TODO: +- something general about SE +- something about angr and manticore +- how DeepState integrates SE (simplified stuff from the paper) diff --git a/docs/test_harness.md b/docs/test_harness.md new file mode 100644 index 00000000..6f15dcee --- /dev/null +++ b/docs/test_harness.md @@ -0,0 +1,5 @@ +# Test harness + +TODO: +- how it works +- API - what methods can be used in the harness diff --git a/examples/EnsembledCrash.cpp b/examples/EnsembledCrash.cpp new file mode 100644 index 00000000..4b78a561 --- /dev/null +++ b/examples/EnsembledCrash.cpp @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2019 Trail of Bits, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#include + +using namespace deepstate; + +DEEPSTATE_NOINLINE static void segfault(char *first, char* second) { + std::size_t hashed = std::hash{}(first); + std::size_t hashed2 = std::hash{}(second); + unsigned *p = NULL; + if (hashed == 7169420828666634849U) { + if (hashed2 == 10753164746288518855U) { + *(p+2) = 0xdeadbeef; /* crash */ + } + printf("BOM\n"); + } +} + +TEST(SimpleCrash, SegFault) { + char *first = (char*)DeepState_CStr_C(9, 0); + char *second = (char*)DeepState_CStr_C(9, 0); + + for (int i = 0; i < 9; ++i) + printf("%02x", (unsigned char)first[i]); + printf("\n"); + for (int i = 0; i < 9; ++i) + printf("%02x", (unsigned char)second[i]); + + segfault(first, second); + + ASSERT_EQ(first, first); + ASSERT_NE(first, second); +} diff --git a/tests/deepstate_base.py b/tests/deepstate_base.py index ca2249f6..75be4f56 100644 --- a/tests/deepstate_base.py +++ b/tests/deepstate_base.py @@ -10,4 +10,24 @@ def test_manticore(self): self.run_deepstate("deepstate-manticore") def run_deepstate(self, deepstate): - print("define an actual test of DeepState here.") + raise NotImplementedError("Define an actual test of DeepState in DeepStateTestCase:run_deepstate.") + + +class DeepStateFuzzerTestCase(TestCase): + def test_afl(self): + self.run_deepstate("deepstate-afl") + + def test_libfuzzer(self): + self.run_deepstate("deepstate-libfuzzer") + + def test_honggfuzz(self): + self.run_deepstate("deepstate-honggfuzz") + + def test_angora(self): + self.run_deepstate("deepstate-angora") + + def test_eclipser(self): + self.run_deepstate("deepstate-eclipser") + + def run_deepstate(self, deepstate): + raise NotImplementedError("Define an actual test of DeepState in DeepStateFuzzerTestCase:run_deepstate.") diff --git a/tests/logrun.py b/tests/logrun.py index 553983e9..10b663e6 100644 --- a/tests/logrun.py +++ b/tests/logrun.py @@ -2,47 +2,94 @@ import subprocess import time import sys +from tempfile import mkdtemp +from shutil import rmtree +import psutil -def 
logrun(cmd, file, timeout): + +def logrun(cmd, file, timeout, break_callback=None): sys.stderr.write("\n\n" + ("=" * 80) + "\n") sys.stderr.write("RUNNING: ") sys.stderr.write(" ".join(cmd) + "\n\n") sys.stderr.flush() + + tmp_out_dir = None with open(file, 'w') as outf: + additional_args = [] + + # auto-create output dir + if set(cmd).isdisjoint({"-o", "--output_test_dir", "--out_test_name"}): + tmp_out_dir = mkdtemp(prefix="deepstate_logrun_") + additional_args.extend(["--output_test_dir", tmp_out_dir]) # create empty output dir + # We need to set log_level so we see ALL messages, for testing - p = subprocess.Popen(cmd + ["--min_log_level", "0"], stdout=outf, stderr=outf) + if "--min_log_level" not in cmd: + additional_args.extend(["--min_log_level", "0"]) + + proc = subprocess.Popen(cmd + additional_args, stdout=outf, stderr=outf) + + callback_break = False + oldContentLen = 0 start = time.time() - oldContents = "" lastOutput = time.time() - while (p.poll() is None) and ((time.time() - start) < timeout): - if (time.time() - lastOutput) > 300: - sys.stderr.write(".") + inf = open(file, 'r') + while (proc.poll() is None) and ((time.time() - start) < timeout): + inf.seek(0, 2) + newContentLen = inf.tell() + + if newContentLen > oldContentLen: + inf.seek(oldContentLen, 0) + newContent = inf.read() + sys.stderr.write(newContent) sys.stderr.flush() + oldContentLen = newContentLen lastOutput = time.time() - with open(file, 'r') as inf: - contents = inf.read() - if len(contents) > len(oldContents): - sys.stderr.write(contents[len(oldContents):]) + + if break_callback and break_callback(newContent): + callback_break = True + break + + if (time.time() - lastOutput) > 300: + sys.stderr.write(".") sys.stderr.flush() - oldContents = contents lastOutput = time.time() - time.sleep(0.05) + + time.sleep(0.5) + totalTime = time.time() - start sys.stderr.write("\n") - rv = (p.returncode, contents) - if p.poll() is None: - rv = ("TIMEOUT", contents) - if "Traceback (most recent 
call last)" in contents: - rv = ("EXCEPTION RAISED", contents) - if "internal error" in contents: - rv = ("INTERNAL ERROR", contents) + + inf.seek(oldContentLen, 0) + newContent = inf.read() + sys.stderr.write(newContent) + sys.stderr.flush() + inf.seek(0, 0) + contents = inf.read() + inf.close() + + rv = [proc.returncode, contents] + if callback_break: + rv[0] = "CALLBACK_BREAK" + elif proc.poll() is None: + rv[0] = "TIMEOUT" + elif "Traceback (most recent call last)" in contents: + rv[0] = "EXCEPTION RAISED" + elif "internal error" in contents: + rv[0] = "INTERNAL ERROR" + + try: + for some_proc in psutil.Process(proc.pid).children(recursive=True) + [proc]: + some_proc.terminate() + except psutil.NoSuchProcess: + pass + sys.stderr.write("\nDONE\n\n") sys.stderr.write("TOTAL EXECUTION TIME: " + str(totalTime) + "\n") - sys.stderr.write("RETURN VALUE: " + str(p.returncode) + "\n") + sys.stderr.write("RETURN VALUE: " + str(proc.returncode) + "\n") sys.stderr.write("RETURNING AS RESULT: " + str(rv[0]) + "\n") sys.stderr.write("=" * 80 + "\n") - return rv - - + if tmp_out_dir: + rmtree(tmp_out_dir, ignore_errors=True) + return rv diff --git a/tests/test_fuzzers.py b/tests/test_fuzzers.py new file mode 100644 index 00000000..5fd694ad --- /dev/null +++ b/tests/test_fuzzers.py @@ -0,0 +1,79 @@ +from __future__ import print_function +import deepstate_base +import logrun +from tempfile import mkdtemp, TemporaryDirectory, mkstemp +from pathlib import Path +from os import path +from glob import glob +import re +import sys + + +class CrashFuzzerTest(deepstate_base.DeepStateFuzzerTestCase): + def run_deepstate(self, deepstate): + def do_compile(tempdir, test_source_file): + """ + Compile test_source_file using frontend API + temdir is a workspace + """ + # prepare args + output_test_name = path.join(tempdir, Path(test_source_file).stem) + _, output_log_file = mkstemp(dir=tempdir) + arguments = [ + "--compile_test", test_source_file, + "--out_test_name", output_test_name + ] + 
+ # run command + (r, output) = logrun.logrun([deepstate] + arguments, output_log_file, 360) + compiled_files = glob(output_test_name + '*') + + # check output + self.assertEqual(r, 0) + for compiled_file in compiled_files: + self.assertTrue(path.isfile(compiled_file)) + + # return compiled file(s) + # if Angora fuzzer, file.taint should be before file.fast + if any([compiled_file.endswith('.taint.angora') for compiled_file in compiled_files]): + compiled_files = sorted(compiled_files, reverse=True) + return compiled_files + + + def crash_found(output): + """ + Check if some crash were found assuming that + fuzzer output is the deepstate one (--fuzzer_out == False) + """ + for crashes_stat in re.finditer(r"^FUZZ_STATS:.*:unique_crashes:(\d+)$", + output, re.MULTILINE): + if int(crashes_stat.group(1)) > 0: + return True + return False + + + def do_fuzz(tempdir, compiled_files): + """ + Fuzz compiled_files (single compiled test/harness or two files if Angora) + until first crash + """ + # prepare args + _, output_log_file = mkstemp(dir=tempdir) + output_test_dir = mkdtemp(dir=tempdir) + + arguments = [ + "--output_test_dir", output_test_dir + ] + compiled_files + + # run command + (r, output) = logrun.logrun([deepstate] + arguments, output_log_file, + 180, break_callback=crash_found) + + # check output + self.assertTrue(crash_found(output)) + + + test_source_file = "examples/SimpleCrash.cpp" + with TemporaryDirectory(prefix="deepstate_test_fuzzers_") as tempdir: + compiled_files = do_compile(tempdir, test_source_file) + do_fuzz(tempdir, compiled_files) diff --git a/tests/test_fuzzers_sync.py b/tests/test_fuzzers_sync.py new file mode 100644 index 00000000..88797c73 --- /dev/null +++ b/tests/test_fuzzers_sync.py @@ -0,0 +1,260 @@ +from __future__ import print_function + +import base64 +import deepstate_base +import logrun +import os +import re +import subprocess +import sys +import time + +from base64 import b64decode +from glob import glob +from os import path +from 
pathlib import Path +from shutil import rmtree +from tempfile import TemporaryDirectory +from tempfile import mkdtemp +from tempfile import mkstemp +from time import sleep +from unittest import TestCase + + +class CrashFuzzerTest(TestCase): + def test_fuzzers_synchronization(self): + def do_compile(fuzzer, tempdir, test_source_file): + """ + Compile test_source_file using frontend API + temdir is a workspace + """ + print(f"Compiling testcase for fuzzer {fuzzer}") + + # prepare args + output_test_name = path.join(tempdir, Path(test_source_file).stem) + _, output_log_file = mkstemp(dir=tempdir) + arguments = [ + "--compile_test", test_source_file, + "--out_test_name", output_test_name + ] + + # run command + proc = subprocess.Popen([f"deepstate-{fuzzer}"] + arguments) + proc.communicate() + compiled_files = glob(output_test_name + f"*.{fuzzer}") + + # check output + self.assertEqual(proc.returncode, 0) + for compiled_file in compiled_files: + self.assertTrue(path.isfile(compiled_file)) + + # return compiled file(s) + # if Angora fuzzer, file.taint should be before file.fast + if any([compiled_file.endswith('.taint.angora') for compiled_file in compiled_files]): + compiled_files = sorted(compiled_files, reverse=True) + return compiled_files + + + def do_fuzz(fuzzer, workspace_dir, sync_dir, compiled_files, output_from_fuzzer=None): + """ + Fuzz compiled_files (single compiled test/harness or two files if Angora) + until first crash + """ + # prepare args + output_dir = mkdtemp(prefix=f"deepstate_{fuzzer}_", dir=workspace_dir) + + arguments = [ + "--output_test_dir", output_dir, + "--sync_dir", sync_dir, + "--sync_cycle", "5", + "--min_log_level", "0" + ] + compiled_files + + # run command + exe = f"deepstate-{fuzzer}" + cmd = ' '.join([exe] + arguments) + print(f"Running: `{cmd}`.") + if output_from_fuzzer and output_from_fuzzer == fuzzer: + proc = subprocess.Popen([exe] + arguments) + else: + proc = subprocess.Popen([exe] + arguments, + stdout=subprocess.PIPE, 
stderr=subprocess.PIPE) + return output_dir, proc + + + def crashes_found(fuzzer, output): + """ + Check if some crash were found assuming that + fuzzer output is the deepstate one (--fuzzer_out == False) + """ + no_crashes = 0 + for crashes_stat in re.finditer(r"^unique_crashes:(\d+)$", + output, re.MULTILINE): + no_crashes = int(crashes_stat.group(1)) + print(f"Crashes found by fuzzer {fuzzer} - {no_crashes}.") + return 0 + + + def wait_for_crashes(fuzzers, timeout): + for fuzzer in fuzzers: + fuzzers[fuzzer]["no_crashes"] = 0 + + start_time = int(time.time()) + + while any([v["no_crashes"] < 1 for _, v in fuzzers.items()]): + if timeout: + self.assertLess(time.time() - start_time, timeout, msg="TIMEOUT") + + for fuzzer, values in fuzzers.items(): + try: + stats = dict() + with open(values["stats_file"], "r") as f: + for line in f: + line = line.strip() + if ":" not in line: + continue + k, v = line.split(":", 1) + stats[k] = v + + print("{:10s}:".format(fuzzer), end="\t") + if values["proc"].poll() is None: + for stat in ["unique_crashes", "sync_dir_size", "execs_done", "paths_total"]: + if stat in stats: + print("{}: {:10s}".format(stat, stats[stat]), end=" |\t") + print("") + fuzzers[fuzzer]["no_crashes"] = int(stats["unique_crashes"]) + else: + if "unique_crashes" in stats: + print("unique_crashes: {:10s}".format(stats["unique_crashes"]), end=" |\t") + print("DEAD " + "OoOoo"*5 + "x...") + + except FileNotFoundError: + print(f" - stats not found (`{values['stats_file']}`).") + + for _ in range(3): + print("~*~"*5, end=" - ") + sys.stderr.flush() + sys.stdout.flush() + sleep(1) + print("") + + print(f"CRASHING - done") + print("-"*50) + + + def do_sync_test(output_from_fuzzer=None): + # start all fuzzers + for fuzzer in fuzzers.keys(): + output_dir, proc = do_fuzz(fuzzer, workspace_dir, sync_dir, + fuzzers[fuzzer]["compiled_files"], + output_from_fuzzer) + fuzzers[fuzzer]["output_dir"] = output_dir + fuzzers[fuzzer]["proc"] = proc + 
fuzzers[fuzzer]["stats_file"] = os.path.join(output_dir, "deepstate-stats.txt") + + # import Frontend classes so we can use PUSH/PULL/CRASH dirs + deepstate_python = os.path.join(os.path.dirname(__file__), "bin", "deepstate") + print(f"Adding deepstate python path: {deepstate_python}.") + sys.path.append(deepstate_python) + + if "afl" in fuzzers: + from deepstate.executors.fuzz.afl import AFL + fuzzers["afl"]["class"] = AFL + if "angora" in fuzzers: + from deepstate.executors.fuzz.angora import Angora + fuzzers["angora"]["class"] = Angora + if "honggfuzz" in fuzzers: + from deepstate.executors.fuzz.honggfuzz import Honggfuzz + fuzzers["honggfuzz"]["class"] = Honggfuzz + if "eclipser" in fuzzers: + from deepstate.executors.fuzz.eclipser import Eclipser + fuzzers["eclipser"]["class"] = Eclipser + if "libfuzzer" in fuzzers: + from deepstate.executors.fuzz.libfuzzer import LibFuzzer + fuzzers["libfuzzer"]["class"] = LibFuzzer + + # run them for a bit + wait_for_start = 2 + print(f"Fuzzers started, waiting {wait_for_start} seconds.") + for _ in range(wait_for_start): + sleep(1) + print('.', end="") + sys.stderr.flush() + sys.stdout.flush() + print("") + + # assert that all fuzzers started + print("Checking if fuzzers are up and running") + for fuzzer, values in fuzzers.items(): + try: + self.assertTrue(values["proc"].poll() is None) + except Exception as e: + print(f"Error for fuzzer {fuzzer}:") + if values["proc"] and values["proc"].stderr: + print(values["proc"].stderr.read().decode('utf8')) + raise e + push_dir = os.path.join(values["output_dir"], values["class"].PUSH_DIR) + self.assertTrue(os.path.isdir(push_dir)) + + # manually push crashing seeds to fuzzers local dirs + seeds = [b64decode("R3JvcyBwemRyIGZyb20gUEwu")] + fuzzer_id = 0 + for seed_no, seed in enumerate(seeds): + fuzzer_id %= len(fuzzers) + fuzzer = sorted(fuzzers.keys())[fuzzer_id] + values = fuzzers[fuzzer] + push_dir = os.path.join(values["output_dir"], values["class"].PUSH_DIR) + print(f"Pushing 
seed {seed_no} to {fuzzer}: `{push_dir}`") + with open(os.path.join(push_dir, f"id:000201,the_crash"), "wb") as f: + f.write(seed) + fuzzer_id += 1 + + # check if all fuzzers find at least two crashes + # that is: the one pushed to its local dir and at least one other + wait_for_crashes(fuzzers, timeout) + + + # config + fuzzers_list = ["afl", "libfuzzer", "angora", "eclipser", "honggfuzz"] + output_from_fuzzer = None # or "afl" etc + timeout = None + + # init + fuzzers = dict() + test_source_file = "examples/EnsembledCrash.cpp" + sync_dir = mkdtemp(prefix="syncing_") + workspace_dir = mkdtemp(prefix="workspace_") + compiled_files_dir = mkdtemp(prefix="compiled_", dir=workspace_dir) + + # compile for all fuzzers + for fuzzer in fuzzers_list: + compiled_files = do_compile(fuzzer, compiled_files_dir, test_source_file) + fuzzers[fuzzer] = {"compiled_files": compiled_files} + + # do testing + try: + print("Starting synchronization run") + do_sync_test(output_from_fuzzer) + except Exception as e: + # cleanup + # hard kill processes + print('Killing spawned processes.') + for _, value in fuzzers.items(): + try: + proc = value["proc"] + for some_proc in psutil.Process(proc.pid).children(recursive=True) + [proc]: + some_proc.kill() + except: + pass + + # filesystem + print("Clearing tmp files.") + try: + sleep(1) + rmtree(workspace_dir, ignore_errors=True) + rmtree(sync_dir, ignore_errors=True) + except Exception as e2: + print(f"Error clearing: {e2}") + + # now can raise + raise e