Skip to content

Commit

Permalink
Introduce fuzzer
Browse files Browse the repository at this point in the history
By leveraging LLVM's libFuzzer, we will be able to test our
emulator code more thoroughly.

The seed corpus is populated automatically with all the ELF files in the
build folder.

Major changes:
- Conditional compilation for the main function, as the fuzzer will
generate its own main function
- Need to use the clang toolchain
- Support loading an ELF image from a memory buffer, as the input from the
fuzzer is passed into the emulator directly instead of going through a file
- Fix the ELF verification logic, as the fuzzer already breaks the code

Other minor changes are:
- Fix Codacy issue "rejecting SARIF, as there are more runs than allowed"
  • Loading branch information
henrybear327 committed Nov 26, 2023
1 parent afe4bf6 commit 56cbb36
Show file tree
Hide file tree
Showing 12 changed files with 277 additions and 6 deletions.
37 changes: 37 additions & 0 deletions .ci/fuzz.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/usr/bin/env bash
#
# Build the emulator as a libFuzzer target with clang, seed the corpus with
# the ELF files found in build/, and run the fuzzer for a bounded time.
set -e -u -o pipefail

# check the existence of the clang toolchain; report it explicitly instead of
# letting `set -e` terminate the script without any diagnostic
if ! command -v clang &> /dev/null; then
    echo "error: clang not found in PATH; it is required to build the fuzzer" >&2
    exit 1
fi

# compile
make clean
clang \
    -g -O1 \
    -fsanitize=fuzzer,address,undefined \
    -include src/common.h \
    -D RV32_FEATURE_EXT_F=0 \
    -D RV32_FEATURE_SDL=0 \
    -D DEFAULT_STACK_ADDR=0xFFFFE000 \
    -D DEFAULT_ARGS_ADDR=0xFFFFF000 \
    -D FUZZER \
    -o build/rv32emu_fuzz \
    src/fuzz-target.cc \
    src/map.c \
    src/utils.c \
    src/decode.c \
    src/io.c \
    src/syscall.c \
    src/emulate.c \
    src/riscv.c \
    src/elf.c \
    src/cache.c \
    src/mpool.c \
    src/main.c

# populate the initial CORPUS for the fuzzer using valid elf
mkdir -p build/fuzz/CORPUS_DIR
cp build/*.elf build/fuzz/CORPUS_DIR

# execute: per-input timeout of 3 seconds, total run capped at 1200 seconds
./build/rv32emu_fuzz build/fuzz/CORPUS_DIR -timeout=3 -max_total_time=1200
5 changes: 5 additions & 0 deletions .codacy.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
exclude_paths:
- ".github/**"
- "build/**"
- "docs/**"
- "tests/**"
17 changes: 17 additions & 0 deletions .github/workflows/fuzz.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
name: Fuzz Test

on: [push, pull_request]

jobs:
  rv32emu:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - name: install-dependencies
        # -y keeps apt-get from waiting on a confirmation prompt, which
        # cannot be answered on a non-interactive runner
        run: |
          sudo apt-get update
          sudo apt-get install -y clang
        shell: bash
      - name: Run fuzzer
        run: .ci/fuzz.sh
        shell: bash
10 changes: 10 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,28 @@ build/id1/
build/gfx.wad
build/doomrc
toolchain/
.vscode

# built objects
build/.config
build/rv32emu
build/rv32emu_fuzz
build/arch-test
build/mini-gdbstub
build/softfloat
build/cache/
build/map/
build/path/
build/fuzz/
*.o
*.o.d
tests/**/*.elf
tests/arch-test-target/config.ini
__pycache__/

# fuzzer
crash-*
leak-*
timeout-*
fuzz.elf
*.log
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ LABEL maintainer="henrybear327@gmail.com"
# Install packages required for the emulator to compile and execute correctly
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y \
libsdl2-dev libsdl2-mixer-dev python3-pip git
libsdl2-dev libsdl2-mixer-dev python3-pip git clang

RUN python3 -m pip install git+https://github.com/riscv/riscof

Expand Down
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ include mk/toolchain.mk

OUT ?= build
BIN := $(OUT)/rv32emu
FUZZ_BIN := $(OUT)/rv32emu_fuzz

CONFIG_FILE := $(OUT)/.config
-include $(CONFIG_FILE)
Expand Down Expand Up @@ -214,7 +215,7 @@ endif
endif

clean:
$(RM) $(BIN) $(OBJS) $(HIST_BIN) $(HIST_OBJS) $(deps) $(CACHE_OUT)
$(RM) $(BIN) $(FUZZ_BIN) $(OBJS) $(HIST_BIN) $(HIST_OBJS) $(deps) $(CACHE_OUT)
distclean: clean
-$(RM) $(DOOM_DATA) $(QUAKE_DATA)
$(RM) -r $(OUT)/id1
Expand Down
23 changes: 23 additions & 0 deletions docs/fuzzer.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Fuzzing

We are using the [LLVM Fuzzer](https://llvm.org/docs/LibFuzzer.html).

The fuzzer used here does not perform structured input generation. Instead, we
rely on the fuzzer to mutate the input.

The initial seeds are all the ELF files in the `build` directory.

## Execution

The script compiles the emulator and links it with the LibFuzzer, prepares the seed corpus, and executes the fuzzing tests.

- `.ci/fuzz.sh`

## References

> Inspired by the fuzzer from [libriscv](https://github.com/fwsGonzo/libriscv/tree/master/fuzz).
- [LLVM official LibFuzzer documentation](https://llvm.org/docs/LibFuzzer.html#corpus)
- [Chromium - Getting started with LibFuzzer](https://chromium.googlesource.com/chromium/src/+/refs/heads/main/testing/libfuzzer/getting_started_with_libfuzzer.md)
- [Fuzzing tutorial](https://github.com/google/fuzzing/blob/master/tutorial/libFuzzerTutorial.md)
- [UBSAN](https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html)
31 changes: 28 additions & 3 deletions src/elf.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ void elf_delete(elf_t *e)
/* release a loaded ELF file */
static void release(elf_t *e)
{
#if !defined(USE_MMAP)
#if !defined(USE_MMAP) && !defined(FUZZER)
free(e->raw_data);
#endif

Expand Down Expand Up @@ -291,18 +291,36 @@ bool elf_load(elf_t *e, riscv_t *rv, memory_t *mem)
return true;
}

#ifdef FUZZER
bool elf_open(elf_t *e, uint8_t *data, size_t len)
#else
bool elf_open(elf_t *e, const char *input)
#endif
{
/* free previous memory */
if (e->raw_data)
release(e);

#ifndef FUZZER
char *path = sanitize_path(input);
if (!path) {
return false;
}
#endif

#if defined(USE_MMAP)
#if defined(FUZZER)
if (!data || !len) {
/* if the fuzzer sent in an empty buffer, we don't proceed further */
return false;
}

/* get file size */
e->raw_size = len;

/* allocate memory */
free(e->raw_data);
e->raw_data = (uint8_t *) data;
#elif defined(USE_MMAP)
int fd = open(path, O_RDONLY);

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This argument to a file access function is derived from
user input (a command-line argument)
and then passed to open(__path).
if (fd < 0) {
free(path);
Expand All @@ -324,7 +342,6 @@ bool elf_open(elf_t *e, const char *input)
return false;
}
close(fd);

#else /* fallback to standard I/O text stream */
FILE *f = fopen(path, "rb");
if (!f) {
Expand Down Expand Up @@ -357,16 +374,24 @@ bool elf_open(elf_t *e, const char *input)
#endif /* USE_MMAP */

/* point to the header */
if (sizeof(struct Elf32_Ehdr) > e->raw_size) {
release(e);
return false;
}
e->hdr = (const struct Elf32_Ehdr *) e->raw_data;

/* check it is a valid ELF file */
if (!is_valid(e)) {
release(e);
#ifndef FUZZER
free(path);
#endif
return false;
}

#ifndef FUZZER
free(path);
#endif
return true;
}

Expand Down
4 changes: 4 additions & 0 deletions src/elf.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,11 @@ elf_t *elf_new();
void elf_delete(elf_t *e);

/* Open an ELF file from specified path */
#ifdef FUZZER
bool elf_open(elf_t *e, uint8_t *data, size_t len);
#else
bool elf_open(elf_t *e, const char *path);
#endif

/* Find a symbol entry */
const struct Elf32_Sym *elf_get_symbol(elf_t *e, const char *name);
Expand Down
76 changes: 76 additions & 0 deletions src/fuzz-target.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "riscv.h"

const int max_cycles = 5000;
const char *fake_rv32emu_name = "./fake_rv32emu";
const char *fake_elf_name = "fake_elf";

/* Sanitizer error hooks.
 *
 * Each hook simply aborts the process, so that every reported error ends in
 * a crash whose coredump can be inspected afterwards.
 */
extern "C" {

void __asan_on_error(void)
{
    abort();
}

void __msan_on_error(void)
{
    abort();
}

} /* extern "C" */

static void fuzz_elf_loader(const uint8_t *data, size_t len)
{
int argc = 1 + 2 * 3 + 1;
char **args = (char **) malloc(sizeof(char *) * argc);

char *arg0 = (char *) malloc(strlen(fake_rv32emu_name) + 1);
strncpy(arg0, fake_rv32emu_name, strlen(fake_rv32emu_name) + 1);
args[0] = arg0;

char *arg1 = (char *) malloc(3);
strncpy(arg1, "-s", 3);
args[1] = arg1;
args[2] = (char *) data;

char *arg3 = (char *) malloc(3);
strncpy(arg3, "-l", 3);
args[3] = arg3;
char *len_str =
(char *) malloc(20 + 1); /* LLONG_MIN in base 10 has 20 chars */
sprintf(len_str, "%zu", len);
args[4] = len_str;

char *arg5 = (char *) malloc(3);
strncpy(arg5, "-k", 3);
args[5] = arg5;
char *max_cycles_str =
(char *) malloc(11 + 1); /* INT_MIN in base 10 has 11 chars */
sprintf(max_cycles_str, "%d", max_cycles);
args[6] = max_cycles_str;

char *arg7 = (char *) malloc(strlen(fake_elf_name) + 1);
strncpy(arg7, fake_elf_name, strlen(fake_elf_name) + 1);
args[7] = arg7;

int ret = rv_init_and_execute_elf(argc, args);
if (ret == 0) {
fprintf(stderr, "Executed successfully\n");
} else {
fprintf(stderr, "Executed with failure\n");
}

free(arg0);
free(arg1);
free(arg3);
free(len_str);
free(arg5);
free(max_cycles_str);
free(arg7);
free(args);
}

/* libFuzzer entry point, invoked once for every generated input.
 *
 * @data: fuzzer-provided bytes
 * @len: number of bytes in @data
 *
 * Returns 0. The libFuzzer interface declares this function as returning
 * int and reads the result, so it must not be declared void.
 */
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t len)
{
    fuzz_elf_loader(data, len);
    return 0;
}
Loading

0 comments on commit 56cbb36

Please sign in to comment.