diff --git a/panda/debian/.gitignore b/panda/debian/.gitignore index 7bbf39a4c32..3277dbe3185 100644 --- a/panda/debian/.gitignore +++ b/panda/debian/.gitignore @@ -1 +1,2 @@ -panda.deb +*.deb +*.whl \ No newline at end of file diff --git a/panda/debian/setup.sh b/panda/debian/setup.sh index b28dc85c7e2..f3835516f85 100755 --- a/panda/debian/setup.sh +++ b/panda/debian/setup.sh @@ -43,17 +43,17 @@ if [[ ! -f "../dependencies/ubuntu_${version}_base.txt" ]]; then fi # Build the installer to generate the wheel file -DOCKER_BUILDKIT=1 docker build --target installer -t panda --build-arg BASE_IMAGE="ubuntu:${version}" ../.. +DOCKER_BUILDKIT=1 docker build --target installer -t panda_installer --build-arg BASE_IMAGE="ubuntu:${version}" ../.. # Copy wheel file out of container to host # this also preserves wheel name, which is important as pip install WILL fail if you arbitarily change the generated wheel file name -docker run --rm -v $(pwd):/out panda bash -c "cp /panda/panda/python/core/dist/*.whl /out" +docker run --rm -v $(pwd):/out panda_installer bash -c "cp /panda/panda/python/core/dist/*.whl /out" # Finish building main panda container for the target ubuntu version -DOCKER_BUILDKIT=1 docker build --target panda -t panda --build-arg BASE_IMAGE="ubuntu:${version}" ../.. +DOCKER_BUILDKIT=1 docker build --target panda --cache-from=panda_installer -t panda --build-arg BASE_IMAGE="ubuntu:${version}" ../.. # Now build the packager container from that -docker build -t packager . +DOCKER_BUILDKIT=1 docker build --cache-from=panda -t packager . # Copy deb file out of container to host docker run --rm -v $(pwd):/out packager bash -c "cp /pandare.deb /out" diff --git a/panda/plugins/dwarf2/dwarf2.cpp b/panda/plugins/dwarf2/dwarf2.cpp index 6a72013d465..5f351251025 100644 --- a/panda/plugins/dwarf2/dwarf2.cpp +++ b/panda/plugins/dwarf2/dwarf2.cpp @@ -1039,7 +1039,7 @@ void pri_dwarf_plog(const char *file_callee, const char *fn_callee, uint64_t lno Panda__LogEntry ple = PANDA__LOG_ENTRY__INIT; // create a call or ret message - if (isCall){ + if (isCall) { ple.dwarf2_call = dwarf; } else{ @@ -1850,6 +1850,7 @@ bool load_debug_info(const char *dbg_prefix, const char *basename, uint64_t base return true; } +// You need this code to run to fill out TaintQueryPri bool read_debug_info(const char* dbg_prefix, const char *basename, uint64_t base_address, bool needs_reloc) { printf ("read_debug_info %s\n", dbg_prefix); @@ -1879,6 +1880,8 @@ bool looking_for_libc=false; const char *libc_host_path=NULL; std::string libc_name; +// Call back to the loaded plugin +// TODO: mmap never gets called, so loaded never gets called... void on_library_load(CPUState *cpu, target_ulong pc, char *guest_lib_name, target_ulong base_addr, target_ulong size) { printf ("on_library_load guest_lib_name=%s\n", guest_lib_name); if (!correct_asid(cpu)) { @@ -1890,7 +1893,7 @@ void on_library_load(CPUState *cpu, target_ulong pc, char *guest_lib_name, targe //printf("Trying to load symbols for %s at %#x.\n", lib_name, base_addr); std::string lib = std::string(guest_lib_name); std::size_t found = lib.find(guest_debug_path); - if (found == std::string::npos){ + if (found == std::string::npos) { char *lib_name = strdup((host_mount_path + lib).c_str()); printf("access(%s, F_OK): %x\n", lib_name, access(lib_name, F_OK)); if (access(lib_name, F_OK) == -1) { @@ -1899,9 +1902,7 @@ void on_library_load(CPUState *cpu, target_ulong pc, char *guest_lib_name, targe } if (looking_for_libc && lib.find(libc_name) != std::string::npos) { -// if (lib.find("libc-2.13") != std::string::npos) { lib_name = strdup(libc_host_path); -// lib_name = strdup("/mnt/lava-32-qcow/usr/lib/debug/lib/i386-linux-gnu/i686/cmov/libc-2.13.so"); printf ("actually loading lib_name = %s\n", lib_name); bool needs_reloc = true; // elf_base != base_addr; read_debug_info(lib_name, basename(lib_name), base_addr, needs_reloc); @@ -1931,34 +1932,50 @@ void on_library_load(CPUState *cpu, target_ulong pc, char *guest_lib_name, targe } // We want to catch all loaded modules, but don't want to -// check every single call. This is a compromise -- check -// every 1000 calls. If we had a callback in OSI for -// on_library_load we could do away with this hack. -int mod_check_count = 0; +// check every single call. We use a callback in OSI for +// on_library_load. bool main_exec_initialized = false; -#define MOD_CHECK_FREQ 1000 bool ensure_main_exec_initialized(CPUState *cpu) { //if (!correct_asid(cpu)) return; OsiProc *p = get_current_process(cpu); GArray *libs = NULL; libs = get_mappings(cpu, p); free_osiproc(p); - if (!libs) + if (!libs) { + printf("get_mappings failed\n"); return false; - - //printf("[ensure_main_exec_initialized] looking at libraries\n"); + } + printf("[ensure_main_exec_initialized] looking at libraries for %s\n", proc_to_monitor); for (unsigned i = 0; i < libs->len; i++) { char fname[260] = {}; OsiModule *m = &g_array_index(libs, OsiModule, i); - if (!m->file) continue; - if (!m->name) continue; - std::string lib = std::string(m->file); if (debug) { - printf("[ensure_main_exec_initialized] looking at file %s\n", m->file); + printf("Iteration %d within the for loop of libraries in main_exec_initialized\n", i); + } + if (!m->file) { + if (debug) { + printf("Invalid file from OsiModule\n"); + } + continue; + } + if (!m->name) { + if (debug) { + printf("Invalid name from OsiModule\n"); + } + continue; + } + std::string lib = std::string(m->file); + + if (0 != strncmp(m->name, proc_to_monitor, strlen(m->name))) { + if (debug) { + printf("[ensure_main_exec_initialized] looking at file %s, skip this\n", m->file); + printf("[ensure_main_exec_initialized] looking at name %s, skip this\n", m->name); + } + continue; } - if (0 != strncmp(m->name, proc_to_monitor, strlen(m->name))) continue; - //printf("[ensure_main_exec_initialized] looking at file %s\n", m->file); + printf("[ensure_main_exec_initialized] Found that file, time to try loading... %s\n", m->file); + //std::size_t found = lib.find(guest_debug_path); //if (found == std::string::npos) continue; //std::string host_name = lib.substr(0, found) + @@ -1978,6 +1995,7 @@ bool ensure_main_exec_initialized(CPUState *cpu) { active_libs.push_back(Lib(fname, m->base, m->base + m->size)); uint64_t elf_base = elf_get_baseaddr(fname, m->name, m->base); bool needs_reloc = elf_base != m->base; + // TODO: The issue is that we are not loading the symbols for the main executable if (!read_debug_info(fname, m->name, m->base, needs_reloc)) { fprintf(stderr, "Couldn't load symbols from %s.\n", fname); continue; @@ -2062,12 +2080,15 @@ void on_call(CPUState *cpu, target_ulong pc) { if (it == line_range_list.end() || pc < it->lowpc ){ auto it_dyn = addr_to_dynl_function.find(pc); if (it_dyn != addr_to_dynl_function.end()){ - if (debug) printf ("CALL: Found line info for 0x" TARGET_FMT_lx "\n", pc); + if (debug) { + printf ("CALL: Found line info for 0x" TARGET_FMT_lx "\n", pc); + } pri_runcb_on_fn_start(cpu, pc, NULL, it_dyn->second.c_str()); } else { - if (debug) + if (debug) { printf("CALL: Could not find line info for 0x" TARGET_FMT_lx "\n", pc); + } } return; } @@ -2129,7 +2150,7 @@ void on_ret(CPUState *cpu, target_ulong pc_func) { std::string file_name = it->filename; std::string funct_name = funcaddrs[cur_function]; cur_line = it->line_number; - //printf("RET: [%s] [0x%llx]-%s(), ln: %4lld, pc @ 0x%x\n",file_name.c_str(),cur_function, funct_name.c_str(),cur_line,pc_func); + //printf("RET: [%s] [0x%llx]-%s(), ln: %4lld, pc @ 0x%x\n",file_name.c_str(),cur_function, funct_name.c_str(),cur_line,pc_func) if (logCallSites) { dwarf_log_callsite(cpu, file_name.c_str(), funct_name.c_str(), cur_line, false); } @@ -2141,10 +2162,10 @@ void __livevar_iter(CPUState *cpu, std::vector vars, liveVarCB f, void *args, - target_ulong fp){ + target_ulong fp) { //printf("size of vars: %ld\n", vars.size()); - for (auto it : vars){ - std::string var_name = it.var_name; + for (auto it : vars) { + std::string var_name = it.var_name; DwarfVarType var_type {type_map[it.fname][it.cu][it.var_type], it.dec_line, var_name}; //enum LocType { LocReg, LocMem, LocConst, LocErr }; target_ulong var_loc; @@ -2179,14 +2200,14 @@ int livevar_find(CPUState *cpu, std::vector vars, liveVarPred pred, void *args, - VarInfo &ret_var){ + VarInfo &ret_var) { target_ulong fp = dwarf2_get_cur_fp(cpu, pc); - if (fp == (target_ulong) -1){ + if (fp == (target_ulong) -1) { printf("Error: was not able to get the Frame Pointer for the function %s at @ 0x" TARGET_FMT_lx "\n", funcaddrs[cur_function].c_str(), pc); return 0; } - for (auto it : vars){ + for (auto it : vars) { target_ulong var_loc; //process_dwarf_locs(locdesc[i]->ld_s, locdesc[i]->ld_cents); //printf("\n"); @@ -2206,7 +2227,7 @@ int livevar_find(CPUState *cpu, * end PPPs ******************************************************************** */ int compare_address(void *var_ty, const char *var_nm, LocType loc_t, target_ulong loc, void *query_address){ - switch (loc_t){ + switch (loc_t) { case LocReg: break; case LocMem: @@ -2238,7 +2259,7 @@ void dwarf_get_vma_symbol (CPUState *cpu, target_ulong pc, target_ulong vma, cha //VarInfo ret_var = VarInfo(NULL, NULL, NULL, 0); VarInfo ret_var; - if (livevar_find(cpu, pc, funcvars[fn_address], compare_address, (void *) &vma, ret_var)){ + if (livevar_find(cpu, pc, funcvars[fn_address], compare_address, (void *) &vma, ret_var)) { *symbol_name = (char *)ret_var.var_name.c_str(); return; } @@ -2257,7 +2278,7 @@ void dwarf_get_pc_source_info(CPUState *cpu, target_ulong pc, SrcInfo *info, int return; } auto it = std::lower_bound(line_range_list.begin(), line_range_list.end(), pc, CompareRangeAndPC()); - if (it == line_range_list.end() || pc < it->lowpc ){ + if (it == line_range_list.end() || pc < it->lowpc) { auto it_dyn = addr_to_dynl_function.find(pc); if (it_dyn != addr_to_dynl_function.end()){ //printf("In a a plt function\n"); @@ -2272,7 +2293,7 @@ void dwarf_get_pc_source_info(CPUState *cpu, target_ulong pc, SrcInfo *info, int return; } - if (it->lowpc == it->highpc){ + if (it->lowpc == it->highpc) { //printf("In a a plt function\n"); *rc = 1; return; @@ -2289,11 +2310,11 @@ void dwarf_get_pc_source_info(CPUState *cpu, target_ulong pc, SrcInfo *info, int void dwarf_all_livevar_iter(CPUState *cpu, target_ulong pc, liveVarCB f, - void *args){ + void *args) { //void (*f)(const char *var_ty, const char *var_nm, LocType loc_t, target_ulong loc)){ - if (inExecutableSource){ + if (inExecutableSource) { target_ulong fp = dwarf2_get_cur_fp(cpu, pc); - if (fp == (target_ulong) -1){ + if (fp == (target_ulong) -1) { printf("Error: was not able to get the Frame Pointer for the function %s at @ 0x" TARGET_FMT_lx "\n", funcaddrs[cur_function].c_str(), pc); return; @@ -2307,8 +2328,10 @@ void dwarf_all_livevar_iter(CPUState *cpu, void dwarf_funct_livevar_iter(CPUState *cpu, target_ulong pc, liveVarCB f, - void *args){ - //printf("iterating through live vars\n"); + void *args) { + if (debug) { + printf("iterating through live vars\n"); + } if (inExecutableSource) { target_ulong fp = dwarf2_get_cur_fp(cpu, pc); if (fp == (target_ulong) -1){ @@ -2328,13 +2351,15 @@ void dwarf_global_livevar_iter(CPUState *cpu, } bool translate_callback_dwarf(CPUState *cpu, target_ulong pc) { - if (!correct_asid(cpu)) return false; - + if (!correct_asid(cpu)) { + return false; + } auto it2 = std::lower_bound(line_range_list.begin(), line_range_list.end(), pc, CompareRangeAndPC()); // after the call to lower_bound the `pc` should be between it2->lowpc and it2->highpc // if it2 == line_range_list.end() we know we definitely didn't find out pc in our line_range_list - if (it2 == line_range_list.end() || pc < it2->lowpc) + if (it2 == line_range_list.end() || pc < it2->lowpc) { return false; + } return true; /* // This is just the linear search to confirm binary search (lower_bound) is @@ -2350,10 +2375,13 @@ bool translate_callback_dwarf(CPUState *cpu, target_ulong pc) { int exec_callback_dwarf(CPUState *cpu, target_ulong pc) { inExecutableSource = false; - if (!correct_asid(cpu)) return 0; + if (!correct_asid(cpu)) { + return 0; + } auto it2 = std::lower_bound(line_range_list.begin(), line_range_list.end(), pc, CompareRangeAndPC()); - if (it2 == line_range_list.end() || pc < it2->lowpc) + if (it2 == line_range_list.end() || pc < it2->lowpc) { return 0; + } inExecutableSource = true; if (it2->lowpc == it2->highpc) { inExecutableSource = false; @@ -2364,15 +2392,17 @@ int exec_callback_dwarf(CPUState *cpu, target_ulong pc) { cur_line = it2->line_number; //printf("[%s] [0x%llx]-%s(), ln: %4lld, pc @ 0x%x\n",file_name.c_str(),cur_function, funct_name.c_str(),cur_line,pc); - if (funcaddrs.find(cur_function) == funcaddrs.end()) + if (funcaddrs.find(cur_function) == funcaddrs.end()) { return 0; - if (cur_function == 0) + } + if (cur_function == 0) { return 0; + } //printf("[%s] [0x%llx]-%s(), ln: %4lld, pc @ 0x%x\n",file_name.c_str(),cur_function, funct_name.c_str(),cur_line,pc); //__livevar_iter(env, pc, funcvars[cur_function], push_var_if_live); //__livevar_iter(env, pc, global_var_list, push_var_if_live); //__livevar_iter(env, pc, global_var_list, print_var_if_live); - if (cur_line != prev_line){ + if (cur_line != prev_line) { //printf("[%s] %s(), ln: %4lld, pc @ 0x%x\n",file_name.c_str(), funct_name.c_str(),cur_line,pc); pri_runcb_on_after_line_change (cpu, pc, prev_file_name.c_str(), prev_funct_name.c_str(), prev_line); pri_runcb_on_before_line_change(cpu, pc, file_name.c_str(), funct_name.c_str(), cur_line); @@ -2385,7 +2415,7 @@ int exec_callback_dwarf(CPUState *cpu, target_ulong pc) { prev_function = cur_function; prev_line = cur_line; } - //if (funcaddrs.find(pc) != funcaddrs.end()){ + //if (funcaddrs.find(pc) != funcaddrs.end()) { // on_call(env, pc); //} return 0; @@ -2412,24 +2442,27 @@ uint32_t guest_strncpy(CPUState *cpu, char *buf, size_t maxlen, target_ulong gue typedef void (* on_proc_change_t)(CPUState *env, target_ulong asid, OsiProc *proc); void handle_asid_change(CPUState *cpu, target_ulong asid, OsiProc *p) { -// printf ("handle_asid_change\n"); if (!p) { return; } if (!p->name) { return; } if (debug) { printf("p-name: %s proc-to-monitor: %s\n", p->name, proc_to_monitor); } -// printf ("...really\n"); - //if (strcmp(p->name, proc_to_monitor) != 0) { if (strncmp(p->name, proc_to_monitor, strlen(p->name)) == 0) { target_ulong current_asid = panda_current_asid(cpu); monitored_asid.insert(current_asid); printf ("monitoring asid " TARGET_FMT_lx "\n", current_asid); } if (correct_asid(cpu) && !main_exec_initialized){ + if (debug) { + printf ("correct_asid, executing main_exec_initialized\n"); + } main_exec_initialized = ensure_main_exec_initialized(cpu); } + if (!main_exec_initialized) { + printf("The ensure_main_exec_intialized function failed on handle_asid_change\n"); + // exit(1); + } //free_osiproc(p); - } // XXX: osi_foo is largetly commented out and basically does nothing // I am keeping it here as a reminder of maybe tracking of a data structure @@ -2502,9 +2535,8 @@ void osi_foo(CPUState *cpu, TranslationBlock *tb) { return; } - - #endif + bool init_plugin(void *self) { #if defined(TARGET_I386) panda_arg_list *args_gen = panda_get_args("general"); @@ -2517,6 +2549,7 @@ bool init_plugin(void *self) { // monitored_asid = 0; } panda_arg_list *args = panda_get_args("dwarf2"); + debug = panda_parse_bool_opt(args, "debug", "enable debug output"); guest_debug_path = panda_parse_string_req(args, "g_debugpath", "path to binary/build dir on guest machine"); host_debug_path = panda_parse_string_req(args, "h_debugpath", "path to binary/build dir on host machine"); host_mount_path = panda_parse_string_opt(args, "host_mount_path", "dbg", "path to mounted guest file system"); @@ -2540,7 +2573,7 @@ bool init_plugin(void *self) { panda_require("pri"); panda_require("asidstory"); - //panda_require("osi_linux"); + // panda_require("osi_linux"); // make available the api for assert(init_callstack_instr_api()); assert(init_osi_linux_api()); @@ -2577,7 +2610,7 @@ bool init_plugin(void *self) { // if debug path actually points to a file, then make host_debug_path the // directory that contains the executable bin_path = std::string(host_debug_path); - //host_debug_path = dirname(strdup(host_debug_path)); + // host_debug_path = dirname(strdup(host_debug_path)); host_debug_path = dirname(strdup(host_debug_path)); } else { printf("Don\'t know what host_debug_path: %s is, but it is not a file or directory\n", host_debug_path); diff --git a/panda/plugins/loaded/loaded.cpp b/panda/plugins/loaded/loaded.cpp index f3303c5fb73..555d412c5f6 100644 --- a/panda/plugins/loaded/loaded.cpp +++ b/panda/plugins/loaded/loaded.cpp @@ -18,6 +18,7 @@ PANDAENDCOMMENT */ #include #include #include +#include #include "panda/plugin.h" #include "panda/plugin_plugin.h" @@ -52,7 +53,7 @@ PPP_PROT_REG_CB(on_library_load); // This creates the global for this call back fn (on_library_load) PPP_CB_BOILERPLATE(on_library_load) -bool debug = true; +bool debug = false; #define MAX_FILENAME 256 std::map running_procs; @@ -108,18 +109,17 @@ void linux_mmap_pgoff_return(CPUState *cpu,target_ulong pc,uint32_t addr,uint32_ OsiProc proc = running_procs[asid]; char *filename = osi_linux_fd_to_filename(cpu, &proc, fd); // gets us offset into the file. could be useful - //uint64_t pos = osi_linux_fd_to_pos(env, &proc, fd); + // uint64_t pos = osi_linux_fd_to_pos(env, &proc, fd); // if a filename exists and permission is executable - // TODO: fix this magic constant of 0x04 for PROT_EXEC - if (filename != NULL && ((prot & 0x04) == 0x04)) { + if (filename != NULL && ((prot & PROT_EXEC) == PROT_EXEC)) { if (debug) { printf ("[loaded] linux_mmap_pgoff(fd=%d filename=[%s] " "len=%d prot=%x flags=%x " "pgoff=%d)=" TARGET_FMT_lx "\n", (int) fd, filename, len, prot, flags, pgoff, env->regs[R_EAX]); } - PPP_RUN_CB(on_library_load, cpu, pc, filename, env->regs[R_EAX], len) - } else if ((prot & 0x04) == 0x04) { + PPP_RUN_CB(on_library_load, cpu, pc, filename, env->regs[R_EAX], len); + } else if ((prot & PROT_EXEC) == PROT_EXEC) { printf("[loaded] mapped executable section without a filename!\n"); printf ("[loaded] linux_mmap_pgoff(fd=%d " "len=%d prot=%x flags=%x " @@ -127,6 +127,68 @@ void linux_mmap_pgoff_return(CPUState *cpu,target_ulong pc,uint32_t addr,uint32_ len, prot, flags, pgoff, env->regs[R_EAX]); } } + +#elif defined(TARGET_I386) && defined(TARGET_X86_64) +void linux_mprotect_return(CPUState* cpu, target_ulong pc, + uint64_t start, uint32_t len, + uint64_t prot) { + if (debug) { + printf("[loaded] mprotect() on x86-64\n"); + } +} +// https://man7.org/linux/man-pages/man2/mmap.2.html +// https://github.com/panda-re/panda/blob/dev/panda/plugins/syscalls2/generated/syscalls_ext_typedefs_x64.h#L7405-L7412 +void linux_mmap_return(CPUState *cpu, target_ulong pc, + uint64_t addr, uint64_t len, uint64_t prot, + uint64_t flags, uint64_t fd, uint64_t offset) { + + if (debug) { + printf("linux_mmap_return 64-bit is called\n"); + } + CPUArchState *env = (CPUArchState*)cpu->env_ptr; + target_ulong asid = panda_current_asid(cpu); + if (running_procs.count(asid) == 0) { + return; + } + if ((int32_t) fd == -1) { + return; + } + if (debug) { + printf("linux_mmap_return 64-bit is called, with OK fd and non-zero running proc\n"); + } + + OsiProc proc = running_procs[asid]; + char * filename = osi_linux_fd_to_filename(cpu, &proc, fd); + // TODO: Brendan, will this offset help us? I feel like there is a difference + // mmap is just an offset and mmap_pgoff is an offset of pages? These I do NOT think are the same!!!! + // YES!!! SEE FILE_TAINT LIBRARY + // gets us offset into the file, could be useful, how would we update the callback? + // It is unused, I need to comment out... + // uint64_t pos = osi_linux_fd_to_pos(cpu, &proc, fd); + // if a filename exists and permission is executable + if (filename != NULL && ((prot & PROT_EXEC) == PROT_EXEC)) { + if (debug) { + printf("[loaded] linux_mmap(fd=%lu filename=[%s] len=%lu prot=%lx flags=%lx pgoff=%lu)=%lx\n", + fd, filename, len, prot, flags, offset, (unsigned long) env->regs[R_EAX]); + } + PPP_RUN_CB(on_library_load, cpu, pc, filename, env->regs[R_EAX], len); + } + else if ((prot & PROT_EXEC) == PROT_EXEC) { + printf("[loaded] mapped executable section without a filename!\n"); + printf("[loaded] linux_mmap(fd=%lu len=%lu prot=%lx flags=%lx pgoff=%lu)=%lx\n", + fd, len, prot, flags, offset, (unsigned long) env->regs[R_EAX]); + } + else { + if (debug) { + if (filename == NULL) { + printf("[loaded] I got a null file name\n"); + } + else { + printf("[loaded] It seems like filename %s was null, OR PROT_EXEC was not there\n", filename); + } + } + } +} #endif // get current process before each bb execs @@ -166,13 +228,13 @@ void osi_foo(CPUState *cpu, TranslationBlock *tb) { return; } bool init_plugin(void *self) { - //panda_arg_list *args = panda_get_args("loaded"); - panda_require("osi"); assert(init_osi_api()); panda_require("osi_linux"); assert(init_osi_linux_api()); panda_require("syscalls2"); + panda_arg_list *args = panda_get_args("loaded"); + debug = panda_parse_bool_opt(args, "debug", "enable debug output"); #if defined(TARGET_I386) && !defined(TARGET_X86_64) { @@ -183,8 +245,20 @@ bool init_plugin(void *self) { PPP_REG_CB("syscalls2", on_sys_mmap_pgoff_return, linux_mmap_pgoff_return); // don't use these at them moment - //PPP_REG_CB("syscalls2", on_sys_old_mmap_return, linux_old_mmap_return); - //PPP_REG_CB("syscalls2", on_sys_mprotect_return, linux_mprotect_return); + // PPP_REG_CB("syscalls2", on_sys_old_mmap_return, linux_old_mmap_return); + // PPP_REG_CB("syscalls2", on_sys_mprotect_return, linux_mprotect_return); + printf("The loaded plugin is supported on i386\n"); +#elif defined(TARGET_I386) && defined(TARGET_X86_64) + { + panda_cb pcb; + pcb.before_block_exec = osi_foo; + panda_register_callback(self, PANDA_CB_BEFORE_BLOCK_EXEC, pcb); + } + // Tell Plugin 'syscall2', that if a systemcall 'mmap' occurs, then run the code in ;'linux_mmap_return' + // https://www.linuxquestions.org/questions/linux-general-1/difference-between-mmap2-syscall-and-mmap_pgoff-syscall-for-32-bit-linux-4175622986/ + PPP_REG_CB("syscalls2", on_sys_mmap_return, linux_mmap_return); + PPP_REG_CB("syscalls2", on_sys_mprotect_return, linux_mprotect_return); + printf("The loaded plugin is supported on x86-64\n"); #else fprintf(stderr, "The loaded plugin is not currently supported on this platform.\n"); return false; diff --git a/panda/plugins/loaded_libs/loaded_libs.cpp b/panda/plugins/loaded_libs/loaded_libs.cpp index b322e35a557..1b681231345 100644 --- a/panda/plugins/loaded_libs/loaded_libs.cpp +++ b/panda/plugins/loaded_libs/loaded_libs.cpp @@ -134,26 +134,24 @@ bool init_plugin(void *self) { panda_require("syscalls2"); #ifdef TARGET_X86_64 - PPP_REG_CB("syscalls2", on_sys_mmap_return, mmap_return); + PPP_REG_CB("syscalls2", on_sys_mmap_return, mmap_return); + panda_cb pcb; + pcb.before_block_exec = before_block; + panda_register_callback(self, PANDA_CB_BEFORE_BLOCK_EXEC, pcb); + + panda_arg_list *args; + args = panda_get_args("loaded_libs"); + program_name = panda_parse_string_opt(args, "program_name", NULL, "program name to collect libraries for"); + return true; #else - /* #error "No on_sys_mmap_return for target" */ + /* #error "No on_sys_mmap_return for target" */ + printf("loaded_libs plugin is not available for this architecture"); + return false; #endif - - panda_cb pcb; - pcb.before_block_exec = before_block; - panda_register_callback(self, PANDA_CB_BEFORE_BLOCK_EXEC, pcb); - - panda_arg_list *args; - args = panda_get_args("loaded_libs"); - program_name = panda_parse_string_opt(args, "program_name", NULL, "program name to collect libraries for"); - - return true; } void uninit_plugin(void *self) { - cout << "get_libs_count = " << get_libs_count << "\n"; cout << "get_libs_failed_count = " << get_libs_failed_count << "\n"; cout << "frac = " << ((float) get_libs_failed_count) / get_libs_count << "\n"; - } diff --git a/panda/plugins/pri_taint/pri_taint.cpp b/panda/plugins/pri_taint/pri_taint.cpp index 981c235d8f7..3dae050a992 100644 --- a/panda/plugins/pri_taint/pri_taint.cpp +++ b/panda/plugins/pri_taint/pri_taint.cpp @@ -40,13 +40,11 @@ void uninit_plugin(void *); int get_loglevel() ; void set_loglevel(int new_loglevel); } -bool linechange_taint = true; -bool hypercall_taint = true; -bool chaff_bugs = false; -Panda__SrcInfoPri *si = NULL; + const char *global_src_filename = NULL; uint64_t global_src_linenum; unsigned global_ast_loc_id; +uint64_t global_funcaddr; bool debug = false; #define dprintf(...) if (debug) { printf(__VA_ARGS__); fflush(stdout); } @@ -86,7 +84,7 @@ Addr make_greg(uint64_t r, uint16_t off) { return ra; } void print_membytes(CPUState *env, target_ulong a, target_ulong len) { - unsigned char c = (unsigned char)0; + unsigned char c = (unsigned char) 0; printf("{ "); for (int i = 0; i < len; i++) { if (-1 == panda_virtual_memory_read(env, a+i, (uint8_t *) &c, sizeof(char))) { @@ -102,26 +100,50 @@ void print_membytes(CPUState *env, target_ulong a, target_ulong len) { #define LAVA_TAINT_QUERY_MAX_LEN (target_ulong)64ULL #if defined(TARGET_I386) void lava_taint_query(target_ulong buf, LocType loc_t, target_ulong buf_len, const char *astnodename) { + if (debug) { + printf("[pri_taint] Attempt to lava_taint_query\n"); + } + // can't do a taint query if it is not a valid register (loc) or if // the buf_len is greater than the register size (assume size of guest pointer) if (loc_t == LocReg && (buf >= CPU_NB_REGS || buf_len >= sizeof(target_ulong) || - buf_len == (target_ulong)-1)) + buf_len == (target_ulong)-1)) { + if (debug) { + printf("[pri_taint] The register is not balid OR buf_len > register size\n"); + } return; - if (loc_t == LocErr || loc_t == LocConst) + } + if (loc_t == LocErr || loc_t == LocConst) { + if (debug) { + printf("[pri_taint] The Location is either error OR constant. Shouldn't happen based on pfun()\n"); + } return; - if (!pandalog || !taint2_enabled() || taint2_num_labels_applied() == 0) + } + if (!pandalog || !taint2_enabled() || taint2_num_labels_applied() == 0) { + if (debug) { + printf("[pri_taint] No Panda log, Taint2 not enabled, or No taint2 num labeled applied\n"); + } return; + } + if (debug) { + printf("[pri_taint] OK, Seems like I can Lava Taint! LFG!\n"); + } CPUState *cpu = first_cpu; CPUArchState *env = (CPUArchState *)cpu->env_ptr; bool is_strnlen = ((int) buf_len == -1); - hwaddr phys = loc_t == LocMem ? panda_virt_to_phys(cpu, buf) : 0; - ram_addr_t RamOffset = RAM_ADDR_INVALID; + extern ram_addr_t ram_size; + target_ulong phys = loc_t == LocMem ? panda_virt_to_phys(cpu, buf) : 0; - if (phys == (hwaddr)-1 || PandaPhysicalAddressToRamOffset(&RamOffset, phys, false) != MEMTX_OK) return; + if (phys == -1 || phys > ram_size) { + if (debug) { + printf("[pri_taint] Incorrect physical address -1 or beyond RAM size\n"); + } + return; + } if (debug) { - printf("Querying \"%s\": " TARGET_FMT_lu " bytes @ 0x" TARGET_FMT_lx " phys 0x" TARGET_FMT_plx ", strnlen=%d", astnodename, buf_len, buf, phys, is_strnlen); + //printf("Querying \"%s\": " TARGET_FMT_lu " bytes @ 0x" TARGET_FMT_lx " phys 0x" TARGET_FMT_plx ", strnlen=%d", astnodename, buf_len, buf, phys, is_strnlen); print_membytes(cpu, buf, is_strnlen? 32 : buf_len); printf("\n"); } @@ -129,7 +151,7 @@ void lava_taint_query(target_ulong buf, LocType loc_t, target_ulong buf_len, con uint8_t bytes[LAVA_TAINT_QUERY_MAX_LEN] = {0}; target_ulong len = std::min(buf_len, LAVA_TAINT_QUERY_MAX_LEN); if (is_strnlen) { - panda_physical_memory_read(phys, bytes, LAVA_TAINT_QUERY_MAX_LEN); + panda_physical_memory_rw(phys, bytes, LAVA_TAINT_QUERY_MAX_LEN, false); for (int i = 0; i < LAVA_TAINT_QUERY_MAX_LEN; i++) { if (bytes[i] == '\0') { len = i; @@ -151,12 +173,23 @@ void lava_taint_query(target_ulong buf, LocType loc_t, target_ulong buf_len, con // is there *any* taint on this extent uint32_t num_tainted = 0; for (uint32_t i = 0; i < len; i++) { - Addr a = loc_t == LocMem ? make_maddr(RamOffset + i) : make_greg(buf, i); /* HACK: presumes for the same physical page ram_addr_t(x + i) == ram_addr_t(x) + i */ - if (taint2_query(a)) num_tainted++; + Addr a = loc_t == LocMem ? make_maddr(phys + i) : make_greg(buf, i); + if (taint2_query(a)) { + num_tainted++; + } } // If nothing's tainted and we aren't doing chaff bugs, return. - if (!chaff_bugs && num_tainted == 0) return; + if (num_tainted == 0) { + if (debug) { + printf("[pri_taint] Nothing is tainted!\n"); + } + return; + } + + if (debug) { + printf("[pri_taint] Starting to write the Panda Log now in pri_taint\n"); + } // 1. write the pandalog entry that tells us something was tainted on this extent Panda__TaintQueryPri tqh = PANDA__TAINT_QUERY_PRI__INIT; @@ -180,7 +213,8 @@ void lava_taint_query(target_ulong buf, LocType loc_t, target_ulong buf_len, con // 2. iterate over the bytes in the extent and pandalog detailed info about taint std::vector tq; for (uint32_t offset = 0; offset < len; offset++) { - Addr a = loc_t == LocMem ? make_maddr(RamOffset + offset) : make_greg(buf, offset); /* HACK: presumes for the same physical page ram_addr_t(x + i) == ram_addr_t(x) + i */ + uint32_t pa_indexed = phys + offset; + Addr a = loc_t == LocMem ? make_maddr(pa_indexed) : make_greg(buf, offset); if (taint2_query(a)) { if (loc_t == LocMem) { dprintf("\"%s\" @ 0x" TARGET_FMT_lx " is tainted\n", astnodename, buf + offset); @@ -198,7 +232,7 @@ void lava_taint_query(target_ulong buf, LocType loc_t, target_ulong buf_len, con // 4. write out callstack info tqh.call_stack = pandalog_callstack_create(); - dprintf("num taint queries: %lu\n", tq.size()); + dprintf("[pri_taint] num taint queries: %lu\n", tq.size()); tqh.n_taint_query = tq.size(); tqh.taint_query = tq.data(); Panda__LogEntry ple = PANDA__LOG_ENTRY__INIT; @@ -207,7 +241,9 @@ void lava_taint_query(target_ulong buf, LocType loc_t, target_ulong buf_len, con pandalog_callstack_free(tqh.call_stack); free(tqh.src_info); - for (Panda__TaintQuery *ptq : tq) pandalog_taint_query_free(ptq); + for (Panda__TaintQuery *ptq : tq) { + pandalog_taint_query_free(ptq); + } } #endif struct args { @@ -215,18 +251,25 @@ struct args { const char *src_filename; uint64_t src_linenum; unsigned ast_loc_id; + uint64_t funcaddr; }; #if defined(TARGET_I386) -void pfun(void *var_ty_void, const char *var_nm, LocType loc_t, target_ulong loc, void *in_args){ - if (!taint2_enabled()) +void pfun(void *var_ty_void, const char *var_nm, LocType loc_t, target_ulong loc, void *in_args) { + if (!taint2_enabled()) { + if (debug) { + printf("[pri_taint] Taint2 was not enabled (pfun called)\n"); + } return; + } // lava autogenerated variables start with this string const char *blacklist[] = {"kbcieiubweuhc", "phs", "phs_addr"} ; size_t i; for (i = 0; i < sizeof(blacklist)/sizeof(blacklist[0]); i++) { if (strncmp(var_nm, blacklist[i], strlen(blacklist[i])) == 0) { - //printf(" Found a lava generated string: %s", var_nm); + if (debug) { + printf("[pri_taint] Found a lava generated string: %s", var_nm); + } return; } } @@ -239,18 +282,20 @@ void pfun(void *var_ty_void, const char *var_nm, LocType loc_t, target_ulong loc global_src_filename = args->src_filename; global_src_linenum = args->src_linenum; global_ast_loc_id = args->ast_loc_id; + global_funcaddr = args->funcaddr; //target_ulong guest_dword; //std::string ty_string = std::string(var_ty); //size_t num_derefs = std::count(ty_string.begin(), ty_string.end(), '*'); //size_t i; - switch (loc_t){ + switch (loc_t) { case LocReg: - dprintf("VAR REG: %s %s in Reg " TARGET_FMT_lu "\n", var_ty, var_nm, loc); + dprintf("[pri_taint] VAR REG: %s %s in Reg " TARGET_FMT_lu "\n", var_ty, var_nm, loc); dwarf2_type_iter(pfun_cpu, loc, loc_t, (DwarfVarType *) var_ty_void, lava_taint_query, 3); break; case LocMem: - if (debug) - printf("VAR MEM: %s %s @ 0x" TARGET_FMT_lx "\n", var_ty, var_nm, loc); + if (debug) { + printf("[pri_taint] VAR MEM: %s %s @ 0x" TARGET_FMT_lx "\n", var_ty, var_nm, loc); + } dwarf2_type_iter(pfun_cpu, loc, loc_t, (DwarfVarType *) var_ty_void, lava_taint_query, 3); break; case LocConst: @@ -263,9 +308,9 @@ void pfun(void *var_ty_void, const char *var_nm, LocType loc_t, target_ulong loc default: assert(1==0); } - free(si); + // free(si); } - +/* void on_line_change(CPUState *cpu, target_ulong pc, const char *file_Name, const char *funct_name, unsigned long long lno){ if (taint2_enabled()){ struct args args = {cpu, file_Name, lno, 0}; @@ -280,21 +325,40 @@ void on_fn_start(CPUState *cpu, target_ulong pc, const char *file_Name, const ch pri_funct_livevar_iter(cpu, pc, (liveVarCB) pfun, (void *)&args); } +// Trace logging in the level of source code +void hypercall_log_trace(unsigned ast_loc_id) { + Panda__LogEntry ple = PANDA__LOG_ENTRY__INIT; + Panda__SourceTraceId stid = PANDA__SOURCE_TRACE_ID__INIT; + stid.ast_loc_id = ast_loc_id; + ple.source_trace_id = &stid; + pandalog_write_entry(&ple); +} +*/ #ifdef TARGET_I386 // Support all features of label and query program -bool i386_hypercall_callback(CPUState *cpu){ +bool i386_hypercall_callback(CPUState *cpu) { + if (debug) { + printf("[pri_taint] Calling i386 hypercall callback!\n"); + } bool ret = false; CPUArchState *env = (CPUArchState*)cpu->env_ptr; - if (taint2_enabled() && pandalog) { + if (taint2_enabled()) { // LAVA Hypercall target_ulong addr = panda_virt_to_phys(cpu, env->regs[R_EAX]); if ((int)addr == -1) { - printf ("panda hypercall with ptr to invalid PandaHypercallStruct: vaddr=0x%x paddr=0x%x\n", + printf ("[pri_taint] panda hypercall with ptr to invalid PandaHypercallStruct: vaddr=0x%x paddr=0x%x\n", (uint32_t) env->regs[R_EAX], (uint32_t) addr); } - else { + else if (pandalog) { + if (debug) { + printf("[pri_taint] Hypercall is OK and Panda Log is set\n"); + } PandaHypercallStruct phs; panda_virtual_memory_rw(cpu, env->regs[R_EAX], (uint8_t *) &phs, sizeof(phs), false); + + uint64_t funcaddr = 0; + panda_virtual_memory_rw(cpu, phs.info, (uint8_t*)&funcaddr, sizeof(target_ulong), false); + if (phs.magic == 0xabcd) { // if the phs action is a pri_query point, see // lava/include/pirate_mark_lava.h @@ -303,29 +367,49 @@ bool i386_hypercall_callback(CPUState *cpu){ SrcInfo info; int rc = pri_get_pc_source_info(cpu, pc, &info); if (!rc) { - struct args args = {cpu, info.filename, info.line_number, phs.src_filename}; - dprintf("panda hypercall: [%s], " + struct args args = {cpu, info.filename, info.line_number, phs.src_filename, funcaddr}; + dprintf("[pri_taint] panda hypercall: [%s], " "ln: %4ld, pc @ 0x" TARGET_FMT_lx "\n", info.filename, info.line_number,pc); pri_funct_livevar_iter(cpu, pc, (liveVarCB) pfun, (void *)&args); - //pri_all_livevar_iter(cpu, pc, (liveVarCB) pfun, (void *)&args); //lava_attack_point(phs); } + else { + if (debug) { + printf("[pri_taint] pri_get_pc_src_info has failed: %d != 0.\n", rc); + } + } ret = true; + // hypercall_log_trace(phs.src_filename); + } + else { + if (debug) { + printf("[pri_taint] Invalid action value in PHS struct: %d != 13.\n", phs.action); + } } } else { - printf ("Invalid magic value in PHS struct: %x != 0xabcd.\n", phs.magic); + printf("[pri_taint] Invalid magic value in PHS struct: %x != 0xabcd.\n", phs.magic); + } + } + else { + if (debug) { + printf("[pri_taint] No Panda Log even though hypercall seemed OK!\n"); } } } + else { + if (debug) { + printf("[pri_taint] taint2 is not enabled (hypercall)\n"); + } + } return ret; } #endif // TARGET_I386 -bool guest_hypercall_callback(CPUState *cpu){ +bool guest_hypercall_callback(CPUState *cpu) { #ifdef TARGET_I386 return i386_hypercall_callback(cpu); #endif @@ -354,33 +438,33 @@ bool init_plugin(void *self) { #if defined(TARGET_I386) panda_arg_list *args = panda_get_args("pri_taint"); - hypercall_taint = panda_parse_bool_opt(args, "hypercall", "Register tainting on a panda hypercall callback"); - linechange_taint = panda_parse_bool_opt(args, "linechange", "Register tainting on every line change in the source code (default)"); - chaff_bugs = panda_parse_bool_opt(args, "chaff", "Record untainted extents for chaff bugs."); - // default linechange_taint to true if there is no hypercall taint - if (!hypercall_taint) - linechange_taint = true; + debug = panda_parse_bool_opt(args, "debug", "enable debug output"); + panda_require("callstack_instr"); assert(init_callstack_instr_api()); panda_require("pri"); assert(init_pri_api()); panda_require("dwarf2"); assert(init_dwarf2_api()); - panda_require("taint2"); assert(init_taint2_api()); - if (hypercall_taint) { - panda_cb pcb; - pcb.guest_hypercall = guest_hypercall_callback; - panda_register_callback(self, PANDA_CB_GUEST_HYPERCALL, pcb); - } - if (linechange_taint){ - PPP_REG_CB("pri", on_before_line_change, on_line_change); + panda_cb pcb; + pcb.guest_hypercall = guest_hypercall_callback; + panda_register_callback(self, PANDA_CB_GUEST_HYPERCALL, pcb); + printf("[pri_taint] This plugin is activated!\n"); + + // If taint isn't already enabled, turn it on. + if (!taint2_enabled()) { + printf("[pri_taint] enabling taint now!\n"); + taint2_enable_taint(); } + return true; +#else + printf("[pri_taint] This plugin is only supported on x86\n"); + return false; //taint2_track_taint_state(); #endif - return true; } diff --git a/panda/python/core/.gitignore b/panda/python/core/.gitignore index 4eebd474f78..fa0ca2b31c2 100644 --- a/panda/python/core/.gitignore +++ b/panda/python/core/.gitignore @@ -8,3 +8,4 @@ data a __pycache__ *.egg-info +.eggs/ \ No newline at end of file