Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PyPANDA interface for targetcmp #1403

Draft
wants to merge 1 commit into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 40 additions & 1 deletion panda/plugins/targetcmp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ At every function call, check the first two potential arguments to identify if i

The goal here is to dynamically identify values that are compared against a known string.

**WARNING**: this plugin may have a sizable impact on emulation speed. In limited testing, CLI interfaces were still usable, but were noticeably slower than normal.

Arguments
---------

Expand All @@ -16,10 +18,47 @@ Arguments

Dependencies
------------
`callstack_instr`

* `callstack_instr`
* `callwitharg`

APIs and Callbacks
------------------

To use `targetcmp` programmatically you can use the following API functions.

```
bool add_target(const char* arg)
```
Add `arg` to the list of strings that `targetcmp` is watching for. Returns true if your string was successfully added to the list, or false if it was already in the search list.


```
bool remove_target(const char* arg)
```
Remove `arg` from the list of strings that `targetcmp` is watching for. Returns true if your string was successfully found and removed from the list.

```
void reset_targets()
```
Remove all strings that targetcmp is watching for.


`targetcmp` provides a single callback that can be used by other plugins to take actions when a string match is found:

Name: **on_tcm**
Signature:
```C
typedef void (* on_tcm_t)(CPUState *env, char* specified_value, char* compared_value);
```

Example
-------
When tab-completing a command, Linux will search the path for a given prefix. If we set our search target to `whoami`, we'll see when the `whoami` string is compared against a prefix of `wh`.

```
$(python3 -m pandare.qcows x86_64) -panda callstack_instr -panda callwitharg -panda targetcmp:target_strings=whoami,verbose=True

root@guest# echo wh[PRESS TAB TWICE]
[TargetCMP of whoami] wh
```
28 changes: 28 additions & 0 deletions panda/plugins/targetcmp/example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from pandare import Panda

# Create the PANDA instance from the generic "arm" guest configuration.
panda = Panda(generic="arm")

# Load targetcmp last, after the two plugins it is listed as depending on.
# No targets are passed here; one is added later through the plugin API.
# Alternatives that seed a target through plugin arguments instead:
#   panda.load_plugin("targetcmp", {"target_strings": "magic"})
#   panda.load_plugin("targetcmp", {"target_strings": "whoami", "verbose": True})
for plugin_name in ("callstack_instr", "callwitharg", "targetcmp"):
    panda.load_plugin(plugin_name)

@panda.ppp("targetcmp", "on_tcm")
def on_tcm(cpu, known, unknown):
    """Print each comparison reported by targetcmp's `on_tcm` callback.

    `known` is the target string we registered and `unknown` is the value it
    was compared against; both arrive as C strings and are decoded to `str`.
    """
    def as_text(c_str):
        # Copy the C string out through cffi and decode it.
        return panda.ffi.string(c_str).decode()

    found, other = as_text(known), as_text(unknown)
    print(f"TCM detected comparison of {found} to {other}")
    # Uncomment to also dump the guest registers at the match:
    #panda.arch.dump_regs(cpu)

@panda.queue_blocking
def driver():
    """Revert to the "root" snapshot, register a target, run a guest command.

    The target string is added through targetcmp's exported `add_target`
    API rather than through plugin arguments.
    """
    panda.revert_sync("root")

    # add_target takes a `const char*`, so hand it a cffi-owned char array.
    target_buf = panda.ffi.new("char[]", b"whoami")
    targetcmp = panda.plugins["targetcmp"]
    targetcmp.add_target(target_buf)

    # Run a guest command and show its output.
    output = panda.run_serial_cmd("find /usr/bin/ -name 'who*'")
    print(output)
    panda.end_analysis()

# Optional: uncomment to disable translation-block chaining before running.
#panda.disable_tb_chaining()
# Hand control to PANDA. Presumably this starts the guest and executes the
# queued `driver` function above -- confirm against the pandare documentation.
panda.run()
168 changes: 118 additions & 50 deletions panda/plugins/targetcmp/targetcmp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,36 +12,81 @@
#include "callwitharg/callwitharg.h" // Unnecessary?

extern "C" {

#include "targetcmp_ppp.h"
#include "callwitharg/callwitharg_ext.h"

bool add_target(const char* target);
bool remove_target(const char* target);
void reset_targets(void);

bool init_plugin(void *);
void uninit_plugin(void *);
PPP_PROT_REG_CB(on_tcm);
}

size_t target_str_len;
char *target_str;
PPP_CB_BOILERPLATE(on_tcm)


std::ofstream outfile;
bool verbose = false;
//size_t target_str_len;

// To support multiple concurrent targets, we need to track the strings we're tracking
std::vector<std::string> targets;

// We track the last QUEUE_SIZE addresses we've checked to avoid rereading guest pointers
#define QUEUE_SIZE 100
std::atomic<size_t> queue_idx(0);
std::atomic<target_ulong> queue[QUEUE_SIZE];
// Now we'll define a function to add to the queue
void add_to_queue(target_ulong addr) {
size_t idx = queue_idx.fetch_add(1);
queue[idx % QUEUE_SIZE] = addr;
//#define QUEUE_SIZE 100
// We need a queue for each entry in targets
//std::vector<std::atomic<size_t>> queue_idx;
//std::vector<std::atomic<target_ulong*>> queue;

// Start watching for comparisons against `target`.
//
// The string is appended to `targets` and forwarded to callwitharg via
// add_target_string() so that calls with a matching argument are reported.
//
// Returns true if the string was newly added; false if `target` is null or
// is already in `targets` (in which case nothing is changed).
bool add_target(const char* target) {
    // This function is exported to other plugins/PyPANDA. Comparing or
    // constructing a std::string from a null pointer is undefined behaviour,
    // so reject it up front.
    if (target == nullptr) {
        return false;
    }

    // Check if we already have this target
    for (const std::string& known : targets) {
        if (known == target) {
            return false;
        }
    }

    // If not, add it to our list (std::string takes its own copy)
    targets.push_back(target);

    // And request that callwitharg track it. Its API takes a non-const
    // char*, hence the explicit const_cast.
    add_target_string(const_cast<char*>(target));

    return true;
}
// And a function to check if an address is in the queue
bool in_queue(target_ulong addr) {
for (size_t i = 0; i < QUEUE_SIZE; i++) {
if (queue[i] == addr) return true;

// Stop watching for `target`.
//
// Removes the first entry of `targets` equal to `target`.
//
// Returns true if an entry was found and removed; false if `target` is null
// or was not present.
//
// NOTE(review): this only updates the local `targets` list. Nothing here asks
// callwitharg to stop tracking the string that add_target() registered --
// confirm whether callwitharg exposes a matching removal call.
bool remove_target(const char* target) {
    // Comparing a std::string against a null pointer is undefined behaviour,
    // and this function is callable from outside the plugin.
    if (target == nullptr) {
        return false;
    }

    for (auto it = targets.begin(); it != targets.end(); ++it) {
        if (*it == target) {
            // Found it: drop the entry and report success
            targets.erase(it);
            return true;
        }
    }

    return false;
}

// C++ set for storing unique string matches
// Forget every target string held in `targets`, leaving the list empty.
void reset_targets(void) {
    targets.erase(targets.begin(), targets.end());
}

// C++ set for storing unique string matches that we've logged as key=value
std::set<std::string> matches;

void record_match(char *str) {
void record_match(CPUState* cpu, char *known_value, char *str) {
if (strlen(str) == 0) return;

for (int i = 0; i < strlen(str); i++) {
Expand All @@ -50,36 +95,51 @@ void record_match(char *str) {
}
}

std::string s(str);
// If it's a self-comparison, ignore
if (strcmp(known_value, str) == 0 && strlen(known_value) == strlen(str)) {
return;
}

// We want to create a key=value string to log
std::string s(known_value);
s.append("=");
s.append(str);

if (matches.find(s) == matches.end()) {
//printf("TargetCMP finds %s with length %u\n", s.c_str(), s.length());
outfile << s << std::endl;
// New match - we want to report this!

// Verbose: log to stdout
if (verbose) {
printf("[TargetCMP of %s] %s\n", known_value, str);
}

// Log file: write down
if (outfile.is_open()) {
outfile << s << std::endl;
}

// PPP output:
PPP_RUN_CB(on_tcm, cpu, known_value, str);


// Update matches
matches.insert(s);

}
}

void on_match(CPUState* cpu, target_ulong func_addr, target_ulong *args, char* value, uint matching_idx, uint args_read) {
// We expect 2 args, if matching_idx is 0, arg1 is our target pointer, otherwise arg0
assert(args_read >= 2);

//printf("Match in arg %d with arg1=" TARGET_FMT_lx " and arg2=" TARGET_FMT_lx "\n", matching_idx, args[0], args[1]);

target_ulong target_ptr = args[matching_idx == 0 ? 1 : 0]; // If we matched arg0, we want arg1 and vice versa

// If it's in the queue, we've already checked it - bail
if (in_queue(target_ptr)) {
return;
}
// Otherwise add it to the queue
add_to_queue(target_ptr);

size_t short_len = strlen(value);
size_t full_len = 4*short_len;
char* other_arg = (char*)malloc(full_len + 1);

// Try to read the target string from memory
if (panda_virtual_memory_read(cpu, target_ptr, (uint8_t*)other_arg, full_len) == 0) {
other_arg[target_str_len] = '\0'; // Ensure null termination
other_arg[full_len] = '\0'; // Ensure null termination
} else if (panda_virtual_memory_read(cpu, target_ptr, (uint8_t*)other_arg, short_len) == 0) {
// Recovered short string - move null terminator early
other_arg[short_len] = '\0'; // Ensure null termination
Expand All @@ -88,14 +148,20 @@ void on_match(CPUState* cpu, target_ulong func_addr, target_ulong *args, char* v
free(other_arg);
return;
}
record_match(other_arg);

record_match(cpu, value, other_arg);
free(other_arg);
}

// logfile default is cwd/targetcmp.txt
std::filesystem::path logfile = std::filesystem::current_path() / "targetcmp.txt";

bool init_plugin(void *self) {
#if !defined(TARGET_ARM) && !defined(TARGET_MIPS) && !defined(TARGET_X86_64)
printf("ERROR: Unsupported architecture for targetcmp\n");
return false;
#endif

if (!init_callwitharg_api()) {
printf("[targetcmp] Fatal error: unable to initialize callwitharg - is it loaded?\n");
return false;
Expand All @@ -104,38 +170,40 @@ bool init_plugin(void *self) {
std::unique_ptr<panda_arg_list, void(*)(panda_arg_list*)> args(
panda_get_args("targetcmp"), panda_free_args);

const char* logfile_arg = panda_parse_string_opt(args.get(), "output_file",
NULL, "Output file to record compared values into");
if (logfile_arg) logfile = std::string(logfile_arg);
// Optional arguments: target_strings, output_file, verbose
char *target_str = strdup(panda_parse_string_opt(args.get(), "target_strings",
"String(s) to match. Colon seperated", ""));

target_str = strdup(panda_parse_string_req(args.get(), "target_str", "String to match"));
target_str_len = strlen(target_str);

if (target_str_len <= 0) {
printf("targetcmp error: invalid target_str argument\n");
return false;
const char* logfile_arg = panda_parse_string_opt(args.get(), "output_file",
NULL, "Output file to record compared values into");
if (logfile_arg) {
// Open file for writing, delete anything there.
outfile.open(logfile.string(), std::ios_base::out | std::ios_base::trunc);
}

// On every function call, use our callback to check an argument is the target_str, if so store the other arg
#if defined(TARGET_ARM) || defined(TARGET_MIPS) || defined(TARGET_X86_64)
// Create empty file - Just so we see that something's happening
// Open file for writing, delete anything there.
outfile.open(logfile.string(), std::ios_base::out | std::ios_base::trunc);
verbose = panda_parse_bool_opt(args.get(), "verbose", "enable verbose output on every match");

// Call callwitharg's add_target_string function
add_target_string(target_str);
// If we have a target_str, split it on colons and add each target
if (strlen(target_str) > 0) {
char* target = strtok(target_str, ":");
while (target != NULL) {
add_target(target);
target = strtok(NULL, ":");
}
}

// Register on_call_match with callwitharg's on_call_match_str PPP callback
PPP_REG_CB("callwitharg", on_call_match_str, on_match);

free(target_str);

return true;
#endif
printf("ERROR: Unsupported architecture for targetcmp\n");
return false;

}

void uninit_plugin(void *self) {
if (outfile.is_open()) {
outfile.close();
}
free((void*)target_str);
}
}
14 changes: 14 additions & 0 deletions panda/plugins/targetcmp/targetcmp_int_fns.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#ifndef __TARGETCMP_INT_FNS_H_
#define __TARGETCMP_INT_FNS_H_

// BEGIN_PYPANDA_NEEDS_THIS -- do not delete this comment bc pypanda
// api autogen needs it. And don't put any compiler directives
// between this and END_PYPANDA_NEEDS_THIS except includes of other
// files in this directory that contain subsections like this one.

bool add_target(const char* arg);
bool remove_target(const char* arg);
void reset_targets();

// END_PYPANDA_NEEDS_THIS -- do not delete this comment!
#endif
13 changes: 13 additions & 0 deletions panda/plugins/targetcmp/targetcmp_ppp.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#ifndef __TARGETCMP_PPP_H_
#define __TARGETCMP_PPP_H_


// BEGIN_PYPANDA_NEEDS_THIS -- do not delete this comment bc pypanda
// api autogen needs it. And don't put any compiler directives
// between this and END_PYPANDA_NEEDS_THIS except includes of other
// files in this directory that contain subsections like this one.

PPP_CB_TYPEDEF(void, on_tcm, CPUState *env, char* known, char* match);

// END_PYPANDA_NEEDS_THIS -- do not delete this comment!
#endif
4 changes: 3 additions & 1 deletion panda/python/core/create_panda_datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,7 @@ def expand_ppp_def(line):
define_clean_header(ffi, include_dir + "/proc_start_linux_ppp.h")
define_clean_header(ffi, include_dir + "/forcedexec_ppp.h")
define_clean_header(ffi, include_dir + "/stringsearch_ppp.h")
define_clean_header(ffi, include_dir + "/targetcmp_ppp.h")
# END PPP headers

define_clean_header(ffi, include_dir + "/breakpoints.h")
Expand Down Expand Up @@ -380,8 +381,9 @@ def main(install=False,recompile=True):
# TODO: programtically copy anything that ends with _ppp.h
copy_ppp_header("%s/%s" % (PLUGINS_DIR+"/forcedexec", "forcedexec_ppp.h"))
copy_ppp_header("%s/%s" % (PLUGINS_DIR+"/stringsearch", "stringsearch_ppp.h"))
copy_ppp_header("%s/%s" % (PLUGINS_DIR+"/targetcmp", "targetcmp_ppp.h"))
create_pypanda_header("%s/%s" % (PLUGINS_DIR+"/hooks2", "hooks2.h"))

copy_ppp_header("%s/%s" % (PLUGINS_DIR+"/proc_start_linux", "proc_start_linux_ppp.h"))
create_pypanda_header("%s/%s" % (PLUGINS_DIR+"/proc_start_linux", "proc_start_linux.h"))
create_pypanda_header("%s/%s" % (PLUGINS_DIR+"/cosi", "cosi.h"))
Expand Down
Loading