Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support for vyper dispatcher in selector detection #542

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions crates/cfg/src/core/graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ pub fn build_cfg(

let assembly = format!(
"{} {} {}",
encode_hex_reduced(U256::from(operation.last_instruction.instruction)),
encode_hex_reduced(U256::from(operation.last_instruction.pc)),
opcode_name,
if opcode_name.contains("PUSH") {
encode_hex_reduced(
Expand Down Expand Up @@ -61,8 +61,8 @@ pub fn build_cfg(
.first()
.ok_or_eyre("failed to get first operation")?
.last_instruction
.opcode ==
JUMPDEST,
.opcode
== JUMPDEST,
)?;
}

Expand Down
13 changes: 2 additions & 11 deletions crates/cfg/src/core/mod.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
pub(crate) mod graph;

use alloy::primitives::Address;
use eyre::eyre;
use heimdall_common::{ether::compiler::detect_compiler, utils::strings::StringExt};
use heimdall_vm::core::vm::VM;
use heimdall_vm::core::vm::Vm;

use petgraph::{dot::Dot, Graph};
use std::time::{Duration, Instant};
Expand Down Expand Up @@ -65,15 +64,7 @@ pub async fn cfg(args: CfgArgs) -> Result<CfgResult, Error> {

// create a new EVM instance. we will use this for finding function selectors,
// performing symbolic execution, and more.
let mut evm = VM::new(
&contract_bytecode,
&[],
Address::default(),
Address::default(),
Address::default(),
0,
u128::MAX,
);
let mut evm = Vm::new_with_bytecode(&contract_bytecode);

info!("performing symbolic execution on '{}'", args.target.truncate(64));
let start_sym_exec_time = Instant::now();
Expand Down
148 changes: 96 additions & 52 deletions crates/common/src/ether/signatures.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
use std::path::PathBuf;
use std::{
path::PathBuf,
sync::{Arc, Mutex},
time::Instant,
};

use alloy_dyn_abi::{DynSolType, DynSolValue};
use alloy_json_abi::JsonAbi;
use async_trait::async_trait;
use hashbrown::HashMap;

use crate::{
ether::types::parse_function_parameters,
Expand All @@ -15,7 +20,7 @@ use crate::{
use eyre::{OptionExt, Result};
use heimdall_cache::{store_cache, with_cache};
use serde::{Deserialize, Serialize};
use tracing::{debug, trace};
use tracing::{debug, error, info, trace, warn};

use super::types::DynSolValueExt;

Expand Down Expand Up @@ -306,6 +311,56 @@ impl ResolveSelector for ResolvedFunction {
}
}

impl TryFrom<&ResolvedFunction> for TraceFactory {
    type Error = eyre::Report;

    /// Builds a [`TraceFactory`] describing a resolved function.
    ///
    /// The resulting trace contains a single root call (added with the labels `"heimdall"` and
    /// `"decode"`), a message holding the function's signature, and one message per entry in
    /// `function.decoded_inputs`. If `decoded_inputs` is `None`, no input messages are added.
    ///
    /// # Errors
    ///
    /// This implementation currently always returns `Ok`; the `Result` is required by the
    /// [`TryFrom`] contract.
    fn try_from(function: &ResolvedFunction) -> Result<Self, Self::Error> {
        let mut trace = TraceFactory::default();
        let decode_call = trace.add_call(
            0,
            line!(),
            "heimdall".to_string(),
            "decode".to_string(),
            vec![],
            "()".to_string(),
        );
        trace.br(decode_call);
        trace.add_message(decode_call, line!(), vec![format!("signature: {}", function.signature)]);
        trace.br(decode_call);

        // build inputs; a `None` value for `decoded_inputs` yields no iterations
        for (i, input) in function.decoded_inputs.iter().flatten().enumerate() {
            let mut decoded_inputs_as_message = display(vec![input.to_owned()], " ");
            if decoded_inputs_as_message.is_empty() {
                // preserve the existing behaviour: stop at the first input that renders nothing
                break;
            }

            // Pad the index out to a fixed width. `saturating_sub` keeps this from underflowing
            // (and panicking) once the index needs more than four digits.
            let padding = " ".repeat(4usize.saturating_sub(i.to_string().len()));
            let first_line = decoded_inputs_as_message[0].replacen(" ", "", 1);

            // only the first input carries the `input` label
            decoded_inputs_as_message[0] = if i == 0 {
                format!("input {}:{}{}", i, padding, first_line)
            } else {
                format!(" {}:{}{}", i, padding, first_line)
            };

            // add to trace and decoded string
            trace.add_message(decode_call, 1, decoded_inputs_as_message);
        }

        Ok(trace)
    }
}

/// Given the path to an ABI file, parses all [`ResolvedFunction`]s, [`ResolvedError`]s, and
/// [`ResolvedLog`]s from the ABI and saves them to the cache.
pub fn cache_signatures_from_abi(path: PathBuf) -> Result<()> {
Expand Down Expand Up @@ -356,6 +411,7 @@ pub fn cache_signatures_from_abi(path: PathBuf) -> Result<()> {
Ok(())
}

/// A heuristic function to score a function signature based on its spamminess.
pub fn score_signature(signature: &str, num_words: Option<usize>) -> u32 {
// the score starts at 1000
let mut score = 1000;
Expand Down Expand Up @@ -390,64 +446,52 @@ pub fn score_signature(signature: &str, num_words: Option<usize>) -> u32 {
score
}

/// trait impls
/// trait impls
/// trait impls

impl TryFrom<&ResolvedFunction> for TraceFactory {
// eyre
type Error = eyre::Report;

fn try_from(function: &ResolvedFunction) -> Result<Self, Self::Error> {
let mut trace = TraceFactory::default();
let decode_call = trace.add_call(
0,
line!(),
"heimdall".to_string(),
"decode".to_string(),
vec![],
"()".to_string(),
);
trace.br(decode_call);
trace.add_message(decode_call, line!(), vec![format!("signature: {}", function.signature)]);
trace.br(decode_call);

// build inputs
for (i, input) in function.decoded_inputs.as_ref().unwrap_or(&Vec::new()).iter().enumerate()
{
let mut decoded_inputs_as_message = display(vec![input.to_owned()], " ");
if decoded_inputs_as_message.is_empty() {
break;
}
/// Resolve a list of selectors to their function signatures.
pub async fn resolve_selectors<T>(selectors: Vec<String>) -> HashMap<String, Vec<T>>
where
T: ResolveSelector + Send + Clone + 'static, {
// short-circuit if there are no selectors
if selectors.is_empty() {
return HashMap::new();
}

if i == 0 {
decoded_inputs_as_message[0] = format!(
"input {}:{}{}",
i,
" ".repeat(4 - i.to_string().len()),
decoded_inputs_as_message[0].replacen(" ", "", 1)
)
} else {
decoded_inputs_as_message[0] = format!(
" {}:{}{}",
i,
" ".repeat(4 - i.to_string().len()),
decoded_inputs_as_message[0].replacen(" ", "", 1)
)
let resolved_functions: Arc<Mutex<HashMap<String, Vec<T>>>> =
Arc::new(Mutex::new(HashMap::new()));
let mut threads = Vec::new();
let start_time = Instant::now();
let selector_count = selectors.len();

for selector in selectors {
let function_clone = resolved_functions.clone();

// create a new thread for each selector
threads.push(tokio::task::spawn(async move {
if let Ok(Some(function)) = T::resolve(&selector).await {
let mut _resolved_functions =
function_clone.lock().expect("Could not obtain lock on function_clone.");
_resolved_functions.insert(selector, function);
}
}));
}

// add to trace and decoded string
trace.add_message(decode_call, 1, decoded_inputs_as_message);
// wait for all threads to finish
for thread in threads {
if let Err(e) = thread.await {
// Handle error
error!("failed to resolve selector: {:?}", e);
}
}

Ok(trace)
let signatures =
resolved_functions.lock().expect("failed to obtain lock on resolved_functions.").clone();
if signatures.is_empty() {
warn!("failed to resolve any signatures from {} selectors", selector_count);
}
info!("resolved {} signatures from {} selectors", signatures.len(), selector_count);
debug!("signature resolution took {:?}", start_time.elapsed());
signatures
}

/// tests
/// tests
/// tests

#[cfg(test)]
mod tests {
use heimdall_cache::delete_cache;
Expand Down
1 change: 0 additions & 1 deletion crates/decompile/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,5 +38,4 @@ alloy = { version = "0.3.3", features = [
hashbrown = "0.14.5"
tokio = { version = "1", features = ["full"] }

heimdall-disassembler.workspace = true
heimdall-vm.workspace = true
33 changes: 5 additions & 28 deletions crates/decompile/src/core/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ pub(crate) mod out;
pub(crate) mod postprocess;
pub(crate) mod resolve;

use alloy::primitives::Address;
use alloy_dyn_abi::{DynSolType, DynSolValue};
use alloy_json_abi::JsonAbi;
use eyre::eyre;
Expand All @@ -12,18 +11,14 @@ use heimdall_common::{
ether::{
compiler::detect_compiler,
signatures::{
cache_signatures_from_abi, score_signature, ResolvedError, ResolvedFunction,
ResolvedLog,
cache_signatures_from_abi, resolve_selectors, score_signature, ResolvedError,
ResolvedFunction, ResolvedLog,
},
types::to_type,
},
utils::strings::{decode_hex, encode_hex, encode_hex_reduced, StringExt},
};
use heimdall_disassembler::{disassemble, DisassemblerArgsBuilder};
use heimdall_vm::{
core::vm::VM,
ext::selectors::{find_function_selectors, resolve_selectors},
};
use heimdall_vm::{core::vm::Vm, ext::selectors::find_function_selectors};
use std::time::{Duration, Instant};

use crate::{
Expand Down Expand Up @@ -92,29 +87,11 @@ pub async fn decompile(args: DecompilerArgs) -> Result<DecompileResult, Error> {

// create a new EVM instance. we will use this for finding function selectors,
// performing symbolic execution, and more.
let mut evm = VM::new(
&contract_bytecode,
&[],
Address::default(),
Address::default(),
Address::default(),
0,
u128::MAX,
);

// disassemble the contract's bytecode
let assembly = disassemble(
DisassemblerArgsBuilder::new()
.target(encode_hex(&contract_bytecode))
.build()
.expect("impossible case: failed to build disassembly arguments"),
)
.await
.map_err(|e| Error::Eyre(eyre!("disassembling contract bytecode failed: {}", e)))?;
let mut evm = Vm::new_with_bytecode(&contract_bytecode);

// find all the function selectors in the bytecode
let start_selectors_time = Instant::now();
let selectors = find_function_selectors(&evm, &assembly);
let selectors = find_function_selectors(&contract_bytecode);
debug!("finding function selectors took {:?}", start_selectors_time.elapsed());

// resolve selectors (if enabled)
Expand Down
2 changes: 0 additions & 2 deletions crates/decompile/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
pub enum Error {
#[error("Fetch error: {0}")]
FetchError(String),
#[error("Disassembly error: {0}")]
DisassemblyError(#[from] heimdall_disassembler::Error),
#[error("Internal error: {0}")]
Eyre(#[from] eyre::Report),
}
12 changes: 6 additions & 6 deletions crates/decompile/src/utils/heuristics/arguments.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ pub fn argument_heuristic<'a>(
// calculate the argument index, with the 4byte signature padding removed
// for example, CALLDATALOAD(4) -> (4-4)/32 = 0
// CALLDATALOAD(36) -> (36-4)/32 = 1
let arg_index = (state.last_instruction.inputs[0].saturating_sub(U256::from(4)) /
U256::from(32))
let arg_index = (state.last_instruction.inputs[0].saturating_sub(U256::from(4))
/ U256::from(32))
.try_into()
.unwrap_or(usize::MAX);

Expand Down Expand Up @@ -77,7 +77,7 @@ pub fn argument_heuristic<'a>(
{
debug!(
"instruction {} ({}) indicates argument {} is masked to {} bytes",
state.last_instruction.instruction,
state.last_instruction.pc,
opcode_name(state.last_instruction.opcode),
arg_index,
mask_size_bytes
Expand Down Expand Up @@ -227,7 +227,7 @@ pub fn argument_heuristic<'a>(
{
debug!(
"instruction {} ({}) indicates argument {} may be a numeric type",
state.last_instruction.instruction,
state.last_instruction.pc,
opcode_name(state.last_instruction.opcode),
arg_index
);
Expand All @@ -251,7 +251,7 @@ pub fn argument_heuristic<'a>(
{
debug!(
"instruction {} ({}) indicates argument {} may be a bytes type",
state.last_instruction.instruction,
state.last_instruction.pc,
opcode_name(state.last_instruction.opcode),
arg_index
);
Expand All @@ -278,7 +278,7 @@ pub fn argument_heuristic<'a>(
{
debug!(
"instruction {} ({}) indicates argument {} may be a boolean",
state.last_instruction.instruction,
state.last_instruction.pc,
opcode_name(state.last_instruction.opcode),
arg_index
);
Expand Down
4 changes: 2 additions & 2 deletions crates/decompile/src/utils/heuristics/extcall.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ pub fn extcall_heuristic<'a>(
if gas_solidified.contains("0x08fc") {
trace!(
"instruction {} ({}) with 2300 gas indicates a value transfer",
instruction.instruction,
instruction.pc,
opcode_name(instruction.opcode)
);
function.logic.push(format!(
Expand All @@ -51,7 +51,7 @@ pub fn extcall_heuristic<'a>(
if extcalldata.is_empty() {
trace!(
"instruction {} ({}) with no calldata indicates a value transfer",
instruction.instruction,
instruction.pc,
opcode_name(instruction.opcode)
);
function.logic.push(format!(
Expand Down
Loading
Loading