From 56ccb521cf53768f74732df841a170013b3f6506 Mon Sep 17 00:00:00 2001 From: Jon-Becker Date: Fri, 11 Oct 2024 14:41:30 -0500 Subject: [PATCH 01/14] chore(decompile): separate extcall logic from solidity heuristic --- crates/cache/src/lib.rs | 17 +- crates/cfg/src/core/graph.rs | 4 +- crates/common/src/ether/signatures.rs | 10 +- crates/common/src/ether/tokenize.rs | 28 +-- crates/common/src/utils/strings.rs | 4 +- crates/common/src/utils/sync.rs | 3 +- crates/common/src/utils/version.rs | 40 ++-- crates/decode/src/utils/abi.rs | 20 +- crates/decode/src/utils/constructor.rs | 12 +- crates/decompile/src/core/analyze.rs | 13 +- crates/decompile/src/core/out/source.rs | 6 +- crates/decompile/src/core/postprocess.rs | 4 +- .../src/utils/heuristics/arguments.rs | 4 +- .../decompile/src/utils/heuristics/extcall.rs | 199 ++++++++++++++++++ crates/decompile/src/utils/heuristics/mod.rs | 2 + .../src/utils/heuristics/modifiers.rs | 6 +- .../src/utils/heuristics/solidity.rs | 80 +------ crates/decompile/src/utils/heuristics/yul.rs | 4 +- .../src/utils/postprocessors/memory.rs | 10 +- .../src/utils/postprocessors/storage.rs | 4 +- .../src/utils/postprocessors/transient.rs | 4 +- .../src/utils/postprocessors/variable.rs | 4 +- crates/vm/src/core/types.rs | 4 +- crates/vm/src/core/vm.rs | 94 ++++----- crates/vm/src/ext/exec/util.rs | 12 +- crates/vm/src/ext/lexers/solidity.rs | 4 +- crates/vm/src/ext/range_map.rs | 12 +- crates/vm/src/ext/selectors.rs | 11 +- 28 files changed, 370 insertions(+), 245 deletions(-) create mode 100644 crates/decompile/src/utils/heuristics/extcall.rs diff --git a/crates/cache/src/lib.rs b/crates/cache/src/lib.rs index 3eca410d..a564499c 100644 --- a/crates/cache/src/lib.rs +++ b/crates/cache/src/lib.rs @@ -215,8 +215,7 @@ pub fn delete_cache(key: &str) -> Result<(), Error> { #[allow(deprecated)] pub fn read_cache(key: &str) -> Result, Error> where - T: 'static + DeserializeOwned, -{ + T: 'static + DeserializeOwned, { let home = 
home_dir().ok_or(Error::Generic( "failed to get home directory. does your os support `std::env::home_dir()`?".to_string(), ))?; @@ -239,8 +238,8 @@ where .map_err(|e| Error::Generic(format!("failed to deserialize cache object: {:?}", e)))?; // check if the cache has expired, if so, delete it and return None - if cache.expiry - < std::time::SystemTime::now() + if cache.expiry < + std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) .map_err(|e| Error::Generic(format!("failed to get current time: {:?}", e)))? .as_secs() @@ -267,8 +266,7 @@ where #[allow(deprecated)] pub fn store_cache(key: &str, value: T, expiry: Option) -> Result<(), Error> where - T: Serialize, -{ + T: Serialize, { let home = home_dir().ok_or(Error::Generic( "failed to get home directory. does your os support `std::env::home_dir()`?".to_string(), ))?; @@ -280,8 +278,8 @@ where std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) .map_err(|e| Error::Generic(format!("failed to get current time: {:?}", e)))? - .as_secs() - + 60 * 60 * 24 * 90, + .as_secs() + + 60 * 60 * 24 * 90, ); let cache = Cache { value, expiry }; @@ -306,8 +304,7 @@ pub async fn with_cache(key: &str, func: F) -> eyre::Result where T: 'static + Serialize + DeserializeOwned + Send + Sync, F: FnOnce() -> Fut + Send, - Fut: std::future::Future> + Send, -{ + Fut: std::future::Future> + Send, { // Try to read from cache match read_cache::(key) { Ok(Some(cached_value)) => { diff --git a/crates/cfg/src/core/graph.rs b/crates/cfg/src/core/graph.rs index 165e8814..702d31a4 100644 --- a/crates/cfg/src/core/graph.rs +++ b/crates/cfg/src/core/graph.rs @@ -61,8 +61,8 @@ pub fn build_cfg( .first() .ok_or_eyre("failed to get first operation")? 
.last_instruction - .opcode - == JUMPDEST, + .opcode == + JUMPDEST, )?; } diff --git a/crates/common/src/ether/signatures.rs b/crates/common/src/ether/signatures.rs index 69823bd1..f93e21d2 100644 --- a/crates/common/src/ether/signatures.rs +++ b/crates/common/src/ether/signatures.rs @@ -286,8 +286,8 @@ pub fn score_signature(signature: &str, num_words: Option) -> u32 { // prioritize signatures with less numbers score -= (signature.split('(').next().unwrap_or("").matches(|c: char| c.is_numeric()).count() - as u32) - * 3; + as u32) * + 3; // prioritize signatures with parameters let num_params = signature.matches(',').count() + 1; @@ -295,9 +295,9 @@ pub fn score_signature(signature: &str, num_words: Option) -> u32 { // count the number of parameters in the signature, if enabled if let Some(num_words) = num_words { - let num_dyn_params = signature.matches("bytes").count() - + signature.matches("string").count() - + signature.matches('[').count(); + let num_dyn_params = signature.matches("bytes").count() + + signature.matches("string").count() + + signature.matches('[').count(); let num_static_params = num_params - num_dyn_params; // reduce the score if the signature has less static parameters than there are words in the diff --git a/crates/common/src/ether/tokenize.rs b/crates/common/src/ether/tokenize.rs index 6a81820b..e2d3f37c 100644 --- a/crates/common/src/ether/tokenize.rs +++ b/crates/common/src/ether/tokenize.rs @@ -130,17 +130,17 @@ pub fn tokenize(s: &str) -> Token { let mut op = ch.to_string(); iter.next(); if let Some(&next_ch) = iter.peek() { - if (ch == '=' && (next_ch == '=' || next_ch == '>')) - || (ch == '&' && next_ch == '&') - || (ch == '|' && next_ch == '|') - || (ch == '<' && next_ch == '=') - || (ch == '>' && next_ch == '=') - || (ch == '!' 
&& next_ch == '=') - || (ch == '+' && next_ch == '+') - || (ch == '-' && next_ch == '-') - || (ch == '*' && next_ch == '*') - || (ch == '>' && next_ch == '>') - || (ch == '<' && next_ch == '<') + if (ch == '=' && (next_ch == '=' || next_ch == '>')) || + (ch == '&' && next_ch == '&') || + (ch == '|' && next_ch == '|') || + (ch == '<' && next_ch == '=') || + (ch == '>' && next_ch == '=') || + (ch == '!' && next_ch == '=') || + (ch == '+' && next_ch == '+') || + (ch == '-' && next_ch == '-') || + (ch == '*' && next_ch == '*') || + (ch == '>' && next_ch == '>') || + (ch == '<' && next_ch == '<') { op.push(next_ch); iter.next(); @@ -188,9 +188,9 @@ fn parse_literal(iter: &mut std::iter::Peekable) -> String { } // literal validation - if literal.starts_with("0x") - && literal.len() > 2 - && literal[2..].chars().all(|c| c.is_ascii_hexdigit()) + if literal.starts_with("0x") && + literal.len() > 2 && + literal[2..].chars().all(|c| c.is_ascii_hexdigit()) { return literal; } diff --git a/crates/common/src/utils/strings.rs b/crates/common/src/utils/strings.rs index 4e663775..d9327a9e 100644 --- a/crates/common/src/utils/strings.rs +++ b/crates/common/src/utils/strings.rs @@ -330,8 +330,8 @@ pub fn tokenize(s: &str) -> Vec { // Check if current character and last character form a compound operator (like "==", // ">=", "&&", "||") if let Some(last) = last_char { - if compound_operator_first_chars.contains(&last) - && (c == '=' || c == '&' || c == '|') + if compound_operator_first_chars.contains(&last) && + (c == '=' || c == '&' || c == '|') { // Remove the last character as a single token tokens.pop(); diff --git a/crates/common/src/utils/sync.rs b/crates/common/src/utils/sync.rs index 151f8a36..1e9bdb94 100644 --- a/crates/common/src/utils/sync.rs +++ b/crates/common/src/utils/sync.rs @@ -1,7 +1,6 @@ /// Take in a non-async function and await it. This functions should be blocking. 
pub fn blocking_await(f: F) -> T where - F: FnOnce() -> T, -{ + F: FnOnce() -> T, { tokio::task::block_in_place(f) } diff --git a/crates/common/src/utils/version.rs b/crates/common/src/utils/version.rs index a72e42fd..54b6d0a0 100644 --- a/crates/common/src/utils/version.rs +++ b/crates/common/src/utils/version.rs @@ -93,46 +93,46 @@ impl Display for Version { impl Version { /// greater than pub fn gt(&self, other: &Version) -> bool { - self.major > other.major - || (self.major == other.major && self.minor > other.minor) - || (self.major == other.major && self.minor == other.minor && self.patch > other.patch) + self.major > other.major || + (self.major == other.major && self.minor > other.minor) || + (self.major == other.major && self.minor == other.minor && self.patch > other.patch) } /// greater than or equal to pub fn gte(&self, other: &Version) -> bool { - self.major > other.major - || (self.major == other.major && self.minor > other.minor) - || (self.major == other.major && self.minor == other.minor && self.patch >= other.patch) + self.major > other.major || + (self.major == other.major && self.minor > other.minor) || + (self.major == other.major && self.minor == other.minor && self.patch >= other.patch) } /// less than pub fn lt(&self, other: &Version) -> bool { - self.major < other.major - || (self.major == other.major && self.minor < other.minor) - || (self.major == other.major && self.minor == other.minor && self.patch < other.patch) + self.major < other.major || + (self.major == other.major && self.minor < other.minor) || + (self.major == other.major && self.minor == other.minor && self.patch < other.patch) } /// less than or equal to pub fn lte(&self, other: &Version) -> bool { - self.major < other.major - || (self.major == other.major && self.minor < other.minor) - || (self.major == other.major && self.minor == other.minor && self.patch <= other.patch) + self.major < other.major || + (self.major == other.major && self.minor < other.minor) || + 
(self.major == other.major && self.minor == other.minor && self.patch <= other.patch) } #[allow(clippy::should_implement_trait)] pub fn eq(&self, other: &Version) -> bool { - self.major == other.major - && self.minor == other.minor - && self.patch == other.patch - && self.channel == other.channel + self.major == other.major && + self.minor == other.minor && + self.patch == other.patch && + self.channel == other.channel } /// not equal to pub fn ne(&self, other: &Version) -> bool { - self.major != other.major - || self.minor != other.minor - || self.patch != other.patch - || self.channel != other.channel + self.major != other.major || + self.minor != other.minor || + self.patch != other.patch || + self.channel != other.channel } /// if the version is a nightly version diff --git a/crates/decode/src/utils/abi.rs b/crates/decode/src/utils/abi.rs index c1f44cc7..b8321a13 100644 --- a/crates/decode/src/utils/abi.rs +++ b/crates/decode/src/utils/abi.rs @@ -185,8 +185,8 @@ fn try_decode_dynamic_parameter_bytes( // (5) we've covered all words from `data_start_word_offset` to `data_end_word_offset`, // so add them to `word_coverages`. coverages.extend( - (word_offset.try_into().unwrap_or(usize::MAX) - ..data_end_word_offset.try_into().unwrap_or(usize::MAX)) + (word_offset.try_into().unwrap_or(usize::MAX).. + data_end_word_offset.try_into().unwrap_or(usize::MAX)) .collect::>(), ); @@ -211,8 +211,8 @@ fn try_decode_dynamic_parameter_array( // (1) join all words from `data_start_word_offset` to `data_end_word_offset`. This is where // the encoded data may be stored. - let data_words = &calldata_words[data_start_word_offset.try_into().unwrap_or(usize::MAX) - ..data_end_word_offset.try_into().unwrap_or(usize::MAX)]; + let data_words = &calldata_words[data_start_word_offset.try_into().unwrap_or(usize::MAX).. 
+ data_end_word_offset.try_into().unwrap_or(usize::MAX)]; trace!("potential array items: {:#?}", data_words); // (2) first, check if this is a `string` type, since some string encodings may appear to be @@ -234,8 +234,8 @@ fn try_decode_dynamic_parameter_array( // `word_coverages` with the indices of all words from `data_start_word_offset` to // `data_end_word_offset`, since we've now covered all words in the ABI-encoded type. coverages.extend( - (word_offset.try_into().unwrap_or(usize::MAX) - ..data_end_word_offset.try_into().unwrap_or(usize::MAX)) + (word_offset.try_into().unwrap_or(usize::MAX).. + data_end_word_offset.try_into().unwrap_or(usize::MAX)) .collect::>(), ); @@ -296,8 +296,8 @@ fn try_decode_dynamic_parameter_string( trace!( "with data: {:#?}", encode_hex( - &calldata_words[data_start_word_offset.try_into().unwrap_or(usize::MAX) - ..data_end_word_offset.try_into().unwrap_or(usize::MAX)] + &calldata_words[data_start_word_offset.try_into().unwrap_or(usize::MAX).. + data_end_word_offset.try_into().unwrap_or(usize::MAX)] .concat() ) ); @@ -321,8 +321,8 @@ fn try_decode_dynamic_parameter_string( // (5) we've covered all words from `data_start_word_offset` to `data_end_word_offset`, // so add them to `word_coverages`. coverages.extend( - (word_offset.try_into().unwrap_or(usize::MAX) - ..data_end_word_offset.try_into().unwrap_or(usize::MAX)) + (word_offset.try_into().unwrap_or(usize::MAX).. 
+ data_end_word_offset.try_into().unwrap_or(usize::MAX)) .collect::>(), ); diff --git a/crates/decode/src/utils/constructor.rs b/crates/decode/src/utils/constructor.rs index 881968b3..836ba2f9 100644 --- a/crates/decode/src/utils/constructor.rs +++ b/crates/decode/src/utils/constructor.rs @@ -42,16 +42,16 @@ pub fn parse_deployment_bytecode(input: Vec) -> Result { let constructor_offset = 0; let metadata_length = u32::from_str_radix( - &input[(contract_offset + contract_length - 4) as usize - ..(contract_offset + contract_length) as usize], + &input[(contract_offset + contract_length - 4) as usize.. + (contract_offset + contract_length) as usize], 16, - )? * 2 - + 4; + )? * 2 + + 4; let constructor = &input[constructor_offset as usize..contract_offset as usize]; let contract = &input[contract_offset as usize..(contract_offset + contract_length) as usize]; - let metadata = &input[(contract_offset + contract_length - metadata_length) as usize - ..(contract_offset + contract_length) as usize]; + let metadata = &input[(contract_offset + contract_length - metadata_length) as usize.. 
+ (contract_offset + contract_length) as usize]; let arguments = &input[(contract_offset + contract_length) as usize..]; Ok(Constructor { diff --git a/crates/decompile/src/core/analyze.rs b/crates/decompile/src/core/analyze.rs index 9e70150b..7bc62def 100644 --- a/crates/decompile/src/core/analyze.rs +++ b/crates/decompile/src/core/analyze.rs @@ -6,8 +6,8 @@ use tracing::debug; use crate::{ interfaces::AnalyzedFunction, utils::heuristics::{ - argument_heuristic, event_heuristic, modifier_heuristic, solidity_heuristic, yul_heuristic, - Heuristic, + argument_heuristic, event_heuristic, extcall_heuristic, modifier_heuristic, + solidity_heuristic, yul_heuristic, Heuristic, }, Error, }; @@ -85,6 +85,7 @@ impl Analyzer { self.heuristics.push(Heuristic::new(solidity_heuristic)); self.heuristics.push(Heuristic::new(argument_heuristic)); self.heuristics.push(Heuristic::new(modifier_heuristic)); + self.heuristics.push(Heuristic::new(extcall_heuristic)); } AnalyzerType::Yul => { self.heuristics.push(Heuristic::new(event_heuristic)); @@ -156,8 +157,8 @@ impl Analyzer { } // check if the ending brackets are needed - if analyzer_state.jumped_conditional.is_some() - && analyzer_state.conditional_stack.contains( + if analyzer_state.jumped_conditional.is_some() && + analyzer_state.conditional_stack.contains( analyzer_state .jumped_conditional .as_ref() @@ -166,8 +167,8 @@ impl Analyzer { { // remove the conditional for (i, conditional) in analyzer_state.conditional_stack.iter().enumerate() { - if conditional - == analyzer_state.jumped_conditional.as_ref().expect( + if conditional == + analyzer_state.jumped_conditional.as_ref().expect( "impossible case: should have short-circuited in previous conditional", ) { diff --git a/crates/decompile/src/core/out/source.rs b/crates/decompile/src/core/out/source.rs index 5f81a0a4..cc915795 100644 --- a/crates/decompile/src/core/out/source.rs +++ b/crates/decompile/src/core/out/source.rs @@ -67,9 +67,9 @@ pub fn build_source( functions .iter() 
.filter(|f| { - !f.fallback - && (analyzer_type == AnalyzerType::Yul - || (f.maybe_getter_for.is_none() && !f.is_constant())) + !f.fallback && + (analyzer_type == AnalyzerType::Yul || + (f.maybe_getter_for.is_none() && !f.is_constant())) }) .for_each(|f| { let mut function_source = Vec::new(); diff --git a/crates/decompile/src/core/postprocess.rs b/crates/decompile/src/core/postprocess.rs index fff35e49..6219dd26 100644 --- a/crates/decompile/src/core/postprocess.rs +++ b/crates/decompile/src/core/postprocess.rs @@ -130,8 +130,8 @@ impl PostprocessOrchestrator { // Note: this can't be done with a postprocessor because it needs all lines if !function.payable && (function.pure || function.view) && function.arguments.is_empty() { // check for RLP encoding. very naive check, but it works for now - if function.logic.iter().any(|line| line.contains("0x0100 *")) - && function.logic.iter().any(|line| line.contains("0x01) &")) + if function.logic.iter().any(|line| line.contains("0x0100 *")) && + function.logic.iter().any(|line| line.contains("0x01) &")) { // find any storage accesses let joined = function.logic.join(" "); diff --git a/crates/decompile/src/utils/heuristics/arguments.rs b/crates/decompile/src/utils/heuristics/arguments.rs index fce28e2c..4f14fe25 100644 --- a/crates/decompile/src/utils/heuristics/arguments.rs +++ b/crates/decompile/src/utils/heuristics/arguments.rs @@ -28,8 +28,8 @@ pub fn argument_heuristic( // calculate the argument index, with the 4byte signature padding removed // for example, CALLDATALOAD(4) -> (4-4)/32 = 0 // CALLDATALOAD(36) -> (36-4)/32 = 1 - let arg_index = (state.last_instruction.inputs[0].saturating_sub(U256::from(4)) - / U256::from(32)) + let arg_index = (state.last_instruction.inputs[0].saturating_sub(U256::from(4)) / + U256::from(32)) .try_into() .unwrap_or(usize::MAX); diff --git a/crates/decompile/src/utils/heuristics/extcall.rs b/crates/decompile/src/utils/heuristics/extcall.rs new file mode 100644 index 00000000..e245989d 
--- /dev/null +++ b/crates/decompile/src/utils/heuristics/extcall.rs @@ -0,0 +1,199 @@ +use alloy::primitives::U256; +use alloy_dyn_abi::{DynSolType, DynSolValue}; +use heimdall_common::utils::strings::encode_hex_reduced; +use heimdall_vm::core::{opcodes::opcode_name, vm::State}; + +use crate::{ + core::analyze::AnalyzerState, interfaces::AnalyzedFunction, + utils::precompile::decode_precompile, Error, +}; + +pub fn extcall_heuristic( + function: &mut AnalyzedFunction, + state: &State, + analyzer_state: &mut AnalyzerState, +) -> Result<(), Error> { + let instruction = &state.last_instruction; + + match instruction.opcode { + // CALL / CALLCODE + 0xf1 | 0xf2 => { + let gas = format!("gas: {}", instruction.input_operations[0].solidify()); + let address = instruction.input_operations[1].solidify(); + let value = format!("value: {}", instruction.input_operations[2].solidify()); + let calldata = function.get_memory_range(instruction.inputs[3], instruction.inputs[4]); + + // build the modifier w/ gas and value + let modifier = format!("{{ {}, {} }}", gas, value); + + // check if the external call is a precompiled contract + match decode_precompile( + instruction.inputs[1], + &calldata, + &instruction.input_operations[5], + ) { + (true, precompile_logic) => { + function.logic.push(precompile_logic); + } + _ => { + function.logic.push(format!( + "(bool success, bytes memory ret0) = address({}).call{}(abi.encode({}));", + address, + modifier, + calldata + .iter() + .map(|x| x.operation.solidify()) + .collect::>() + .join(", ") + )); + } + } + } + + // STATICCALL / DELEGATECALL + 0xfa | 0xf4 => { + let gas = format!("gas: {}", instruction.input_operations[0].solidify()); + let address = instruction.input_operations[1].solidify(); + let calldata = function.get_memory_range(instruction.inputs[2], instruction.inputs[3]); + + // build the modifier w/ gas + let modifier = format!("{{ {} }}", gas); + + // check if the external call is a precompiled contract + match 
decode_precompile( + instruction.inputs[1], + &calldata, + &instruction.input_operations[4], + ) { + (true, precompile_logic) => { + function.logic.push(precompile_logic); + } + _ => { + function.logic.push(format!( + "(bool success, bytes memory ret0) = address({}).{}{}(abi.encode({}));", + address, + opcode_name(instruction.opcode).to_lowercase(), + modifier, + calldata + .iter() + .map(|x| x.operation.solidify()) + .collect::>() + .join(", ") + )); + } + } + } + + // REVERT + 0xfd => { + // Safely convert U256 to usize + let offset: usize = instruction.inputs[0].try_into().unwrap_or(0); + let size: usize = instruction.inputs[1].try_into().unwrap_or(0); + let revert_data = state.memory.read(offset, size); + + // (1) if revert_data starts with 0x08c379a0, the following is an + // ABI-encoded error string + // (2) if revert_data starts with 0x4e487b71, the following is a compiler panic + // (3) if revert_data starts with any other 4byte selector, it is a custom + // error and should be resolved and added to the generated ABI + // (4) if revert_data is empty, it is an empty revert. Ex: + // - if (true != false) { revert() }; + // - require(true != false) + let revert_logic; + + // handle case with error string abiencoded + if revert_data.starts_with(&[0x08, 0xc3, 0x79, 0xa0]) { + let revert_string = match revert_data.get(4..) 
{ + Some(hex_data) => match DynSolType::String.abi_decode(hex_data) { + Ok(revert) => match revert { + DynSolValue::String(revert) => revert, + _ => "decoding error".to_string(), + }, + Err(_) => "decoding error".to_string(), + }, + None => "decoding error".to_string(), + }; + revert_logic = match analyzer_state.jumped_conditional.clone() { + Some(condition) => { + analyzer_state.jumped_conditional = None; + format!("require({condition}, \"{revert_string}\");") + } + None => { + // loop backwards through logic to find the last IF statement + for i in (0..function.logic.len()).rev() { + if function.logic[i].starts_with("if") { + let conditional = match analyzer_state.conditional_stack.pop() { + Some(condition) => condition, + None => break, + }; + + function.logic[i] = + format!("require({conditional}, \"{revert_string}\");"); + } + } + return Ok(()); + } + } + } + // handle case with custom error OR empty revert + else if !revert_data.starts_with(&[0x4e, 0x48, 0x7b, 0x71]) { + let custom_error_placeholder = match revert_data.get(0..4) { + Some(selector) => { + function.errors.insert(U256::from_be_slice(selector)); + format!( + "CustomError_{}()", + encode_hex_reduced(U256::from_be_slice(selector)).replacen("0x", "", 1) + ) + } + None => "()".to_string(), + }; + + revert_logic = match analyzer_state.jumped_conditional.clone() { + Some(condition) => { + analyzer_state.jumped_conditional = None; + if custom_error_placeholder == *"()" { + format!("require({condition});",) + } else { + format!("require({condition}, {custom_error_placeholder});") + } + } + None => { + // loop backwards through logic to find the last IF statement + for i in (0..function.logic.len()).rev() { + if function.logic[i].starts_with("if") { + let conditional = match analyzer_state.conditional_stack.pop() { + Some(condition) => condition, + None => break, + }; + + if custom_error_placeholder == *"()" { + function.logic[i] = format!("require({conditional});",); + } else { + function.logic[i] = 
format!( + "require({conditional}, {custom_error_placeholder});" + ); + } + } + } + return Ok(()); + } + } + } else { + return Ok(()); + } + + function.logic.push(revert_logic); + } + + // SELFDESTRUCT + 0xff => { + function + .logic + .push(format!("selfdestruct({});", instruction.input_operations[0].solidify())); + } + + _ => {} + }; + + Ok(()) +} diff --git a/crates/decompile/src/utils/heuristics/mod.rs b/crates/decompile/src/utils/heuristics/mod.rs index bc7881bc..413e0409 100644 --- a/crates/decompile/src/utils/heuristics/mod.rs +++ b/crates/decompile/src/utils/heuristics/mod.rs @@ -5,6 +5,7 @@ use crate::{core::analyze::AnalyzerState, interfaces::AnalyzedFunction, Error}; // import heuristics mod arguments; mod events; +mod extcall; mod modifiers; mod solidity; mod yul; @@ -12,6 +13,7 @@ mod yul; // re-export heuristics pub use arguments::argument_heuristic; pub use events::event_heuristic; +pub use extcall::extcall_heuristic; pub use modifiers::modifier_heuristic; pub use solidity::solidity_heuristic; pub use yul::yul_heuristic; diff --git a/crates/decompile/src/utils/heuristics/modifiers.rs b/crates/decompile/src/utils/heuristics/modifiers.rs index a66c7b50..074ff11f 100644 --- a/crates/decompile/src/utils/heuristics/modifiers.rs +++ b/crates/decompile/src/utils/heuristics/modifiers.rs @@ -38,9 +38,9 @@ pub fn modifier_heuristic( // if the instruction is a JUMPI with non-zero CALLVALUE requirement, the function is // non-payable exactly: ISZERO(CALLVALUE()) - if function.payable - && state.last_instruction.opcode == JUMPI - && state.last_instruction.input_operations[1] == w_iszero!(w_callvalue!()) + if function.payable && + state.last_instruction.opcode == JUMPI && + state.last_instruction.input_operations[1] == w_iszero!(w_callvalue!()) { debug!( "conditional at instruction {} indicates a non-payable function", diff --git a/crates/decompile/src/utils/heuristics/solidity.rs b/crates/decompile/src/utils/heuristics/solidity.rs index f3a70e59..e467f7cc 100644 
--- a/crates/decompile/src/utils/heuristics/solidity.rs +++ b/crates/decompile/src/utils/heuristics/solidity.rs @@ -1,12 +1,12 @@ use alloy::primitives::U256; use alloy_dyn_abi::{DynSolType, DynSolValue}; use heimdall_common::utils::strings::encode_hex_reduced; -use heimdall_vm::core::{opcodes::opcode_name, vm::State}; +use heimdall_vm::core::vm::State; use crate::{ core::analyze::AnalyzerState, interfaces::{AnalyzedFunction, StorageFrame}, - utils::{constants::VARIABLE_SIZE_CHECK_REGEX, precompile::decode_precompile}, + utils::constants::VARIABLE_SIZE_CHECK_REGEX, Error, }; @@ -95,10 +95,10 @@ pub fn solidity_heuristic( // perform a series of checks to determine if the condition // is added by the compiler and can be ignored - if (conditional.contains("msg.data.length") && conditional.contains("0x04")) - || VARIABLE_SIZE_CHECK_REGEX.is_match(&conditional).unwrap_or(false) - || (conditional.replace('!', "") == "success") - || (conditional == "!msg.value") + if (conditional.contains("msg.data.length") && conditional.contains("0x04")) || + VARIABLE_SIZE_CHECK_REGEX.is_match(&conditional).unwrap_or(false) || + (conditional.replace('!', "") == "success") || + (conditional == "!msg.value") { return Ok(()); } @@ -132,74 +132,6 @@ pub fn solidity_heuristic( )); } - // CALL / CALLCODE - 0xf1 | 0xf2 => { - let gas = format!("gas: {}", instruction.input_operations[0].solidify()); - let address = instruction.input_operations[1].solidify(); - let value = format!("value: {}", instruction.input_operations[2].solidify()); - let calldata = function.get_memory_range(instruction.inputs[3], instruction.inputs[4]); - - // build the modifier w/ gas and value - let modifier = format!("{{ {}, {} }}", gas, value); - - // check if the external call is a precompiled contract - match decode_precompile( - instruction.inputs[1], - &calldata, - &instruction.input_operations[5], - ) { - (true, precompile_logic) => { - function.logic.push(precompile_logic); - } - _ => { - 
function.logic.push(format!( - "(bool success, bytes memory ret0) = address({}).call{}(abi.encode({}));", - address, - modifier, - calldata - .iter() - .map(|x| x.operation.solidify()) - .collect::>() - .join(", ") - )); - } - } - } - - // STATICCALL / DELEGATECALL - 0xfa | 0xf4 => { - let gas = format!("gas: {}", instruction.input_operations[0].solidify()); - let address = instruction.input_operations[1].solidify(); - let calldata = function.get_memory_range(instruction.inputs[2], instruction.inputs[3]); - - // build the modifier w/ gas - let modifier = format!("{{ {} }}", gas); - - // check if the external call is a precompiled contract - match decode_precompile( - instruction.inputs[1], - &calldata, - &instruction.input_operations[4], - ) { - (true, precompile_logic) => { - function.logic.push(precompile_logic); - } - _ => { - function.logic.push(format!( - "(bool success, bytes memory ret0) = address({}).{}{}(abi.encode({}));", - address, - opcode_name(instruction.opcode).to_lowercase(), - modifier, - calldata - .iter() - .map(|x| x.operation.solidify()) - .collect::>() - .join(", ") - )); - } - } - } - // REVERT 0xfd => { // Safely convert U256 to usize diff --git a/crates/decompile/src/utils/heuristics/yul.rs b/crates/decompile/src/utils/heuristics/yul.rs index 18805609..cdbbeeb8 100644 --- a/crates/decompile/src/utils/heuristics/yul.rs +++ b/crates/decompile/src/utils/heuristics/yul.rs @@ -76,8 +76,8 @@ pub fn yul_heuristic( // CALLDATACOPY, CODECOPY, EXTCODECOPY, RETURNDATACOPY, TSTORE, // SSTORE, RETURN, SELFDESTRUCT, LOG0, LOG1, LOG2, LOG3, LOG4 // we simply want to add the operation to the function's logic - 0x37 | 0x39 | 0x3c | 0x3e | 0x55 | 0x5d | 0xf0 | 0xf1 | 0xf2 | 0xf4 | 0xf5 | 0xfa - | 0xff | 0xA0 | 0xA1 | 0xA2 | 0xA3 | 0xA4 => { + 0x37 | 0x39 | 0x3c | 0x3e | 0x55 | 0x5d | 0xf0 | 0xf1 | 0xf2 | 0xf4 | 0xf5 | 0xfa | + 0xff | 0xA0 | 0xA1 | 0xA2 | 0xA3 | 0xA4 => { function.logic.push(format!( "{}({})", opcode_name(instruction.opcode).to_lowercase(), 
diff --git a/crates/decompile/src/utils/postprocessors/memory.rs b/crates/decompile/src/utils/postprocessors/memory.rs index 538ebfad..052133fc 100644 --- a/crates/decompile/src/utils/postprocessors/memory.rs +++ b/crates/decompile/src/utils/postprocessors/memory.rs @@ -52,9 +52,9 @@ pub fn memory_postprocessor( // infer the type from args and vars in the expression for (var, var_type) in state.memory_type_map.iter() { - if line.contains(var) - && !state.memory_type_map.contains_key(&var_name) - && !var_type.is_empty() + if line.contains(var) && + !state.memory_type_map.contains_key(&var_name) && + !var_type.is_empty() { *line = format!("{var_type} {line}"); state.memory_type_map.insert(var_name.to_string(), var_type.to_string()); @@ -78,8 +78,8 @@ pub fn memory_postprocessor( } // we can do some type inference here - if ["+", "-", "/", "*", "int", ">=", "<="].iter().any(|op| line.contains(op)) - || assignment[1].replace(';', "").parse::().is_ok() + if ["+", "-", "/", "*", "int", ">=", "<="].iter().any(|op| line.contains(op)) || + assignment[1].replace(';', "").parse::().is_ok() { *line = format!("uint256 {line}"); state.memory_type_map.insert(var_name.to_string(), "uint256".to_string()); diff --git a/crates/decompile/src/utils/postprocessors/storage.rs b/crates/decompile/src/utils/postprocessors/storage.rs index 74ec98e1..d5620a36 100644 --- a/crates/decompile/src/utils/postprocessors/storage.rs +++ b/crates/decompile/src/utils/postprocessors/storage.rs @@ -64,8 +64,8 @@ pub fn storage_postprocessor( } // if there is an assignment to a memory variable, save it to variable_map - if (line.trim().starts_with("store_") || line.trim().starts_with("storage_map_")) - && line.contains(" = ") + if (line.trim().starts_with("store_") || line.trim().starts_with("storage_map_")) && + line.contains(" = ") { let assignment: Vec = line.split(" = ").collect::>().iter().map(|x| x.to_string()).collect(); diff --git a/crates/decompile/src/utils/postprocessors/transient.rs 
b/crates/decompile/src/utils/postprocessors/transient.rs index eb9ebe76..93f7e28f 100644 --- a/crates/decompile/src/utils/postprocessors/transient.rs +++ b/crates/decompile/src/utils/postprocessors/transient.rs @@ -64,8 +64,8 @@ pub fn transient_postprocessor( } // if there is an assignment to a memory variable, save it to variable_map - if (line.trim().starts_with("tstore_") || line.trim().starts_with("transient_map_")) - && line.contains(" = ") + if (line.trim().starts_with("tstore_") || line.trim().starts_with("transient_map_")) && + line.contains(" = ") { let assignment: Vec = line.split(" = ").collect::>().iter().map(|x| x.to_string()).collect(); diff --git a/crates/decompile/src/utils/postprocessors/variable.rs b/crates/decompile/src/utils/postprocessors/variable.rs index 01dca862..97f88da8 100644 --- a/crates/decompile/src/utils/postprocessors/variable.rs +++ b/crates/decompile/src/utils/postprocessors/variable.rs @@ -12,8 +12,8 @@ pub fn variable_postprocessor( .chain(state.transient_map.iter()) .for_each(|(variable, expr)| { // skip exprs that are already variables - if !expr.contains(' ') - && ["store", "tstore", "transient", "storage", "var"] + if !expr.contains(' ') && + ["store", "tstore", "transient", "storage", "var"] .iter() .any(|x| expr.starts_with(x)) { diff --git a/crates/vm/src/core/types.rs b/crates/vm/src/core/types.rs index 531b909f..2d67566c 100644 --- a/crates/vm/src/core/types.rs +++ b/crates/vm/src/core/types.rs @@ -189,8 +189,8 @@ pub fn get_padding(bytes: &[u8]) -> Padding { // we can avoid doing a full check if any of the following are true: // there are no null bytes OR // neither first nor last byte is a null byte, it is not padded - if null_byte_indices.is_empty() - || null_byte_indices[0] != 0 && null_byte_indices[null_byte_indices.len() - 1] != size - 1 + if null_byte_indices.is_empty() || + null_byte_indices[0] != 0 && null_byte_indices[null_byte_indices.len() - 1] != size - 1 { return Padding::None; } diff --git 
a/crates/vm/src/core/vm.rs b/crates/vm/src/core/vm.rs index 31367d47..20e26e4e 100644 --- a/crates/vm/src/core/vm.rs +++ b/crates/vm/src/core/vm.rs @@ -303,8 +303,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&a.operation.opcode) - && (0x5f..=0x7f).contains(&b.operation.opcode) + if (0x5f..=0x7f).contains(&a.operation.opcode) && + (0x5f..=0x7f).contains(&b.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result)]) } @@ -321,8 +321,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&a.operation.opcode) - && (0x5f..=0x7f).contains(&b.operation.opcode) + if (0x5f..=0x7f).contains(&a.operation.opcode) && + (0x5f..=0x7f).contains(&b.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result)]) } @@ -339,8 +339,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&a.operation.opcode) - && (0x5f..=0x7f).contains(&b.operation.opcode) + if (0x5f..=0x7f).contains(&a.operation.opcode) && + (0x5f..=0x7f).contains(&b.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result)]) } @@ -360,8 +360,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&numerator.operation.opcode) - && (0x5f..=0x7f).contains(&denominator.operation.opcode) + if (0x5f..=0x7f).contains(&numerator.operation.opcode) && + (0x5f..=0x7f).contains(&denominator.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result)]) } @@ -381,8 +381,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if 
(0x5f..=0x7f).contains(&numerator.operation.opcode) - && (0x5f..=0x7f).contains(&denominator.operation.opcode) + if (0x5f..=0x7f).contains(&numerator.operation.opcode) && + (0x5f..=0x7f).contains(&denominator.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result.into_raw())]) @@ -403,8 +403,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&a.operation.opcode) - && (0x5f..=0x7f).contains(&modulus.operation.opcode) + if (0x5f..=0x7f).contains(&a.operation.opcode) && + (0x5f..=0x7f).contains(&modulus.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result)]) } @@ -424,8 +424,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&a.operation.opcode) - && (0x5f..=0x7f).contains(&modulus.operation.opcode) + if (0x5f..=0x7f).contains(&a.operation.opcode) && + (0x5f..=0x7f).contains(&modulus.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result.into_raw())]) @@ -447,8 +447,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&a.operation.opcode) - && (0x5f..=0x7f).contains(&b.operation.opcode) + if (0x5f..=0x7f).contains(&a.operation.opcode) && + (0x5f..=0x7f).contains(&b.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result)]) } @@ -469,8 +469,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&a.operation.opcode) - && (0x5f..=0x7f).contains(&b.operation.opcode) + if (0x5f..=0x7f).contains(&a.operation.opcode) && + (0x5f..=0x7f).contains(&b.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, 
vec![WrappedInput::Raw(result)]) } @@ -487,8 +487,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&a.operation.opcode) - && (0x5f..=0x7f).contains(&exponent.operation.opcode) + if (0x5f..=0x7f).contains(&a.operation.opcode) && + (0x5f..=0x7f).contains(&exponent.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result)]) } @@ -591,8 +591,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&a.operation.opcode) - && (0x5f..=0x7f).contains(&b.operation.opcode) + if (0x5f..=0x7f).contains(&a.operation.opcode) && + (0x5f..=0x7f).contains(&b.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result)]) } @@ -609,8 +609,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&a.operation.opcode) - && (0x5f..=0x7f).contains(&b.operation.opcode) + if (0x5f..=0x7f).contains(&a.operation.opcode) && + (0x5f..=0x7f).contains(&b.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result)]) } @@ -627,8 +627,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&a.operation.opcode) - && (0x5f..=0x7f).contains(&b.operation.opcode) + if (0x5f..=0x7f).contains(&a.operation.opcode) && + (0x5f..=0x7f).contains(&b.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result)]) } @@ -677,8 +677,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&a.operation.opcode) - && (0x5f..=0x7f).contains(&b.operation.opcode) + if (0x5f..=0x7f).contains(&a.operation.opcode) 
&& + (0x5f..=0x7f).contains(&b.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result)]) } @@ -697,8 +697,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&a.operation.opcode) - && (0x5f..=0x7f).contains(&b.operation.opcode) + if (0x5f..=0x7f).contains(&a.operation.opcode) && + (0x5f..=0x7f).contains(&b.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result)]) } @@ -721,8 +721,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&a.operation.opcode) - && (0x5f..=0x7f).contains(&b.operation.opcode) + if (0x5f..=0x7f).contains(&a.operation.opcode) && + (0x5f..=0x7f).contains(&b.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result.into_raw())]) @@ -1121,13 +1121,12 @@ impl VM { let pc: u128 = pc.try_into()?; // Check if JUMPDEST is valid and throw with 790 if not (invalid jump destination) - if (pc - <= self - .bytecode + if (pc <= + self.bytecode .len() .try_into() - .expect("impossible case: bytecode is larger than u128::MAX")) - && (self.bytecode[pc as usize] != 0x5b) + .expect("impossible case: bytecode is larger than u128::MAX")) && + (self.bytecode[pc as usize] != 0x5b) { self.exit(790, Vec::new()); return Ok(Instruction { @@ -1154,13 +1153,12 @@ impl VM { if !condition.eq(&U256::from(0u8)) { // Check if JUMPDEST is valid and throw with 790 if not (invalid jump // destination) - if (pc - <= self - .bytecode + if (pc <= + self.bytecode .len() .try_into() - .expect("impossible case: bytecode is larger than u128::MAX")) - && (self.bytecode[pc as usize] != 0x5b) + .expect("impossible case: bytecode is larger than u128::MAX")) && + (self.bytecode[pc as usize] != 0x5b) { self.exit(790, Vec::new()); return Ok(Instruction { @@ -1301,9 +1299,9 @@ 
impl VM { let data = self.memory.read(offset, size); // consume dynamic gas - let gas_cost = (375 * (topic_count as u128)) - + 8 * (size as u128) - + self.memory.expansion_cost(offset, size); + let gas_cost = (375 * (topic_count as u128)) + + 8 * (size as u128) + + self.memory.expansion_cost(offset, size); self.consume_gas(gas_cost); // no need for a panic check because the length of events should never be larger @@ -1496,9 +1494,9 @@ impl VM { let mut vm_clone = self.clone(); for _ in 0..n { - if vm_clone.bytecode.len() < vm_clone.instruction as usize - || vm_clone.exitcode != 255 - || !vm_clone.returndata.is_empty() + if vm_clone.bytecode.len() < vm_clone.instruction as usize || + vm_clone.exitcode != 255 || + !vm_clone.returndata.is_empty() { break; } diff --git a/crates/vm/src/ext/exec/util.rs b/crates/vm/src/ext/exec/util.rs index 9be91e11..38e86a41 100644 --- a/crates/vm/src/ext/exec/util.rs +++ b/crates/vm/src/ext/exec/util.rs @@ -69,13 +69,11 @@ pub fn jump_stack_depth_less_than_max_stack_depth( /// If the stack contains more than 16 of the same item (with the same sources), it is considered a /// loop. 
pub fn stack_contains_too_many_of_the_same_item(stack: &Stack) -> bool { - if stack.size() > 16 - && stack.stack.iter().any(|frame| { - let solidified_frame_source = frame.operation.solidify(); - stack.stack.iter().filter(|f| f.operation.solidify() == solidified_frame_source).count() - >= 16 - }) - { + if stack.size() > 16 && stack.stack.iter().any(|frame| { + let solidified_frame_source = frame.operation.solidify(); + stack.stack.iter().filter(|f| f.operation.solidify() == solidified_frame_source).count() >= + 16 + }) { trace!("jump matches loop-detection heuristic: 'stack_contains_too_many_of_the_same_item'",); return true; } diff --git a/crates/vm/src/ext/lexers/solidity.rs b/crates/vm/src/ext/lexers/solidity.rs index c009270c..8bdd7d41 100644 --- a/crates/vm/src/ext/lexers/solidity.rs +++ b/crates/vm/src/ext/lexers/solidity.rs @@ -219,8 +219,8 @@ impl WrappedOpcode { .push_str(format!("arg{}", (slot - 4) / 32).as_str()); } Err(_) => { - if solidified_slot.contains("0x04 + ") - || solidified_slot.contains("+ 0x04") + if solidified_slot.contains("0x04 + ") || + solidified_slot.contains("+ 0x04") { solidified_wrapped_opcode.push_str( solidified_slot diff --git a/crates/vm/src/ext/range_map.rs b/crates/vm/src/ext/range_map.rs index 872a8fea..834dc737 100644 --- a/crates/vm/src/ext/range_map.rs +++ b/crates/vm/src/ext/range_map.rs @@ -107,12 +107,12 @@ impl RangeMap { } fn range_collides(incoming: &Range, incumbent: &Range) -> bool { - !(incoming.start <= incumbent.start - && incoming.end < incumbent.end - && incoming.end < incumbent.start - || incoming.start > incumbent.start - && incoming.end >= incumbent.end - && incoming.start > incumbent.end) + !(incoming.start <= incumbent.start && + incoming.end < incumbent.end && + incoming.end < incumbent.start || + incoming.start > incumbent.start && + incoming.end >= incumbent.end && + incoming.start > incumbent.end) } } diff --git a/crates/vm/src/ext/selectors.rs b/crates/vm/src/ext/selectors.rs index b1b9f3d4..1b8cd239 
100644 --- a/crates/vm/src/ext/selectors.rs +++ b/crates/vm/src/ext/selectors.rs @@ -113,10 +113,10 @@ pub fn resolve_entry_point(vm: &mut VM, selector: &str) -> u128 { let jump_condition = call.last_instruction.input_operations[1].solidify(); let jump_taken = call.last_instruction.inputs[1].try_into().unwrap_or(1); - if jump_condition.contains(selector) - && jump_condition.contains("msg.data[0]") - && jump_condition.contains(" == ") - && jump_taken == 1 + if jump_condition.contains(selector) && + jump_condition.contains("msg.data[0]") && + jump_condition.contains(" == ") && + jump_taken == 1 { return call.last_instruction.inputs[0].try_into().unwrap_or(0); } else if jump_taken == 1 { @@ -142,8 +142,7 @@ pub fn resolve_entry_point(vm: &mut VM, selector: &str) -> u128 { /// Resolve a list of selectors to their function signatures. pub async fn resolve_selectors(selectors: Vec) -> HashMap> where - T: ResolveSelector + Send + Clone + 'static, -{ + T: ResolveSelector + Send + Clone + 'static, { // short-circuit if there are no selectors if selectors.is_empty() { return HashMap::new(); From 1a20b3de6756156aea5ecc2bfbde5daaee122c23 Mon Sep 17 00:00:00 2001 From: Jon-Becker Date: Wed, 16 Oct 2024 11:19:16 -0400 Subject: [PATCH 02/14] wip --- crates/decompile/src/interfaces/function.rs | 1 + .../decompile/src/utils/heuristics/extcall.rs | 37 ++++++++++--------- .../src/utils/heuristics/solidity.rs | 11 +++--- crates/decompile/src/utils/heuristics/yul.rs | 7 ++-- 4 files changed, 31 insertions(+), 25 deletions(-) diff --git a/crates/decompile/src/interfaces/function.rs b/crates/decompile/src/interfaces/function.rs index 5a1a3ff0..64f0367d 100644 --- a/crates/decompile/src/interfaces/function.rs +++ b/crates/decompile/src/interfaces/function.rs @@ -61,6 +61,7 @@ pub struct AnalyzedFunction { #[derive(Clone, Debug)] pub struct StorageFrame { pub operation: WrappedOpcode, + pub value: U256, } #[derive(Clone, Debug)] diff --git a/crates/decompile/src/utils/heuristics/extcall.rs 
b/crates/decompile/src/utils/heuristics/extcall.rs index e245989d..f24d51e0 100644 --- a/crates/decompile/src/utils/heuristics/extcall.rs +++ b/crates/decompile/src/utils/heuristics/extcall.rs @@ -1,6 +1,9 @@ use alloy::primitives::U256; use alloy_dyn_abi::{DynSolType, DynSolValue}; -use heimdall_common::utils::strings::encode_hex_reduced; +use heimdall_common::utils::{ + hex::ToLowerHex, + strings::{encode_hex, encode_hex_reduced}, +}; use heimdall_vm::core::{opcodes::opcode_name, vm::State}; use crate::{ @@ -21,7 +24,12 @@ pub fn extcall_heuristic( let gas = format!("gas: {}", instruction.input_operations[0].solidify()); let address = instruction.input_operations[1].solidify(); let value = format!("value: {}", instruction.input_operations[2].solidify()); - let calldata = function.get_memory_range(instruction.inputs[3], instruction.inputs[4]); + let memory = function.get_memory_range(instruction.inputs[3], instruction.inputs[4]); + let extcalldata = memory + .iter() + .map(|x| x.value.to_lower_hex().trim_start_matches("0x").to_owned()) + .collect::>() + .join(""); // build the modifier w/ gas and value let modifier = format!("{{ {}, {} }}", gas, value); @@ -29,7 +37,7 @@ pub fn extcall_heuristic( // check if the external call is a precompiled contract match decode_precompile( instruction.inputs[1], - &calldata, + &memory, &instruction.input_operations[5], ) { (true, precompile_logic) => { @@ -38,13 +46,7 @@ pub fn extcall_heuristic( _ => { function.logic.push(format!( "(bool success, bytes memory ret0) = address({}).call{}(abi.encode({}));", - address, - modifier, - calldata - .iter() - .map(|x| x.operation.solidify()) - .collect::>() - .join(", ") + address, modifier, extcalldata )); } } @@ -54,7 +56,12 @@ pub fn extcall_heuristic( 0xfa | 0xf4 => { let gas = format!("gas: {}", instruction.input_operations[0].solidify()); let address = instruction.input_operations[1].solidify(); - let calldata = function.get_memory_range(instruction.inputs[2], 
instruction.inputs[3]); + let memory = function.get_memory_range(instruction.inputs[2], instruction.inputs[3]); + let extcalldata = memory + .iter() + .map(|x| x.value.to_lower_hex().trim_start_matches("0x").to_owned()) + .collect::>() + .join(""); // build the modifier w/ gas let modifier = format!("{{ {} }}", gas); @@ -62,7 +69,7 @@ pub fn extcall_heuristic( // check if the external call is a precompiled contract match decode_precompile( instruction.inputs[1], - &calldata, + &memory, &instruction.input_operations[4], ) { (true, precompile_logic) => { @@ -74,11 +81,7 @@ pub fn extcall_heuristic( address, opcode_name(instruction.opcode).to_lowercase(), modifier, - calldata - .iter() - .map(|x| x.operation.solidify()) - .collect::>() - .join(", ") + extcalldata )); } } diff --git a/crates/decompile/src/utils/heuristics/solidity.rs b/crates/decompile/src/utils/heuristics/solidity.rs index e467f7cc..c508c861 100644 --- a/crates/decompile/src/utils/heuristics/solidity.rs +++ b/crates/decompile/src/utils/heuristics/solidity.rs @@ -68,10 +68,11 @@ pub fn solidity_heuristic( // MSTORE / MSTORE8 0x52 | 0x53 => { let key = instruction.inputs[0]; + let value = instruction.inputs[1]; let operation = instruction.input_operations[1].to_owned(); // add the mstore to the function's memory map - function.memory.insert(key, StorageFrame { operation }); + function.memory.insert(key, StorageFrame { operation, value }); function.logic.push(format!( "memory[{}] = {};", encode_hex_reduced(key), @@ -95,10 +96,10 @@ pub fn solidity_heuristic( // perform a series of checks to determine if the condition // is added by the compiler and can be ignored - if (conditional.contains("msg.data.length") && conditional.contains("0x04")) || - VARIABLE_SIZE_CHECK_REGEX.is_match(&conditional).unwrap_or(false) || - (conditional.replace('!', "") == "success") || - (conditional == "!msg.value") + if (conditional.contains("msg.data.length") && conditional.contains("0x04")) + || 
VARIABLE_SIZE_CHECK_REGEX.is_match(&conditional).unwrap_or(false) + || (conditional.replace('!', "") == "success") + || (conditional == "!msg.value") { return Ok(()); } diff --git a/crates/decompile/src/utils/heuristics/yul.rs b/crates/decompile/src/utils/heuristics/yul.rs index cdbbeeb8..352f4eb4 100644 --- a/crates/decompile/src/utils/heuristics/yul.rs +++ b/crates/decompile/src/utils/heuristics/yul.rs @@ -18,10 +18,11 @@ pub fn yul_heuristic( // MSTORE / MSTORE8 0x52 | 0x53 => { let key = instruction.inputs[0]; + let value = instruction.inputs[1]; let operation = instruction.input_operations[1].clone(); // add the mstore to the function's memory map - function.memory.insert(key, StorageFrame { operation }); + function.memory.insert(key, StorageFrame { operation, value }); function.logic.push(format!( "{}({}, {})", opcode_name(instruction.opcode).to_lowercase(), @@ -76,8 +77,8 @@ pub fn yul_heuristic( // CALLDATACOPY, CODECOPY, EXTCODECOPY, RETURNDATACOPY, TSTORE, // SSTORE, RETURN, SELFDESTRUCT, LOG0, LOG1, LOG2, LOG3, LOG4 // we simply want to add the operation to the function's logic - 0x37 | 0x39 | 0x3c | 0x3e | 0x55 | 0x5d | 0xf0 | 0xf1 | 0xf2 | 0xf4 | 0xf5 | 0xfa | - 0xff | 0xA0 | 0xA1 | 0xA2 | 0xA3 | 0xA4 => { + 0x37 | 0x39 | 0x3c | 0x3e | 0x55 | 0x5d | 0xf0 | 0xf1 | 0xf2 | 0xf4 | 0xf5 | 0xfa + | 0xff | 0xA0 | 0xA1 | 0xA2 | 0xA3 | 0xA4 => { function.logic.push(format!( "{}({})", opcode_name(instruction.opcode).to_lowercase(), From 153f067653bf420065ec009958966d8d67e5efdc Mon Sep 17 00:00:00 2001 From: Jon-Becker Date: Mon, 4 Nov 2024 12:00:39 -0500 Subject: [PATCH 03/14] wip --- Cargo.lock | 1 + crates/common/src/ether/calldata.rs | 42 +++- crates/core/tests/test_decode.rs | 2 + crates/decode/src/interfaces/args.rs | 7 +- crates/decompile/Cargo.toml | 7 +- crates/decompile/src/core/analyze.rs | 8 +- .../decompile/src/utils/heuristics/extcall.rs | 235 +++++++----------- crates/decompile/src/utils/precompile.rs | 72 +++--- 8 files changed, 168 insertions(+), 
206 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c675b131..ac0397e5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2212,6 +2212,7 @@ dependencies = [ "serde", "serde_json", "thiserror", + "tokio", "tracing", ] diff --git a/crates/common/src/ether/calldata.rs b/crates/common/src/ether/calldata.rs index 80cc2cf3..1c43b5d5 100644 --- a/crates/common/src/ether/calldata.rs +++ b/crates/common/src/ether/calldata.rs @@ -4,13 +4,16 @@ use alloy::primitives::TxHash; use eyre::{bail, eyre, Result}; /// Given a target, return calldata of the target. -pub async fn get_calldata_from_target(target: &str, rpc_url: &str) -> Result> { +pub async fn get_calldata_from_target(target: &str, raw: bool, rpc_url: &str) -> Result> { // If the target is a transaction hash, fetch the calldata from the RPC provider. if let Ok(address) = target.parse::() { - return get_transaction(address, rpc_url) - .await - .map(|tx| tx.input.to_vec()) - .map_err(|_| eyre!("failed to fetch transaction from RPC provider")); + // if raw is true, the user specified that the target is raw calldata. skip fetching the transaction. + if !raw { + return get_transaction(address, rpc_url) + .await + .map(|tx| tx.input.to_vec()) + .map_err(|_| eyre!("failed to fetch transaction from RPC provider")); + } } // If the target is not a transaction hash, it could be calldata. 
@@ -34,6 +37,7 @@ mod tests { let calldata = get_calldata_from_target( "0x317907eeece00619fd4418c18a4ec4ebe5c87cdbff808f4b01cc2c6384799837", + false, &rpc_url, ) .await @@ -51,6 +55,7 @@ mod tests { let calldata = get_calldata_from_target( "0xf14fcbc8bf9eac48d61719f80efb268ef1099a248fa332ed639041337954647ec6583f2e", + false, &rpc_url, ) .await @@ -66,10 +71,31 @@ mod tests { std::process::exit(0); }); - let calldata = - get_calldata_from_target("asfnsdalkfasdlfnlasdkfnalkdsfndaskljfnasldkjfnasf", &rpc_url) - .await; + let calldata = get_calldata_from_target( + "asfnsdalkfasdlfnlasdkfnalkdsfndaskljfnasldkjfnasf", + false, + &rpc_url, + ) + .await; assert!(calldata.is_err()); } + + #[tokio::test] + async fn test_get_calldata_when_target_is_calldata_that_is_exactly_32_bytes() { + let rpc_url = std::env::var("RPC_URL").unwrap_or_else(|_| { + println!("RPC_URL not set, skipping test"); + std::process::exit(0); + }); + + let calldata = get_calldata_from_target( + "0x317907eeece00619fd4418c18a4ec4ebe5c87cdbff808f4b01cc2c6384799837", + true, + &rpc_url, + ) + .await + .expect("failed to get calldata from target"); + + assert!(calldata.len() == 32); + } } diff --git a/crates/core/tests/test_decode.rs b/crates/core/tests/test_decode.rs index a8338347..c59da7eb 100644 --- a/crates/core/tests/test_decode.rs +++ b/crates/core/tests/test_decode.rs @@ -14,6 +14,7 @@ mod integration_tests { constructor: false, truncate_calldata: false, skip_resolving: false, + raw: false, }; let _ = heimdall_decoder::decode(args).await; } @@ -29,6 +30,7 @@ mod integration_tests { constructor: false, truncate_calldata: false, skip_resolving: false, + raw: false, }; let _ = heimdall_decoder::decode(args).await; } diff --git a/crates/decode/src/interfaces/args.rs b/crates/decode/src/interfaces/args.rs index e13c05bd..b1c3a114 100644 --- a/crates/decode/src/interfaces/args.rs +++ b/crates/decode/src/interfaces/args.rs @@ -43,11 +43,15 @@ pub struct DecodeArgs { /// Whether to skip resolving selectors. 
Heimdall will attempt to guess types. #[clap(long = "skip-resolving")] pub skip_resolving: bool, + + /// Whether to treat the target as a raw calldata string. Useful if the target is exactly 32 bytes. + #[clap(long, short)] + pub raw: bool, } impl DecodeArgs { pub async fn get_calldata(&self) -> Result> { - get_calldata_from_target(&self.target, &self.rpc_url).await + get_calldata_from_target(&self.target, self.raw, &self.rpc_url).await } } @@ -62,6 +66,7 @@ impl DecodeArgsBuilder { constructor: Some(false), truncate_calldata: Some(false), skip_resolving: Some(false), + raw: Some(false), } } } diff --git a/crates/decompile/Cargo.toml b/crates/decompile/Cargo.toml index e5b2d504..eb5815a5 100644 --- a/crates/decompile/Cargo.toml +++ b/crates/decompile/Cargo.toml @@ -30,8 +30,13 @@ fancy-regex = "0.11.0" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" alloy-dyn-abi = "0.8.3" -alloy = { version = "0.3.3", features = ["full", "rpc-types-debug", "rpc-types-trace"] } +alloy = { version = "0.3.3", features = [ + "full", + "rpc-types-debug", + "rpc-types-trace", +] } hashbrown = "0.14.5" +tokio = { version = "1", features = ["full"] } heimdall-disassembler.workspace = true heimdall-vm.workspace = true diff --git a/crates/decompile/src/core/analyze.rs b/crates/decompile/src/core/analyze.rs index 7bc62def..fcf51b8e 100644 --- a/crates/decompile/src/core/analyze.rs +++ b/crates/decompile/src/core/analyze.rs @@ -157,8 +157,8 @@ impl Analyzer { } // check if the ending brackets are needed - if analyzer_state.jumped_conditional.is_some() && - analyzer_state.conditional_stack.contains( + if analyzer_state.jumped_conditional.is_some() + && analyzer_state.conditional_stack.contains( analyzer_state .jumped_conditional .as_ref() @@ -167,8 +167,8 @@ impl Analyzer { { // remove the conditional for (i, conditional) in analyzer_state.conditional_stack.iter().enumerate() { - if conditional == - analyzer_state.jumped_conditional.as_ref().expect( + if conditional + == 
analyzer_state.jumped_conditional.as_ref().expect( "impossible case: should have short-circuited in previous conditional", ) { diff --git a/crates/decompile/src/utils/heuristics/extcall.rs b/crates/decompile/src/utils/heuristics/extcall.rs index f24d51e0..29a20a06 100644 --- a/crates/decompile/src/utils/heuristics/extcall.rs +++ b/crates/decompile/src/utils/heuristics/extcall.rs @@ -1,29 +1,28 @@ use alloy::primitives::U256; -use alloy_dyn_abi::{DynSolType, DynSolValue}; -use heimdall_common::utils::{ - hex::ToLowerHex, - strings::{encode_hex, encode_hex_reduced}, +use eyre::eyre; +use heimdall_common::utils::{hex::ToLowerHex, sync::blocking_await}; +use heimdall_vm::{ + core::{opcodes::opcode_name, vm::State}, + w_gas, }; -use heimdall_vm::core::{opcodes::opcode_name, vm::State}; use crate::{ core::analyze::AnalyzerState, interfaces::AnalyzedFunction, utils::precompile::decode_precompile, Error, }; +use heimdall_decoder::{decode, DecodeArgsBuilder}; pub fn extcall_heuristic( function: &mut AnalyzedFunction, state: &State, - analyzer_state: &mut AnalyzerState, + _: &mut AnalyzerState, ) -> Result<(), Error> { let instruction = &state.last_instruction; match instruction.opcode { // CALL / CALLCODE 0xf1 | 0xf2 => { - let gas = format!("gas: {}", instruction.input_operations[0].solidify()); let address = instruction.input_operations[1].solidify(); - let value = format!("value: {}", instruction.input_operations[2].solidify()); let memory = function.get_memory_range(instruction.inputs[3], instruction.inputs[4]); let extcalldata = memory .iter() @@ -31,24 +30,51 @@ pub fn extcall_heuristic( .collect::>() .join(""); - // build the modifier w/ gas and value - let modifier = format!("{{ {}, {} }}", gas, value); + let decoded = blocking_await(move || { + let rt = tokio::runtime::Runtime::new().expect("failed to get runtime"); + + rt.block_on(async { + decode( + DecodeArgsBuilder::new() + .target(extcalldata) + .raw(true) + .build() + .expect("Failed to build DecodeArgs"), + 
) + .await + }) + }) + .map_err(|e| eyre!("Failed to decode extcalldata: {}", e))?; + + // build modifiers + // - if gas is just the default (GAS()), we don't need to include it + // - if value is just the default (0), we don't need to include it + let mut modifiers = vec![]; + if instruction.input_operations[0] != w_gas!() { + modifiers.push(format!("gas: {}", instruction.input_operations[0].solidify())); + } + if instruction.inputs[2] != U256::ZERO { + modifiers.push(format!("value: {}", instruction.input_operations[2].solidify())); + } + let modifier = if modifiers.is_empty() { + "".to_string() + } else { + format!("{{ {} }}", modifiers.join(", ")) + }; // check if the external call is a precompiled contract - match decode_precompile( - instruction.inputs[1], - &memory, - &instruction.input_operations[5], - ) { - (true, precompile_logic) => { - function.logic.push(precompile_logic); - } - _ => { - function.logic.push(format!( - "(bool success, bytes memory ret0) = address({}).call{}(abi.encode({}));", - address, modifier, extcalldata - )); - } + if let Some(precompile_logic) = + decode_precompile(instruction.inputs[1], &memory, &instruction.input_operations[5]) + { + function.logic.push(precompile_logic); + } else { + function.logic.push(format!( + "(bool success, bytes memory ret0) = address({}).{}{}(...); // {}", + address, + modifier, + decoded.decoded.name, + opcode_name(instruction.opcode).to_lowercase(), + )); } } @@ -63,136 +89,44 @@ pub fn extcall_heuristic( .collect::>() .join(""); + let decoded = blocking_await(move || { + let rt = tokio::runtime::Runtime::new().expect("failed to get runtime"); + + rt.block_on(async { + decode( + DecodeArgsBuilder::new() + .target(extcalldata) + .raw(true) + .build() + .expect("Failed to build DecodeArgs"), + ) + .await + }) + }) + .map_err(|e| eyre!("Failed to decode extcalldata: {}", e))?; + // build the modifier w/ gas - let modifier = format!("{{ {} }}", gas); + // if the modifier is just the default (GAS()), we 
don't need to include it + let modifier = if instruction.input_operations[0] != w_gas!() { + format!("{{ {} }}", gas) + } else { + "".to_string() + }; // check if the external call is a precompiled contract - match decode_precompile( - instruction.inputs[1], - &memory, - &instruction.input_operations[4], - ) { - (true, precompile_logic) => { - function.logic.push(precompile_logic); - } - _ => { - function.logic.push(format!( - "(bool success, bytes memory ret0) = address({}).{}{}(abi.encode({}));", - address, - opcode_name(instruction.opcode).to_lowercase(), - modifier, - extcalldata - )); - } - } - } - - // REVERT - 0xfd => { - // Safely convert U256 to usize - let offset: usize = instruction.inputs[0].try_into().unwrap_or(0); - let size: usize = instruction.inputs[1].try_into().unwrap_or(0); - let revert_data = state.memory.read(offset, size); - - // (1) if revert_data starts with 0x08c379a0, the folling is an error string - // abiencoded (2) if revert_data starts with 0x4e487b71, the - // following is a compiler panic (3) if revert_data starts with any - // other 4byte selector, it is a custom error and should - // be resolved and added to the generated ABI - // (4) if revert_data is empty, it is an empty revert. Ex: - // - if (true != false) { revert() }; - // - require(true != false) - let revert_logic; - - // handle case with error string abiencoded - if revert_data.starts_with(&[0x08, 0xc3, 0x79, 0xa0]) { - let revert_string = match revert_data.get(4..) 
{ - Some(hex_data) => match DynSolType::String.abi_decode(hex_data) { - Ok(revert) => match revert { - DynSolValue::String(revert) => revert, - _ => "decoding error".to_string(), - }, - Err(_) => "decoding error".to_string(), - }, - None => "decoding error".to_string(), - }; - revert_logic = match analyzer_state.jumped_conditional.clone() { - Some(condition) => { - analyzer_state.jumped_conditional = None; - format!("require({condition}, \"{revert_string}\");") - } - None => { - // loop backwards through logic to find the last IF statement - for i in (0..function.logic.len()).rev() { - if function.logic[i].starts_with("if") { - let conditional = match analyzer_state.conditional_stack.pop() { - Some(condition) => condition, - None => break, - }; - - function.logic[i] = - format!("require({conditional}, \"{revert_string}\");"); - } - } - return Ok(()); - } - } - } - // handle case with custom error OR empty revert - else if !revert_data.starts_with(&[0x4e, 0x48, 0x7b, 0x71]) { - let custom_error_placeholder = match revert_data.get(0..4) { - Some(selector) => { - function.errors.insert(U256::from_be_slice(selector)); - format!( - "CustomError_{}()", - encode_hex_reduced(U256::from_be_slice(selector)).replacen("0x", "", 1) - ) - } - None => "()".to_string(), - }; - - revert_logic = match analyzer_state.jumped_conditional.clone() { - Some(condition) => { - analyzer_state.jumped_conditional = None; - if custom_error_placeholder == *"()" { - format!("require({condition});",) - } else { - format!("require({condition}, {custom_error_placeholder});") - } - } - None => { - // loop backwards through logic to find the last IF statement - for i in (0..function.logic.len()).rev() { - if function.logic[i].starts_with("if") { - let conditional = match analyzer_state.conditional_stack.pop() { - Some(condition) => condition, - None => break, - }; - - if custom_error_placeholder == *"()" { - function.logic[i] = format!("require({conditional});",); - } else { - function.logic[i] = 
format!( - "require({conditional}, {custom_error_placeholder});" - ); - } - } - } - return Ok(()); - } - } + if let Some(precompile_logic) = + decode_precompile(instruction.inputs[1], &memory, &instruction.input_operations[4]) + { + function.logic.push(precompile_logic); } else { - return Ok(()); + function.logic.push(format!( + "(bool success, bytes memory ret0) = address({}).{}{}(...); // {}", + address, + modifier, + decoded.decoded.name, + opcode_name(instruction.opcode).to_lowercase(), + )); } - - function.logic.push(revert_logic); - } - - // SELFDESTRUCT - 0xff => { - function - .logic - .push(format!("selfdestruct({});", instruction.input_operations[0].solidify())); } _ => {} @@ -200,3 +134,6 @@ pub fn extcall_heuristic( Ok(()) } + +// TODO: handle skip_resolving (need to fix in inspect mod too) +// TODO: handle case where decoding fails diff --git a/crates/decompile/src/utils/precompile.rs b/crates/decompile/src/utils/precompile.rs index b63aa30c..014acb49 100644 --- a/crates/decompile/src/utils/precompile.rs +++ b/crates/decompile/src/utils/precompile.rs @@ -14,54 +14,40 @@ pub fn decode_precompile( precompile_address: U256, extcalldata_memory: &[StorageFrame], return_data_offset: &WrappedOpcode, -) -> (bool, String) { +) -> Option { // safely convert the precompile address to a usize. 
let address: usize = match precompile_address.try_into() { Ok(x) => x, Err(_) => usize::MAX, }; - let mut is_ext_call_precompile = false; - let mut ext_call_logic = String::new(); - match address { - 1 => { - is_ext_call_precompile = true; - ext_call_logic = format!( - "address memory[{}] = ecrecover({});", - return_data_offset.solidify(), - extcalldata_memory - .iter() - .map(|x| x.operation.solidify()) - .collect::>() - .join(", ") - ); - } - 2 => { - is_ext_call_precompile = true; - ext_call_logic = format!( - "bytes memory[{}] = sha256({});", - return_data_offset.solidify(), - extcalldata_memory - .iter() - .map(|x| x.operation.solidify()) - .collect::>() - .join(", ") - ); - } - 3 => { - is_ext_call_precompile = true; - ext_call_logic = format!( - "bytes memory[{}] = ripemd160({});", - return_data_offset.solidify(), - extcalldata_memory - .iter() - .map(|x| x.operation.solidify()) - .collect::>() - .join(", ") - ); - } - _ => {} + 1 => Some(format!( + "address memory[{}] = ecrecover({});", + return_data_offset.solidify(), + extcalldata_memory + .iter() + .map(|x| x.operation.solidify()) + .collect::>() + .join(", ") + )), + 2 => Some(format!( + "bytes memory[{}] = sha256({});", + return_data_offset.solidify(), + extcalldata_memory + .iter() + .map(|x| x.operation.solidify()) + .collect::>() + .join(", ") + )), + 3 => Some(format!( + "bytes memory[{}] = ripemd160({});", + return_data_offset.solidify(), + extcalldata_memory + .iter() + .map(|x| x.operation.solidify()) + .collect::>() + .join(", ") + )), + _ => None, } - - (is_ext_call_precompile, ext_call_logic) } From c3fee86e1f01e5b22402a28d3f4afa6583b46e8c Mon Sep 17 00:00:00 2001 From: Jon-Becker Date: Thu, 7 Nov 2024 15:05:37 -0500 Subject: [PATCH 04/14] wip --- .../decompile/src/utils/heuristics/extcall.rs | 60 ++++++++++++------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/crates/decompile/src/utils/heuristics/extcall.rs b/crates/decompile/src/utils/heuristics/extcall.rs index 
29a20a06..0e2fc2f0 100644 --- a/crates/decompile/src/utils/heuristics/extcall.rs +++ b/crates/decompile/src/utils/heuristics/extcall.rs @@ -3,7 +3,7 @@ use eyre::eyre; use heimdall_common::utils::{hex::ToLowerHex, sync::blocking_await}; use heimdall_vm::{ core::{opcodes::opcode_name, vm::State}, - w_gas, + w_gas, w_push0, }; use crate::{ @@ -30,13 +30,14 @@ pub fn extcall_heuristic( .collect::>() .join(""); + let extcalldata_clone = extcalldata.clone(); let decoded = blocking_await(move || { let rt = tokio::runtime::Runtime::new().expect("failed to get runtime"); rt.block_on(async { decode( DecodeArgsBuilder::new() - .target(extcalldata) + .target(extcalldata_clone) .raw(true) .build() .expect("Failed to build DecodeArgs"), @@ -44,7 +45,7 @@ pub fn extcall_heuristic( .await }) }) - .map_err(|e| eyre!("Failed to decode extcalldata: {}", e))?; + .ok(); // build modifiers // - if gas is just the default (GAS()), we don't need to include it @@ -53,7 +54,7 @@ pub fn extcall_heuristic( if instruction.input_operations[0] != w_gas!() { modifiers.push(format!("gas: {}", instruction.input_operations[0].solidify())); } - if instruction.inputs[2] != U256::ZERO { + if instruction.input_operations[2] != w_push0!() { modifiers.push(format!("value: {}", instruction.input_operations[2].solidify())); } let modifier = if modifiers.is_empty() { @@ -68,13 +69,20 @@ pub fn extcall_heuristic( { function.logic.push(precompile_logic); } else { - function.logic.push(format!( - "(bool success, bytes memory ret0) = address({}).{}{}(...); // {}", - address, - modifier, - decoded.decoded.name, - opcode_name(instruction.opcode).to_lowercase(), - )); + if let Some(decoded) = decoded { + function.logic.push(format!( + "(bool success, bytes memory ret0) = address({}).{}{}(...); // {}", + address, + modifier, + decoded.decoded.name, + opcode_name(instruction.opcode).to_lowercase(), + )); + } else { + function.logic.push(format!( + "(bool success, bytes memory ret0) = 
address({}).call{}(abi.encode({}));", + address, modifier, extcalldata + )); + } } } @@ -89,13 +97,14 @@ pub fn extcall_heuristic( .collect::>() .join(""); + let extcalldata_clone = extcalldata.clone(); let decoded = blocking_await(move || { let rt = tokio::runtime::Runtime::new().expect("failed to get runtime"); rt.block_on(async { decode( DecodeArgsBuilder::new() - .target(extcalldata) + .target(extcalldata_clone) .raw(true) .build() .expect("Failed to build DecodeArgs"), @@ -103,7 +112,7 @@ pub fn extcall_heuristic( .await }) }) - .map_err(|e| eyre!("Failed to decode extcalldata: {}", e))?; + .ok(); // build the modifier w/ gas // if the modifier is just the default (GAS()), we don't need to include it @@ -119,13 +128,23 @@ pub fn extcall_heuristic( { function.logic.push(precompile_logic); } else { - function.logic.push(format!( - "(bool success, bytes memory ret0) = address({}).{}{}(...); // {}", - address, - modifier, - decoded.decoded.name, - opcode_name(instruction.opcode).to_lowercase(), - )); + if let Some(decoded) = decoded { + function.logic.push(format!( + "(bool success, bytes memory ret0) = address({}).{}{}(...); // {}", + address, + modifier, + decoded.decoded.name, + opcode_name(instruction.opcode).to_lowercase(), + )); + } else { + function.logic.push(format!( + "(bool success, bytes memory ret0) = address({}).{}{}(abi.encode({}));", + address, + opcode_name(instruction.opcode).to_lowercase(), + modifier, + extcalldata + )); + } } } @@ -136,4 +155,3 @@ pub fn extcall_heuristic( } // TODO: handle skip_resolving (need to fix in inspect mod too) -// TODO: handle case where decoding fails From bbab941a455e84866b087057b2474f09333bc8e9 Mon Sep 17 00:00:00 2001 From: Jon-Becker Date: Fri, 15 Nov 2024 15:32:44 -0600 Subject: [PATCH 05/14] feat: parse `CALL`/`CALLCODE` `value` as ether, if possible --- crates/cfg/src/core/mod.rs | 10 +- crates/cfg/src/interfaces/args.rs | 8 +- crates/cfg/src/interfaces/mod.rs | 2 +- crates/cfg/src/lib.rs | 4 +- 
crates/cli/src/args.rs | 4 +- crates/cli/src/main.rs | 2 +- crates/common/src/ether/calldata.rs | 3 +- crates/common/src/utils/hex.rs | 10 +- crates/common/src/utils/threading.rs | 2 +- crates/core/benches/bench_cfg.rs | 6 +- crates/core/tests/test_cfg.rs | 10 +- crates/decode/src/interfaces/args.rs | 3 +- crates/decompile/src/core/analyze.rs | 8 +- .../decompile/src/utils/heuristics/extcall.rs | 91 +++++++++++-------- .../src/utils/heuristics/solidity.rs | 8 +- crates/decompile/src/utils/heuristics/yul.rs | 4 +- .../src/utils/postprocessors/arithmetic.rs | 2 +- crates/vm/src/core/memory.rs | 2 +- crates/vm/src/core/stack.rs | 1 + examples/cfg/src/main.rs | 6 +- 20 files changed, 101 insertions(+), 85 deletions(-) diff --git a/crates/cfg/src/core/mod.rs b/crates/cfg/src/core/mod.rs index 9ac86621..054cbf96 100644 --- a/crates/cfg/src/core/mod.rs +++ b/crates/cfg/src/core/mod.rs @@ -8,17 +8,17 @@ use heimdall_vm::core::vm::VM; use petgraph::{dot::Dot, Graph}; use std::time::{Duration, Instant}; -use super::CFGArgs; +use super::CfgArgs; use crate::{core::graph::build_cfg, error::Error}; use tracing::{debug, info}; #[derive(Debug, Clone)] -pub struct CFGResult { +pub struct CfgResult { pub graph: Graph, } -impl CFGResult { +impl CfgResult { pub fn as_dot(&self, color_edges: bool) -> String { let output = format!("{}", Dot::with_config(&self.graph, &[])); @@ -44,7 +44,7 @@ impl CFGResult { } } -pub async fn cfg(args: CFGArgs) -> Result { +pub async fn cfg(args: CfgArgs) -> Result { // init let start_time = Instant::now(); @@ -99,5 +99,5 @@ pub async fn cfg(args: CFGArgs) -> Result { debug!("cfg generated in {:?}", start_time.elapsed()); info!("generated cfg successfully"); - Ok(CFGResult { graph: contract_cfg }) + Ok(CfgResult { graph: contract_cfg }) } diff --git a/crates/cfg/src/interfaces/args.rs b/crates/cfg/src/interfaces/args.rs index 3338ad5a..9e9cc5d8 100644 --- a/crates/cfg/src/interfaces/args.rs +++ b/crates/cfg/src/interfaces/args.rs @@ -10,8 +10,8 @@ use 
heimdall_config::parse_url_arg; after_help = "For more information, read the wiki: https://jbecker.dev/r/heimdall-rs/wiki", override_usage = "heimdall cfg [OPTIONS]" )] -pub struct CFGArgs { - /// The target to generate a CFG for, either a file, bytecode, contract address, or ENS name. +pub struct CfgArgs { + /// The target to generate a Cfg for, either a file, bytecode, contract address, or ENS name. #[clap(required = true)] pub target: String, @@ -42,13 +42,13 @@ pub struct CFGArgs { pub timeout: u64, } -impl CFGArgs { +impl CfgArgs { pub async fn get_bytecode(&self) -> Result> { get_bytecode_from_target(&self.target, &self.rpc_url).await } } -impl CFGArgsBuilder { +impl CfgArgsBuilder { pub fn new() -> Self { Self { target: Some(String::new()), diff --git a/crates/cfg/src/interfaces/mod.rs b/crates/cfg/src/interfaces/mod.rs index 3ead88ea..8471a6dd 100644 --- a/crates/cfg/src/interfaces/mod.rs +++ b/crates/cfg/src/interfaces/mod.rs @@ -1,4 +1,4 @@ mod args; // re-export the public interface -pub use args::{CFGArgs, CFGArgsBuilder}; +pub use args::{CfgArgs, CfgArgsBuilder}; diff --git a/crates/cfg/src/lib.rs b/crates/cfg/src/lib.rs index f909564c..a5a0bc8f 100644 --- a/crates/cfg/src/lib.rs +++ b/crates/cfg/src/lib.rs @@ -4,6 +4,6 @@ mod core; mod interfaces; // re-export the public interface -pub use core::{cfg, CFGResult}; +pub use core::{cfg, CfgResult}; pub use error::Error; -pub use interfaces::{CFGArgs, CFGArgsBuilder}; +pub use interfaces::{CfgArgs, CfgArgsBuilder}; diff --git a/crates/cli/src/args.rs b/crates/cli/src/args.rs index 248283b6..e0a614b1 100644 --- a/crates/cli/src/args.rs +++ b/crates/cli/src/args.rs @@ -4,7 +4,7 @@ use clap::{ArgAction, Args, ValueEnum}; use heimdall_cache::CacheArgs; use heimdall_config::ConfigArgs; use heimdall_core::{ - heimdall_cfg::CFGArgs, heimdall_decoder::DecodeArgs, heimdall_decompiler::DecompilerArgs, + heimdall_cfg::CfgArgs, heimdall_decoder::DecodeArgs, heimdall_decompiler::DecompilerArgs, 
heimdall_disassembler::DisassemblerArgs, heimdall_dump::DumpArgs, heimdall_inspect::InspectArgs, }; @@ -42,7 +42,7 @@ pub enum Subcommands { Decompile(DecompilerArgs), #[clap(name = "cfg", about = "Generate a visual control flow graph for EVM bytecode")] - CFG(CFGArgs), + Cfg(CfgArgs), #[clap(name = "decode", about = "Decode calldata into readable types")] Decode(DecodeArgs), diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs index 942cafcf..362a5031 100644 --- a/crates/cli/src/main.rs +++ b/crates/cli/src/main.rs @@ -160,7 +160,7 @@ async fn main() -> Result<()> { result.display() } - Subcommands::CFG(mut cmd) => { + Subcommands::Cfg(mut cmd) => { // if the user has not specified a rpc url, use the default if cmd.rpc_url.as_str() == "" { cmd.rpc_url = configuration.rpc_url; diff --git a/crates/common/src/ether/calldata.rs b/crates/common/src/ether/calldata.rs index 1c43b5d5..bd559a07 100644 --- a/crates/common/src/ether/calldata.rs +++ b/crates/common/src/ether/calldata.rs @@ -7,7 +7,8 @@ use eyre::{bail, eyre, Result}; pub async fn get_calldata_from_target(target: &str, raw: bool, rpc_url: &str) -> Result> { // If the target is a transaction hash, fetch the calldata from the RPC provider. if let Ok(address) = target.parse::() { - // if raw is true, the user specified that the target is raw calldata. skip fetching the transaction. + // if raw is true, the user specified that the target is raw calldata. skip fetching the + // transaction. if !raw { return get_transaction(address, rpc_url) .await diff --git a/crates/common/src/utils/hex.rs b/crates/common/src/utils/hex.rs index b40edecf..67b0d473 100644 --- a/crates/common/src/utils/hex.rs +++ b/crates/common/src/utils/hex.rs @@ -1,5 +1,5 @@ use super::strings::encode_hex; -use alloy::primitives::{Address, Bytes, FixedBytes, I256, U256}; +use alloy::primitives::{Address, Bytes, FixedBytes, U256}; /// A convenience function which encodes a given EVM type into a sized, lowercase hex string. 
pub trait ToLowerHex { @@ -20,13 +20,7 @@ impl ToLowerHex for bytes::Bytes { impl ToLowerHex for U256 { fn to_lower_hex(&self) -> String { - format!("{:#032x}", self) - } -} - -impl ToLowerHex for I256 { - fn to_lower_hex(&self) -> String { - format!("{:#032x}", self) + encode_hex(&self.to_be_bytes_vec()) } } diff --git a/crates/common/src/utils/threading.rs b/crates/common/src/utils/threading.rs index 83763e3b..bbb1e947 100644 --- a/crates/common/src/utils/threading.rs +++ b/crates/common/src/utils/threading.rs @@ -35,7 +35,7 @@ pub fn task_pool< let mut handles = Vec::new(); // Split items into chunks for each thread to process - let chunk_size = (items.len() + num_threads - 1) / num_threads; + let chunk_size = items.len().div_ceil(num_threads); let chunks = items.chunks(chunk_size); // Share ownership of f across threads with Arc diff --git a/crates/core/benches/bench_cfg.rs b/crates/core/benches/bench_cfg.rs index f53e0e7a..e97ce26e 100644 --- a/crates/core/benches/bench_cfg.rs +++ b/crates/core/benches/bench_cfg.rs @@ -1,5 +1,5 @@ use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; -use heimdall_cfg::{cfg, CFGArgsBuilder}; +use heimdall_cfg::{cfg, CfgArgsBuilder}; use tokio::runtime::Runtime; fn test_cfg(c: &mut Criterion) { @@ -17,10 +17,10 @@ fn test_cfg(c: &mut Criterion) { group.bench_with_input(BenchmarkId::from_parameter(name), &contract, |b, c| { b.to_async::(Runtime::new().unwrap()).iter(|| async { let start = std::time::Instant::now(); - let args = CFGArgsBuilder::new() + let args = CfgArgsBuilder::new() .target(c.to_string()) .build() - .expect("Failed to build CFGArgs"); + .expect("Failed to build CfgArgs"); let _ = cfg(args).await; start.elapsed() }); diff --git a/crates/core/tests/test_cfg.rs b/crates/core/tests/test_cfg.rs index 0ed78ba4..50a4adad 100644 --- a/crates/core/tests/test_cfg.rs +++ b/crates/core/tests/test_cfg.rs @@ -3,7 +3,7 @@ mod integration_tests { use memory_stats::memory_stats; use std::path::PathBuf; - use 
heimdall_cfg::{cfg, CFGArgs, CFGArgsBuilder}; + use heimdall_cfg::{cfg, CfgArgs, CfgArgsBuilder}; use petgraph::dot::Dot; use serde_json::Value; @@ -14,7 +14,7 @@ mod integration_tests { std::process::exit(0); }); - let result = heimdall_cfg::cfg(CFGArgs { + let result = heimdall_cfg::cfg(CfgArgs { target: String::from("0x1bf797219482a29013d804ad96d1c6f84fba4c45"), rpc_url, default: true, @@ -43,7 +43,7 @@ mod integration_tests { std::process::exit(0); }); - let result = heimdall_cfg::cfg(CFGArgs { + let result = heimdall_cfg::cfg(CfgArgs { target: String::from("0xE90d8Fb7B79C8930B5C8891e61c298b412a6e81a"), rpc_url, default: true, @@ -110,8 +110,8 @@ mod integration_tests { let mut fail_count = 0; for (contract_address, bytecode) in contracts { - println!("Generating CFG for contract {contract_address}"); - let args = CFGArgsBuilder::new() + println!("Generating Cfg for contract {contract_address}"); + let args = CfgArgsBuilder::new() .target(bytecode) .timeout(10000) .build() diff --git a/crates/decode/src/interfaces/args.rs b/crates/decode/src/interfaces/args.rs index b1c3a114..89eaffd1 100644 --- a/crates/decode/src/interfaces/args.rs +++ b/crates/decode/src/interfaces/args.rs @@ -44,7 +44,8 @@ pub struct DecodeArgs { #[clap(long = "skip-resolving")] pub skip_resolving: bool, - /// Whether to treat the target as a raw calldata string. Useful if the target is exactly 32 bytes. + /// Whether to treat the target as a raw calldata string. Useful if the target is exactly 32 + /// bytes. 
#[clap(long, short)] pub raw: bool, } diff --git a/crates/decompile/src/core/analyze.rs b/crates/decompile/src/core/analyze.rs index fcf51b8e..7bc62def 100644 --- a/crates/decompile/src/core/analyze.rs +++ b/crates/decompile/src/core/analyze.rs @@ -157,8 +157,8 @@ impl Analyzer { } // check if the ending brackets are needed - if analyzer_state.jumped_conditional.is_some() - && analyzer_state.conditional_stack.contains( + if analyzer_state.jumped_conditional.is_some() && + analyzer_state.conditional_stack.contains( analyzer_state .jumped_conditional .as_ref() @@ -167,8 +167,8 @@ impl Analyzer { { // remove the conditional for (i, conditional) in analyzer_state.conditional_stack.iter().enumerate() { - if conditional - == analyzer_state.jumped_conditional.as_ref().expect( + if conditional == + analyzer_state.jumped_conditional.as_ref().expect( "impossible case: should have short-circuited in previous conditional", ) { diff --git a/crates/decompile/src/utils/heuristics/extcall.rs b/crates/decompile/src/utils/heuristics/extcall.rs index 0e2fc2f0..41ab8be3 100644 --- a/crates/decompile/src/utils/heuristics/extcall.rs +++ b/crates/decompile/src/utils/heuristics/extcall.rs @@ -1,10 +1,9 @@ -use alloy::primitives::U256; -use eyre::eyre; use heimdall_common::utils::{hex::ToLowerHex, sync::blocking_await}; use heimdall_vm::{ core::{opcodes::opcode_name, vm::State}, w_gas, w_push0, }; +use tracing::trace; use crate::{ core::analyze::AnalyzerState, interfaces::AnalyzedFunction, @@ -24,11 +23,27 @@ pub fn extcall_heuristic( 0xf1 | 0xf2 => { let address = instruction.input_operations[1].solidify(); let memory = function.get_memory_range(instruction.inputs[3], instruction.inputs[4]); + let extcalldata = memory .iter() - .map(|x| x.value.to_lower_hex().trim_start_matches("0x").to_owned()) + .map(|x| x.value.to_lower_hex().to_owned()) .collect::>() .join(""); + let gas_solidified = instruction.input_operations[0].solidify(); + let value_solidified = 
instruction.input_operations[2].solidify(); + + // if gas is 2,300, this is a value transfer + if gas_solidified.contains("0x08fc") { + trace!( + "instruction {} ({}) with 2300 gas indicates a value transfer", + instruction.instruction, + opcode_name(instruction.opcode) + ); + function + .logic + .push(format!("address({}).transfer({});", address, value_solidified)); + return Ok(()); + } let extcalldata_clone = extcalldata.clone(); let decoded = blocking_await(move || { @@ -52,10 +67,18 @@ pub fn extcall_heuristic( // - if value is just the default (0), we don't need to include it let mut modifiers = vec![]; if instruction.input_operations[0] != w_gas!() { - modifiers.push(format!("gas: {}", instruction.input_operations[0].solidify())); + modifiers.push(format!("gas: {}", gas_solidified)); } if instruction.input_operations[2] != w_push0!() { - modifiers.push(format!("value: {}", instruction.input_operations[2].solidify())); + // if the value is just a hex string, we can parse it as ether for readability + if let Ok(value) = + u128::from_str_radix(value_solidified.trim_start_matches("0x"), 16) + { + let ether_value = value as f64 / 10_f64.powi(18); + modifiers.push(format!("value: {} ether", ether_value)); + } else { + modifiers.push(format!("value: {}", value_solidified)); + } } let modifier = if modifiers.is_empty() { "".to_string() @@ -68,21 +91,19 @@ pub fn extcall_heuristic( decode_precompile(instruction.inputs[1], &memory, &instruction.input_operations[5]) { function.logic.push(precompile_logic); + } else if let Some(decoded) = decoded { + function.logic.push(format!( + "(bool success, bytes memory ret0) = address({}).{}{}(...); // {}", + address, + modifier, + decoded.decoded.name, + opcode_name(instruction.opcode).to_lowercase(), + )); } else { - if let Some(decoded) = decoded { - function.logic.push(format!( - "(bool success, bytes memory ret0) = address({}).{}{}(...); // {}", - address, - modifier, - decoded.decoded.name, - 
opcode_name(instruction.opcode).to_lowercase(), - )); - } else { - function.logic.push(format!( - "(bool success, bytes memory ret0) = address({}).call{}(abi.encode({}));", - address, modifier, extcalldata - )); - } + function.logic.push(format!( + "(bool success, bytes memory ret0) = address({}).call{}(abi.encode({}));", + address, modifier, extcalldata + )); } } @@ -127,24 +148,22 @@ pub fn extcall_heuristic( decode_precompile(instruction.inputs[1], &memory, &instruction.input_operations[4]) { function.logic.push(precompile_logic); + } else if let Some(decoded) = decoded { + function.logic.push(format!( + "(bool success, bytes memory ret0) = address({}).{}{}(...); // {}", + address, + modifier, + decoded.decoded.name, + opcode_name(instruction.opcode).to_lowercase(), + )); } else { - if let Some(decoded) = decoded { - function.logic.push(format!( - "(bool success, bytes memory ret0) = address({}).{}{}(...); // {}", - address, - modifier, - decoded.decoded.name, - opcode_name(instruction.opcode).to_lowercase(), - )); - } else { - function.logic.push(format!( - "(bool success, bytes memory ret0) = address({}).{}{}(abi.encode({}));", - address, - opcode_name(instruction.opcode).to_lowercase(), - modifier, - extcalldata - )); - } + function.logic.push(format!( + "(bool success, bytes memory ret0) = address({}).{}{}(abi.encode({}));", + address, + opcode_name(instruction.opcode).to_lowercase(), + modifier, + extcalldata + )); } } diff --git a/crates/decompile/src/utils/heuristics/solidity.rs b/crates/decompile/src/utils/heuristics/solidity.rs index c508c861..21062d0d 100644 --- a/crates/decompile/src/utils/heuristics/solidity.rs +++ b/crates/decompile/src/utils/heuristics/solidity.rs @@ -96,10 +96,10 @@ pub fn solidity_heuristic( // perform a series of checks to determine if the condition // is added by the compiler and can be ignored - if (conditional.contains("msg.data.length") && conditional.contains("0x04")) - || 
VARIABLE_SIZE_CHECK_REGEX.is_match(&conditional).unwrap_or(false) - || (conditional.replace('!', "") == "success") - || (conditional == "!msg.value") + if (conditional.contains("msg.data.length") && conditional.contains("0x04")) || + VARIABLE_SIZE_CHECK_REGEX.is_match(&conditional).unwrap_or(false) || + (conditional.replace('!', "") == "success") || + (conditional == "!msg.value") { return Ok(()); } diff --git a/crates/decompile/src/utils/heuristics/yul.rs b/crates/decompile/src/utils/heuristics/yul.rs index 352f4eb4..ac498cfd 100644 --- a/crates/decompile/src/utils/heuristics/yul.rs +++ b/crates/decompile/src/utils/heuristics/yul.rs @@ -77,8 +77,8 @@ pub fn yul_heuristic( // CALLDATACOPY, CODECOPY, EXTCODECOPY, RETURNDATACOPY, TSTORE, // SSTORE, RETURN, SELFDESTRUCT, LOG0, LOG1, LOG2, LOG3, LOG4 // we simply want to add the operation to the function's logic - 0x37 | 0x39 | 0x3c | 0x3e | 0x55 | 0x5d | 0xf0 | 0xf1 | 0xf2 | 0xf4 | 0xf5 | 0xfa - | 0xff | 0xA0 | 0xA1 | 0xA2 | 0xA3 | 0xA4 => { + 0x37 | 0x39 | 0x3c | 0x3e | 0x55 | 0x5d | 0xf0 | 0xf1 | 0xf2 | 0xf4 | 0xf5 | 0xfa | + 0xff | 0xA0 | 0xA1 | 0xA2 | 0xA3 | 0xA4 => { function.logic.push(format!( "{}({})", opcode_name(instruction.opcode).to_lowercase(), diff --git a/crates/decompile/src/utils/postprocessors/arithmetic.rs b/crates/decompile/src/utils/postprocessors/arithmetic.rs index 8acc442d..4bb11ba0 100644 --- a/crates/decompile/src/utils/postprocessors/arithmetic.rs +++ b/crates/decompile/src/utils/postprocessors/arithmetic.rs @@ -71,7 +71,7 @@ pub fn simplify_parentheses(line: &str, paren_index: usize) -> Result Option { self.bytes.get_by_offset(byte) } diff --git a/crates/vm/src/core/stack.rs b/crates/vm/src/core/stack.rs index 594ac1b5..bb1ef77a 100644 --- a/crates/vm/src/core/stack.rs +++ b/crates/vm/src/core/stack.rs @@ -18,6 +18,7 @@ pub struct Stack { } /// The [`StackFrame`] struct represents a single frame on the stack. 
+/// /// It holds a [`U256`] value and the [`WrappedOpcode`] that pushed it onto the stack. \ /// \ /// By doing this, we can keep track of the source of each value on the stack in a recursive manner. diff --git a/examples/cfg/src/main.rs b/examples/cfg/src/main.rs index 66e7ac0f..48750964 100644 --- a/examples/cfg/src/main.rs +++ b/examples/cfg/src/main.rs @@ -1,15 +1,15 @@ -use heimdall_cfg::{cfg, CFGArgsBuilder}; +use heimdall_cfg::{cfg, CfgArgsBuilder}; #[tokio::main] async fn main() -> Result<(), Box> { - let args = CFGArgsBuilder::new() + let args = CfgArgsBuilder::new() .target("0x9f00c43700bc0000Ff91bE00841F8e04c0495000".to_string()) .rpc_url("https://eth.llamarpc.com".to_string()) .build()?; let result = cfg(args).await?; - println!("Contract CFG: {:#?}", result); + println!("Contract Cfg: {:#?}", result); Ok(()) } From 2b2ec279a9353bfd2a5ad898e17b26268195dbf8 Mon Sep 17 00:00:00 2001 From: Jon-Becker Date: Fri, 15 Nov 2024 16:12:30 -0600 Subject: [PATCH 06/14] wip --- crates/decompile/src/utils/heuristics/extcall.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/crates/decompile/src/utils/heuristics/extcall.rs b/crates/decompile/src/utils/heuristics/extcall.rs index 41ab8be3..3035d1ff 100644 --- a/crates/decompile/src/utils/heuristics/extcall.rs +++ b/crates/decompile/src/utils/heuristics/extcall.rs @@ -23,7 +23,6 @@ pub fn extcall_heuristic( 0xf1 | 0xf2 => { let address = instruction.input_operations[1].solidify(); let memory = function.get_memory_range(instruction.inputs[3], instruction.inputs[4]); - let extcalldata = memory .iter() .map(|x| x.value.to_lower_hex().to_owned()) @@ -101,8 +100,11 @@ pub fn extcall_heuristic( )); } else { function.logic.push(format!( - "(bool success, bytes memory ret0) = address({}).call{}(abi.encode({}));", - address, modifier, extcalldata + "(bool success, bytes memory ret0) = address({}).Unresolved_{}{}(...); // {}", + address, + extcalldata.get(2..10).unwrap_or(""), + modifier, + 
opcode_name(instruction.opcode).to_lowercase(), )); } } @@ -158,11 +160,11 @@ pub fn extcall_heuristic( )); } else { function.logic.push(format!( - "(bool success, bytes memory ret0) = address({}).{}{}(abi.encode({}));", + "(bool success, bytes memory ret0) = address({}).Unresolved_{}{}(...); // {}", address, - opcode_name(instruction.opcode).to_lowercase(), + extcalldata.get(2..10).unwrap_or(""), modifier, - extcalldata + opcode_name(instruction.opcode).to_lowercase(), )); } } From 11fb8888345ebb4ab0996f1d1e10c33104bfa200 Mon Sep 17 00:00:00 2001 From: Jon-Becker Date: Fri, 6 Dec 2024 12:51:35 -0500 Subject: [PATCH 07/14] wip --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 4d9a8d0b..80751592 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -68,7 +68,7 @@ jobs: - uses: dtolnay/rust-toolchain@nightly with: components: rustfmt - - run: cargo fmt --check --all + - run: cargo +nightly fmt --check --all check: runs-on: ubuntu-latest From b87ab09c5299c2cf073419fd2fbb1ef4399adfde Mon Sep 17 00:00:00 2001 From: Jon-Becker Date: Fri, 6 Dec 2024 15:45:34 -0500 Subject: [PATCH 08/14] convert heuristics to async fns --- crates/common/src/utils/sync.rs | 4 + crates/decompile/src/core/analyze.rs | 82 ++-- crates/decompile/src/core/mod.rs | 17 +- .../src/utils/heuristics/arguments.rs | 438 ++++++++--------- .../decompile/src/utils/heuristics/events.rs | 116 ++--- .../decompile/src/utils/heuristics/extcall.rs | 301 ++++++------ crates/decompile/src/utils/heuristics/mod.rs | 24 +- .../src/utils/heuristics/modifiers.rs | 77 +-- .../src/utils/heuristics/solidity.rs | 440 +++++++++--------- crates/decompile/src/utils/heuristics/yul.rs | 152 +++--- 10 files changed, 857 insertions(+), 794 deletions(-) diff --git a/crates/common/src/utils/sync.rs b/crates/common/src/utils/sync.rs index 1e9bdb94..db307ee7 100644 --- 
a/crates/common/src/utils/sync.rs +++ b/crates/common/src/utils/sync.rs @@ -1,6 +1,10 @@ +use std::{future::Future, pin::Pin}; + /// Take in a non-async function and await it. This functions should be blocking. pub fn blocking_await(f: F) -> T where F: FnOnce() -> T, { tokio::task::block_in_place(f) } + +pub type BoxFuture<'a, T> = Pin + 'a>>; diff --git a/crates/decompile/src/core/analyze.rs b/crates/decompile/src/core/analyze.rs index 7bc62def..6178f1fd 100644 --- a/crates/decompile/src/core/analyze.rs +++ b/crates/decompile/src/core/analyze.rs @@ -1,5 +1,6 @@ use std::{fmt::Display, time::Instant}; +use futures::future::BoxFuture; use heimdall_vm::ext::exec::VMTrace; use tracing::debug; @@ -104,7 +105,7 @@ impl Analyzer { } /// Performs analysis - pub fn analyze(&mut self, trace_root: VMTrace) -> Result { + pub async fn analyze(&mut self, trace_root: VMTrace) -> Result { debug!( "analzying symbolic execution trace for '{}' with the {} analyzer", self.function.selector, self.typ @@ -123,7 +124,7 @@ impl Analyzer { }; // Perform analysis - self.analyze_inner(&trace_root, &mut analyzer_state)?; + self.analyze_inner(&trace_root, &mut analyzer_state).await?; debug!( "analysis for '{}' completed in {:?}", @@ -135,51 +136,52 @@ impl Analyzer { } /// Inner analysis implementation - fn analyze_inner( - &mut self, - branch: &VMTrace, - analyzer_state: &mut AnalyzerState, - ) -> Result<(), Error> { - // reset jumped conditional, we dont propagate conditionals across branches - analyzer_state.jumped_conditional = None; - - // for each operation in the current trace branch, peform analysis with registerred - // heuristics - for operation in &branch.operations { - for heuristic in &self.heuristics { - heuristic.run(&mut self.function, operation, analyzer_state)?; + fn analyze_inner<'a>( + &'a mut self, + branch: &'a VMTrace, + analyzer_state: &'a mut AnalyzerState, + ) -> BoxFuture<'a, Result<(), Error>> { + Box::pin(async move { + // reset jumped conditional, we dont 
propagate conditionals across branches + analyzer_state.jumped_conditional = None; + + // for each operation in the current trace branch, peform analysis with registerred + // heuristics + for operation in &branch.operations { + for heuristic in &self.heuristics { + heuristic.run(&mut self.function, operation, analyzer_state).await?; + } } - } - // recurse into the children of the current trace branch - for child in &branch.children { - self.analyze_inner(child, analyzer_state)?; - } + // recurse into the children of the current trace branch + for child in &branch.children { + self.analyze_inner(child, analyzer_state).await?; + } - // check if the ending brackets are needed - if analyzer_state.jumped_conditional.is_some() && - analyzer_state.conditional_stack.contains( - analyzer_state - .jumped_conditional - .as_ref() - .expect("impossible case: should have short-circuited in previous conditional"), - ) - { - // remove the conditional - for (i, conditional) in analyzer_state.conditional_stack.iter().enumerate() { - if conditional == + // check if the ending brackets are needed + if analyzer_state.jumped_conditional.is_some() && + analyzer_state.conditional_stack.contains( analyzer_state.jumped_conditional.as_ref().expect( "impossible case: should have short-circuited in previous conditional", - ) - { - analyzer_state.conditional_stack.remove(i); - break; + ), + ) + { + // remove the conditional + for (i, conditional) in analyzer_state.conditional_stack.iter().enumerate() { + if conditional == + analyzer_state.jumped_conditional.as_ref().expect( + "impossible case: should have short-circuited in previous conditional", + ) + { + analyzer_state.conditional_stack.remove(i); + break; + } } - } - self.function.logic.push("}".to_string()); - } + self.function.logic.push("}".to_string()); + } - Ok(()) + Ok(()) + }) } } diff --git a/crates/decompile/src/core/mod.rs b/crates/decompile/src/core/mod.rs index ae0cff47..4234fcc1 100644 --- a/crates/decompile/src/core/mod.rs +++ 
b/crates/decompile/src/core/mod.rs @@ -156,21 +156,21 @@ pub async fn decompile(args: DecompilerArgs) -> Result { info!("symbolically executed {} selectors", symbolic_execution_maps.len()); let start_analysis_time = Instant::now(); - let mut analyzed_functions = symbolic_execution_maps - .into_iter() - .map(|(selector, trace_root)| { + let handles = symbolic_execution_maps.into_iter().map(|(selector, trace_root)| { + let mut evm_clone = evm.clone(); + async move { let mut analyzer = Analyzer::new( analyzer_type, AnalyzedFunction::new(&selector, selector == "fallback"), ); // analyze the symbolic execution trace - let mut analyzed_function = analyzer.analyze(trace_root)?; + let mut analyzed_function = analyzer.analyze(trace_root).await?; // if the function is constant, we can get the exact val if analyzed_function.is_constant() && !analyzed_function.fallback { - evm.reset(); - let x = evm.call(&decode_hex(&selector).expect("invalid selector"), 0)?; + evm_clone.reset(); + let x = evm_clone.call(&decode_hex(&selector).expect("invalid selector"), 0)?; let returns_param_type = analyzed_function .returns @@ -192,8 +192,9 @@ pub async fn decompile(args: DecompilerArgs) -> Result { } Ok::<_, Error>(analyzed_function) - }) - .collect::, Error>>()?; + } + }); + let mut analyzed_functions = futures::future::try_join_all(handles).await?; debug!("analyzing symbolic execution results took {:?}", start_analysis_time.elapsed()); info!("analyzed {} symbolic execution traces", analyzed_functions.len()); diff --git a/crates/decompile/src/utils/heuristics/arguments.rs b/crates/decompile/src/utils/heuristics/arguments.rs index 4f14fe25..54a5aa82 100644 --- a/crates/decompile/src/utils/heuristics/arguments.rs +++ b/crates/decompile/src/utils/heuristics/arguments.rs @@ -1,3 +1,4 @@ +use futures::future::BoxFuture; use hashbrown::HashSet; use alloy::primitives::U256; @@ -17,152 +18,150 @@ use crate::{ Error, }; -pub fn argument_heuristic( - function: &mut AnalyzedFunction, - state: 
&State, - analyzer_state: &mut AnalyzerState, -) -> Result<(), Error> { - match state.last_instruction.opcode { - // CALLDATALOAD - 0x35 => { - // calculate the argument index, with the 4byte signature padding removed - // for example, CALLDATALOAD(4) -> (4-4)/32 = 0 - // CALLDATALOAD(36) -> (36-4)/32 = 1 - let arg_index = (state.last_instruction.inputs[0].saturating_sub(U256::from(4)) / - U256::from(32)) - .try_into() - .unwrap_or(usize::MAX); +pub fn argument_heuristic<'a>( + function: &'a mut AnalyzedFunction, + state: &'a State, + analyzer_state: &'a mut AnalyzerState, +) -> BoxFuture<'a, Result<(), Error>> { + Box::pin(async move { + match state.last_instruction.opcode { + // CALLDATALOAD + 0x35 => { + // calculate the argument index, with the 4byte signature padding removed + // for example, CALLDATALOAD(4) -> (4-4)/32 = 0 + // CALLDATALOAD(36) -> (36-4)/32 = 1 + let arg_index = (state.last_instruction.inputs[0].saturating_sub(U256::from(4)) / + U256::from(32)) + .try_into() + .unwrap_or(usize::MAX); - // insert only if this argument is not already in the hashmap - function.arguments.entry(arg_index).or_insert_with(|| { - debug!( - "discovered new argument at index {} from CALLDATALOAD({})", - arg_index, state.last_instruction.inputs[0] - ); - CalldataFrame { - arg_op: state.last_instruction.input_operations[0].to_string(), - mask_size: 32, // init to 32 because all CALLDATALOADs are 32 bytes - heuristics: HashSet::new(), - } - }); - } - - // CALLDATACOPY - 0x37 => { - // TODO: implement CALLDATACOPY support - trace!("CALLDATACOPY detected; not implemented"); - } + // insert only if this argument is not already in the hashmap + function.arguments.entry(arg_index).or_insert_with(|| { + debug!( + "discovered new argument at index {} from CALLDATALOAD({})", + arg_index, state.last_instruction.inputs[0] + ); + CalldataFrame { + arg_op: state.last_instruction.input_operations[0].to_string(), + mask_size: 32, // init to 32 because all CALLDATALOADs are 32 bytes + 
heuristics: HashSet::new(), + } + }); + } - // AND | OR - 0x16 | 0x17 => { - // if this is a bitwise mask operation on CALLDATALOAD, we can use it to determine the - // size (and consequently type) of the variable - if let Some(calldataload_op) = - state.last_instruction.input_operations.iter().find(|op| op.opcode == CALLDATALOAD) - { - // this is a bitwise mask, we can use it to determine the size of the variable - let (mask_size_bytes, _potential_types) = convert_bitmask(&state.last_instruction); + // CALLDATACOPY + 0x37 => { + // TODO: implement CALLDATACOPY support + trace!("CALLDATACOPY detected; not implemented"); + } - // yulify the calldataload operation, and find the associated argument index - // this MUST exist, as we have already inserted it in the CALLDATALOAD heuristic - let arg_op = calldataload_op.inputs[0].to_string(); - if let Some((arg_index, frame)) = - function.arguments.iter_mut().find(|(_, frame)| frame.arg_op == arg_op) + // AND | OR + 0x16 | 0x17 => { + // if this is a bitwise mask operation on CALLDATALOAD, we can use it to determine + // the size (and consequently type) of the variable + if let Some(calldataload_op) = state + .last_instruction + .input_operations + .iter() + .find(|op| op.opcode == CALLDATALOAD) { - debug!( - "instruction {} ({}) indicates argument {} is masked to {} bytes", - state.last_instruction.instruction, - opcode_name(state.last_instruction.opcode), - arg_index, - mask_size_bytes - ); + // this is a bitwise mask, we can use it to determine the size of the variable + let (mask_size_bytes, _potential_types) = + convert_bitmask(&state.last_instruction); + + // yulify the calldataload operation, and find the associated argument index + // this MUST exist, as we have already inserted it in the CALLDATALOAD heuristic + let arg_op = calldataload_op.inputs[0].to_string(); + if let Some((arg_index, frame)) = + function.arguments.iter_mut().find(|(_, frame)| frame.arg_op == arg_op) + { + debug!( + "instruction {} ({}) 
indicates argument {} is masked to {} bytes", + state.last_instruction.instruction, + opcode_name(state.last_instruction.opcode), + arg_index, + mask_size_bytes + ); - frame.mask_size = mask_size_bytes; + frame.mask_size = mask_size_bytes; + } } } - } - // RETURN - 0xf3 => { - // Safely convert U256 to usize - let size: usize = state.last_instruction.inputs[1].try_into().unwrap_or(0); + // RETURN + 0xf3 => { + // Safely convert U256 to usize + let size: usize = state.last_instruction.inputs[1].try_into().unwrap_or(0); - let return_memory_operations = function.get_memory_range( - state.last_instruction.inputs[0], - state.last_instruction.inputs[1], - ); - let return_memory_operations_solidified = return_memory_operations - .iter() - .map(|x| x.operation.solidify()) - .collect::>() - .join(", "); + let return_memory_operations = function.get_memory_range( + state.last_instruction.inputs[0], + state.last_instruction.inputs[1], + ); + let return_memory_operations_solidified = return_memory_operations + .iter() + .map(|x| x.operation.solidify()) + .collect::>() + .join(", "); - // add the return statement to the function logic - if analyzer_state.analyzer_type == AnalyzerType::Solidity { - if return_memory_operations.len() <= 1 { - function.logic.push(format!("return {return_memory_operations_solidified};")); - } else { + // add the return statement to the function logic + if analyzer_state.analyzer_type == AnalyzerType::Solidity { + if return_memory_operations.len() <= 1 { + function + .logic + .push(format!("return {return_memory_operations_solidified};")); + } else { + function.logic.push(format!( + "return abi.encodePacked({return_memory_operations_solidified});" + )); + } + } else if analyzer_state.analyzer_type == AnalyzerType::Yul { function.logic.push(format!( - "return abi.encodePacked({return_memory_operations_solidified});" + "return({}, {})", + state.last_instruction.input_operations[0].yulify(), + state.last_instruction.input_operations[1].yulify() )); } - 
} else if analyzer_state.analyzer_type == AnalyzerType::Yul { - function.logic.push(format!( - "return({}, {})", - state.last_instruction.input_operations[0].yulify(), - state.last_instruction.input_operations[1].yulify() - )); - } - // if we've already determined a return type, we don't want to do it again. - // we use bytes32 as a default return type - if function.returns.is_some() && function.returns.as_deref() != Some("bytes32") { - return Ok(()); - } + // if we've already determined a return type, we don't want to do it again. + // we use bytes32 as a default return type + if function.returns.is_some() && function.returns.as_deref() != Some("bytes32") { + return Ok(()); + } - // if the any input op is ISZERO(x), this is a boolean return - if return_memory_operations.iter().any(|x| x.operation.opcode == ISZERO) { - function.returns = Some(String::from("bool")); - } - // if the input op is any of the following, it is a uint256 return - // this is because these push numeric values onto the stack - else if return_memory_operations.iter().any(|x| { - [0x31, 0x34, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x58, 0x5a] - .contains(&x.operation.opcode) - }) { - function.returns = Some(String::from("uint256")); - } - // if the input op is any of the following, it is an address return - // this is because these push address values onto the stack - else if return_memory_operations - .iter() - .any(|x| [0x30, 0x32, 0x33, 0x41].contains(&x.operation.opcode)) - { - function.returns = Some(String::from("address")); - } - // if the size of returndata is > 32, it must be a bytes or string return. 
- else if size > 32 { - // some hardcoded function selectors where the return type is known to be a string - if ["06fdde03", "95d89b41", "6a98de4c", "9d2b0822", "1a0d4bca"] - .contains(&function.selector.as_str()) - { - function.returns = Some(String::from("string memory")); - } else { - function.returns = Some(String::from("bytes memory")); + // if the any input op is ISZERO(x), this is a boolean return + if return_memory_operations.iter().any(|x| x.operation.opcode == ISZERO) { + function.returns = Some(String::from("bool")); } - } else { - // attempt to find a return type within the return memory operations - let byte_size = match AND_BITMASK_REGEX - .find(&return_memory_operations_solidified) - .ok() - .flatten() + // if the input op is any of the following, it is a uint256 return + // this is because these push numeric values onto the stack + else if return_memory_operations.iter().any(|x| { + [0x31, 0x34, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x58, 0x5a] + .contains(&x.operation.opcode) + }) { + function.returns = Some(String::from("uint256")); + } + // if the input op is any of the following, it is an address return + // this is because these push address values onto the stack + else if return_memory_operations + .iter() + .any(|x| [0x30, 0x32, 0x33, 0x41].contains(&x.operation.opcode)) { - Some(bitmask) => { - let cast = bitmask.as_str(); - - cast.matches("ff").count() + function.returns = Some(String::from("address")); + } + // if the size of returndata is > 32, it must be a bytes or string return. 
+ else if size > 32 { + // some hardcoded function selectors where the return type is known to be a + // string + if ["06fdde03", "95d89b41", "6a98de4c", "9d2b0822", "1a0d4bca"] + .contains(&function.selector.as_str()) + { + function.returns = Some(String::from("string memory")); + } else { + function.returns = Some(String::from("bytes memory")); } - None => match AND_BITMASK_REGEX_2 + } else { + // attempt to find a return type within the return memory operations + let byte_size = match AND_BITMASK_REGEX .find(&return_memory_operations_solidified) .ok() .flatten() @@ -172,109 +171,130 @@ pub fn argument_heuristic( cast.matches("ff").count() } - None => 32, - }, - }; + None => match AND_BITMASK_REGEX_2 + .find(&return_memory_operations_solidified) + .ok() + .flatten() + { + Some(bitmask) => { + let cast = bitmask.as_str(); - // convert the cast size to a string - let (_, cast_types) = byte_size_to_type(byte_size); - function.returns = Some(cast_types[0].to_string()); - } - - // check if this is a state getter - if function.arguments.is_empty() { - if let Some(storage_access) = - STORAGE_ACCESS_REGEX.find(&return_memory_operations_solidified).unwrap_or(None) - { - let storage_access = storage_access.as_str(); - let access_range = find_balanced_encapsulator(storage_access, ('[', ']')) - .map_err(|e| eyre!("failed to find access range: {e}"))?; + cast.matches("ff").count() + } + None => 32, + }, + }; - function.maybe_getter_for = - Some(format!("storage[{}]", &storage_access[access_range])); + // convert the cast size to a string + let (_, cast_types) = byte_size_to_type(byte_size); + function.returns = Some(cast_types[0].to_string()); } - } - debug!( - "return type determined to be '{:?}' from ops '{}'", - function.returns, return_memory_operations_solidified - ); - } + // check if this is a state getter + if function.arguments.is_empty() { + if let Some(storage_access) = STORAGE_ACCESS_REGEX + .find(&return_memory_operations_solidified) + .unwrap_or(None) + { + let 
storage_access = storage_access.as_str(); + let access_range = + find_balanced_encapsulator(storage_access, ('[', ']')) + .map_err(|e| eyre!("failed to find access range: {e}"))?; + + function.maybe_getter_for = + Some(format!("storage[{}]", &storage_access[access_range])); + } + } - // integer type heuristics - 0x02 | 0x04 | 0x05 | 0x06 | 0x07 | 0x08 | 0x09 | 0x0b | 0x10 | 0x11 | 0x12 | 0x13 => { - // check if this instruction is operating on a known argument. - // if it is, add 'integer' to the list of heuristics - // TODO: we probably want to use an enum for heuristics - if let Some((arg_index, frame)) = function.arguments.iter_mut().find(|(_, frame)| { - state - .last_instruction - .output_operations - .iter() - .any(|operation| operation.to_string().contains(frame.arg_op.as_str())) - }) { debug!( - "instruction {} ({}) indicates argument {} may be a numeric type", - state.last_instruction.instruction, - opcode_name(state.last_instruction.opcode), - arg_index + "return type determined to be '{:?}' from ops '{}'", + function.returns, return_memory_operations_solidified ); - - frame.heuristics.insert(TypeHeuristic::Numeric); } - } - // bytes type heuristics - 0x18 | 0x1a | 0x1b | 0x1c | 0x1d | 0x20 => { - // check if this instruction is operating on a known argument. - // if it is, add 'bytes' to the list of heuristics - // TODO: we probably want to use an enum for heuristics - if let Some((arg_index, frame)) = function.arguments.iter_mut().find(|(_, frame)| { - state - .last_instruction - .output_operations - .iter() - .any(|operation| operation.to_string().contains(frame.arg_op.as_str())) - }) { - debug!( - "instruction {} ({}) indicates argument {} may be a bytes type", - state.last_instruction.instruction, - opcode_name(state.last_instruction.opcode), - arg_index - ); + // integer type heuristics + 0x02 | 0x04 | 0x05 | 0x06 | 0x07 | 0x08 | 0x09 | 0x0b | 0x10 | 0x11 | 0x12 | 0x13 => { + // check if this instruction is operating on a known argument. 
+ // if it is, add 'integer' to the list of heuristics + // TODO: we probably want to use an enum for heuristics + if let Some((arg_index, frame)) = + function.arguments.iter_mut().find(|(_, frame)| { + state + .last_instruction + .output_operations + .iter() + .any(|operation| operation.to_string().contains(frame.arg_op.as_str())) + }) + { + debug!( + "instruction {} ({}) indicates argument {} may be a numeric type", + state.last_instruction.instruction, + opcode_name(state.last_instruction.opcode), + arg_index + ); - frame.heuristics.insert(TypeHeuristic::Bytes); + frame.heuristics.insert(TypeHeuristic::Numeric); + } } - } - // boolean type heuristics - 0x15 => { - // if this is a boolean check on CALLDATALOAD, we can add boolean to the potential types - if let Some(calldataload_op) = - state.last_instruction.input_operations.iter().find(|op| op.opcode == CALLDATALOAD) - { - // yulify the calldataload operation, and find the associated argument index - // this MUST exist, as we have already inserted it in the CALLDATALOAD heuristic - let arg_op = calldataload_op.inputs[0].to_string(); + // bytes type heuristics + 0x18 | 0x1a | 0x1b | 0x1c | 0x1d | 0x20 => { + // check if this instruction is operating on a known argument. 
+ // if it is, add 'bytes' to the list of heuristics + // TODO: we probably want to use an enum for heuristics if let Some((arg_index, frame)) = - function.arguments.iter_mut().find(|(_, frame)| frame.arg_op == arg_op) + function.arguments.iter_mut().find(|(_, frame)| { + state + .last_instruction + .output_operations + .iter() + .any(|operation| operation.to_string().contains(frame.arg_op.as_str())) + }) { debug!( - "instruction {} ({}) indicates argument {} may be a boolean", + "instruction {} ({}) indicates argument {} may be a bytes type", state.last_instruction.instruction, opcode_name(state.last_instruction.opcode), arg_index ); - // NOTE: we don't want to update mask_size here, as we are only adding potential - // types - frame.heuristics.insert(TypeHeuristic::Boolean); + frame.heuristics.insert(TypeHeuristic::Bytes); + } + } + + // boolean type heuristics + 0x15 => { + // if this is a boolean check on CALLDATALOAD, we can add boolean to the potential + // types + if let Some(calldataload_op) = state + .last_instruction + .input_operations + .iter() + .find(|op| op.opcode == CALLDATALOAD) + { + // yulify the calldataload operation, and find the associated argument index + // this MUST exist, as we have already inserted it in the CALLDATALOAD heuristic + let arg_op = calldataload_op.inputs[0].to_string(); + if let Some((arg_index, frame)) = + function.arguments.iter_mut().find(|(_, frame)| frame.arg_op == arg_op) + { + debug!( + "instruction {} ({}) indicates argument {} may be a boolean", + state.last_instruction.instruction, + opcode_name(state.last_instruction.opcode), + arg_index + ); + + // NOTE: we don't want to update mask_size here, as we are only adding + // potential types + frame.heuristics.insert(TypeHeuristic::Boolean); + } } } - } - _ => {} - }; + _ => {} + }; - Ok(()) + Ok(()) + }) } diff --git a/crates/decompile/src/utils/heuristics/events.rs b/crates/decompile/src/utils/heuristics/events.rs index 3d630fcc..29676b68 100644 --- 
a/crates/decompile/src/utils/heuristics/events.rs +++ b/crates/decompile/src/utils/heuristics/events.rs @@ -1,5 +1,6 @@ use alloy::primitives::U256; use eyre::OptionExt; +use futures::future::BoxFuture; use heimdall_common::utils::hex::ToLowerHex; use heimdall_vm::core::vm::State; @@ -9,64 +10,71 @@ use crate::{ Error, }; -pub fn event_heuristic( - function: &mut AnalyzedFunction, - state: &State, - analyzer_state: &mut AnalyzerState, -) -> Result<(), Error> { - if (0xA0..=0xA4).contains(&state.last_instruction.opcode) { - // this should be the last event in state - let event = state.events.last().ok_or_eyre("no events in state")?; - let selector = event.topics.first().unwrap_or(&U256::ZERO).to_owned(); - let anonymous = selector == U256::ZERO; +pub fn event_heuristic<'a>( + function: &'a mut AnalyzedFunction, + state: &'a State, + analyzer_state: &'a mut AnalyzerState, +) -> BoxFuture<'a, Result<(), Error>> { + Box::pin(async move { + if (0xA0..=0xA4).contains(&state.last_instruction.opcode) { + // this should be the last event in state + let event = state.events.last().ok_or_eyre("no events in state")?; + let selector = event.topics.first().unwrap_or(&U256::ZERO).to_owned(); + let anonymous = selector == U256::ZERO; - // insert this selector into events - function.events.insert(selector); + // insert this selector into events + function.events.insert(selector); - // decode the data field - let data_mem_ops = function - .get_memory_range(state.last_instruction.inputs[0], state.last_instruction.inputs[1]); - let data_mem_ops_solidified = - data_mem_ops.iter().map(|x| x.operation.solidify()).collect::>().join(", "); + // decode the data field + let data_mem_ops = function.get_memory_range( + state.last_instruction.inputs[0], + state.last_instruction.inputs[1], + ); + let data_mem_ops_solidified = data_mem_ops + .iter() + .map(|x| x.operation.solidify()) + .collect::>() + .join(", "); - // add the event emission to the function's logic - if 
analyzer_state.analyzer_type == AnalyzerType::Solidity { - function.logic.push(format!( - "emit Event_{}({}{});{}", - &event - .topics - .first() - .unwrap_or(&U256::from(0)) - .to_lower_hex() - .replacen("0x", "", 1)[0..8], - event - .topics - .get(1..) - .map(|topics| { - if !event.data.is_empty() && !topics.is_empty() { - let mut solidified_topics: Vec = Vec::new(); - for (i, _) in topics.iter().enumerate() { - solidified_topics.push( - state.last_instruction.input_operations[i + 3].solidify(), - ); + // add the event emission to the function's logic + if analyzer_state.analyzer_type == AnalyzerType::Solidity { + function.logic.push(format!( + "emit Event_{}({}{});{}", + &event + .topics + .first() + .unwrap_or(&U256::from(0)) + .to_lower_hex() + .replacen("0x", "", 1)[0..8], + event + .topics + .get(1..) + .map(|topics| { + if !event.data.is_empty() && !topics.is_empty() { + let mut solidified_topics: Vec = Vec::new(); + for (i, _) in topics.iter().enumerate() { + solidified_topics.push( + state.last_instruction.input_operations[i + 3].solidify(), + ); + } + format!("{}, ", solidified_topics.join(", ")) + } else { + let mut solidified_topics: Vec = Vec::new(); + for (i, _) in topics.iter().enumerate() { + solidified_topics.push( + state.last_instruction.input_operations[i + 3].solidify(), + ); + } + solidified_topics.join(", ") } - format!("{}, ", solidified_topics.join(", ")) - } else { - let mut solidified_topics: Vec = Vec::new(); - for (i, _) in topics.iter().enumerate() { - solidified_topics.push( - state.last_instruction.input_operations[i + 3].solidify(), - ); - } - solidified_topics.join(", ") - } - }) - .unwrap_or("".to_string()), - data_mem_ops_solidified, - if anonymous { " // anonymous event" } else { "" } - )); + }) + .unwrap_or("".to_string()), + data_mem_ops_solidified, + if anonymous { " // anonymous event" } else { "" } + )); + } } - } - Ok(()) + Ok(()) + }) } diff --git a/crates/decompile/src/utils/heuristics/extcall.rs 
b/crates/decompile/src/utils/heuristics/extcall.rs index 3035d1ff..5921d5ab 100644 --- a/crates/decompile/src/utils/heuristics/extcall.rs +++ b/crates/decompile/src/utils/heuristics/extcall.rs @@ -1,3 +1,4 @@ +use futures::future::BoxFuture; use heimdall_common::utils::{hex::ToLowerHex, sync::blocking_await}; use heimdall_vm::{ core::{opcodes::opcode_name, vm::State}, @@ -11,168 +12,180 @@ use crate::{ }; use heimdall_decoder::{decode, DecodeArgsBuilder}; -pub fn extcall_heuristic( - function: &mut AnalyzedFunction, - state: &State, - _: &mut AnalyzerState, -) -> Result<(), Error> { - let instruction = &state.last_instruction; - - match instruction.opcode { - // CALL / CALLCODE - 0xf1 | 0xf2 => { - let address = instruction.input_operations[1].solidify(); - let memory = function.get_memory_range(instruction.inputs[3], instruction.inputs[4]); - let extcalldata = memory - .iter() - .map(|x| x.value.to_lower_hex().to_owned()) - .collect::>() - .join(""); - let gas_solidified = instruction.input_operations[0].solidify(); - let value_solidified = instruction.input_operations[2].solidify(); - - // if gas is 2,300, this is a value transfer - if gas_solidified.contains("0x08fc") { - trace!( - "instruction {} ({}) with 2300 gas indicates a value transfer", - instruction.instruction, - opcode_name(instruction.opcode) - ); - function - .logic - .push(format!("address({}).transfer({});", address, value_solidified)); - return Ok(()); - } +pub fn extcall_heuristic<'a>( + function: &'a mut AnalyzedFunction, + state: &'a State, + _: &'a mut AnalyzerState, +) -> BoxFuture<'a, Result<(), Error>> { + Box::pin(async move { + let instruction = &state.last_instruction; + + match instruction.opcode { + // CALL / CALLCODE + 0xf1 | 0xf2 => { + let address = instruction.input_operations[1].solidify(); + let memory = + function.get_memory_range(instruction.inputs[3], instruction.inputs[4]); + let extcalldata = memory + .iter() + .map(|x| x.value.to_lower_hex().to_owned()) + .collect::>() + 
.join(""); + let gas_solidified = instruction.input_operations[0].solidify(); + let value_solidified = instruction.input_operations[2].solidify(); + + // if gas is 2,300, this is a value transfer + if gas_solidified.contains("0x08fc") { + trace!( + "instruction {} ({}) with 2300 gas indicates a value transfer", + instruction.instruction, + opcode_name(instruction.opcode) + ); + function + .logic + .push(format!("address({}).transfer({});", address, value_solidified)); + return Ok(()); + } - let extcalldata_clone = extcalldata.clone(); - let decoded = blocking_await(move || { - let rt = tokio::runtime::Runtime::new().expect("failed to get runtime"); - - rt.block_on(async { - decode( - DecodeArgsBuilder::new() - .target(extcalldata_clone) - .raw(true) - .build() - .expect("Failed to build DecodeArgs"), - ) - .await + let extcalldata_clone = extcalldata.clone(); + let decoded = blocking_await(move || { + let rt = tokio::runtime::Runtime::new().expect("failed to get runtime"); + + rt.block_on(async { + decode( + DecodeArgsBuilder::new() + .target(extcalldata_clone) + .raw(true) + .build() + .expect("Failed to build DecodeArgs"), + ) + .await + }) }) - }) - .ok(); - - // build modifiers - // - if gas is just the default (GAS()), we don't need to include it - // - if value is just the default (0), we don't need to include it - let mut modifiers = vec![]; - if instruction.input_operations[0] != w_gas!() { - modifiers.push(format!("gas: {}", gas_solidified)); - } - if instruction.input_operations[2] != w_push0!() { - // if the value is just a hex string, we can parse it as ether for readability - if let Ok(value) = - u128::from_str_radix(value_solidified.trim_start_matches("0x"), 16) - { - let ether_value = value as f64 / 10_f64.powi(18); - modifiers.push(format!("value: {} ether", ether_value)); - } else { - modifiers.push(format!("value: {}", value_solidified)); + .ok(); + + // build modifiers + // - if gas is just the default (GAS()), we don't need to include it + // - 
if value is just the default (0), we don't need to include it + let mut modifiers = vec![]; + if instruction.input_operations[0] != w_gas!() { + modifiers.push(format!("gas: {}", gas_solidified)); } - } - let modifier = if modifiers.is_empty() { - "".to_string() - } else { - format!("{{ {} }}", modifiers.join(", ")) - }; - - // check if the external call is a precompiled contract - if let Some(precompile_logic) = - decode_precompile(instruction.inputs[1], &memory, &instruction.input_operations[5]) - { - function.logic.push(precompile_logic); - } else if let Some(decoded) = decoded { - function.logic.push(format!( - "(bool success, bytes memory ret0) = address({}).{}{}(...); // {}", - address, - modifier, - decoded.decoded.name, - opcode_name(instruction.opcode).to_lowercase(), - )); - } else { - function.logic.push(format!( - "(bool success, bytes memory ret0) = address({}).Unresolved_{}{}(...); // {}", + if instruction.input_operations[2] != w_push0!() { + // if the value is just a hex string, we can parse it as ether for readability + if let Ok(value) = + u128::from_str_radix(value_solidified.trim_start_matches("0x"), 16) + { + let ether_value = value as f64 / 10_f64.powi(18); + modifiers.push(format!("value: {} ether", ether_value)); + } else { + modifiers.push(format!("value: {}", value_solidified)); + } + } + let modifier = if modifiers.is_empty() { + "".to_string() + } else { + format!("{{ {} }}", modifiers.join(", ")) + }; + + // check if the external call is a precompiled contract + if let Some(precompile_logic) = decode_precompile( + instruction.inputs[1], + &memory, + &instruction.input_operations[5], + ) { + function.logic.push(precompile_logic); + } else if let Some(decoded) = decoded { + function.logic.push(format!( + "(bool success, bytes memory ret0) = address({}).{}{}(...); // {}", + address, + modifier, + decoded.decoded.name, + opcode_name(instruction.opcode).to_lowercase(), + )); + } else { + function.logic.push(format!( + "(bool success, bytes 
memory ret0) = address({}).Unresolved_{}{}(msg.data[{}:{}]); // {}", address, extcalldata.get(2..10).unwrap_or(""), modifier, + instruction.input_operations[3].solidify(), + instruction.input_operations[4].solidify(), opcode_name(instruction.opcode).to_lowercase(), )); + } } - } - - // STATICCALL / DELEGATECALL - 0xfa | 0xf4 => { - let gas = format!("gas: {}", instruction.input_operations[0].solidify()); - let address = instruction.input_operations[1].solidify(); - let memory = function.get_memory_range(instruction.inputs[2], instruction.inputs[3]); - let extcalldata = memory - .iter() - .map(|x| x.value.to_lower_hex().trim_start_matches("0x").to_owned()) - .collect::>() - .join(""); - - let extcalldata_clone = extcalldata.clone(); - let decoded = blocking_await(move || { - let rt = tokio::runtime::Runtime::new().expect("failed to get runtime"); - - rt.block_on(async { - decode( - DecodeArgsBuilder::new() - .target(extcalldata_clone) - .raw(true) - .build() - .expect("Failed to build DecodeArgs"), - ) - .await + + // STATICCALL / DELEGATECALL + 0xfa | 0xf4 => { + let gas = format!("gas: {}", instruction.input_operations[0].solidify()); + let address = instruction.input_operations[1].solidify(); + let memory = + function.get_memory_range(instruction.inputs[2], instruction.inputs[3]); + let extcalldata = memory + .iter() + .map(|x| x.value.to_lower_hex().trim_start_matches("0x").to_owned()) + .collect::>() + .join(""); + + let extcalldata_clone = extcalldata.clone(); + let decoded = blocking_await(move || { + let rt = tokio::runtime::Runtime::new().expect("failed to get runtime"); + + rt.block_on(async { + decode( + DecodeArgsBuilder::new() + .target(extcalldata_clone) + .raw(true) + .build() + .expect("Failed to build DecodeArgs"), + ) + .await + }) }) - }) - .ok(); - - // build the modifier w/ gas - // if the modifier is just the default (GAS()), we don't need to include it - let modifier = if instruction.input_operations[0] != w_gas!() { - format!("{{ {} }}", gas) 
- } else { - "".to_string() - }; - - // check if the external call is a precompiled contract - if let Some(precompile_logic) = - decode_precompile(instruction.inputs[1], &memory, &instruction.input_operations[4]) - { - function.logic.push(precompile_logic); - } else if let Some(decoded) = decoded { - function.logic.push(format!( - "(bool success, bytes memory ret0) = address({}).{}{}(...); // {}", - address, - modifier, - decoded.decoded.name, - opcode_name(instruction.opcode).to_lowercase(), - )); - } else { - function.logic.push(format!( - "(bool success, bytes memory ret0) = address({}).Unresolved_{}{}(...); // {}", + .ok(); + + // build the modifier w/ gas + // if the modifier is just the default (GAS()), we don't need to include it + let modifier = if instruction.input_operations[0] != w_gas!() { + format!("{{ {} }}", gas) + } else { + "".to_string() + }; + + // check if the external call is a precompiled contract + if let Some(precompile_logic) = decode_precompile( + instruction.inputs[1], + &memory, + &instruction.input_operations[4], + ) { + function.logic.push(precompile_logic); + } else if let Some(decoded) = decoded { + function.logic.push(format!( + "(bool success, bytes memory ret0) = address({}).{}{}(...); // {}", + address, + modifier, + decoded.decoded.name, + opcode_name(instruction.opcode).to_lowercase(), + )); + } else { + function.logic.push(format!( + "(bool success, bytes memory ret0) = address({}).Unresolved_{}{}(msg.data[{}:{}]); // {}", address, extcalldata.get(2..10).unwrap_or(""), modifier, + instruction.input_operations[2].solidify(), + instruction.input_operations[3].solidify(), opcode_name(instruction.opcode).to_lowercase(), )); + } } - } - _ => {} - }; + _ => {} + }; - Ok(()) + Ok(()) + }) } // TODO: handle skip_resolving (need to fix in inspect mod too) diff --git a/crates/decompile/src/utils/heuristics/mod.rs b/crates/decompile/src/utils/heuristics/mod.rs index 413e0409..604ec163 100644 --- 
a/crates/decompile/src/utils/heuristics/mod.rs +++ b/crates/decompile/src/utils/heuristics/mod.rs @@ -1,3 +1,4 @@ +use futures::future::BoxFuture; use heimdall_vm::core::vm::State; use crate::{core::analyze::AnalyzerState, interfaces::AnalyzedFunction, Error}; @@ -20,24 +21,27 @@ pub use yul::yul_heuristic; /// A heuristic is a function that takes a function and a state and modifies the function based on /// the state +type HeuristicFn = for<'a> fn( + &'a mut AnalyzedFunction, + &'a State, + &'a mut AnalyzerState, +) -> BoxFuture<'a, Result<(), Error>>; + pub(crate) struct Heuristic { - implementation: fn(&mut AnalyzedFunction, &State, &mut AnalyzerState) -> Result<(), Error>, + implementation: HeuristicFn, } impl Heuristic { - pub fn new( - implementation: fn(&mut AnalyzedFunction, &State, &mut AnalyzerState) -> Result<(), Error>, - ) -> Self { + pub fn new(implementation: HeuristicFn) -> Self { Self { implementation } } - /// Run the heuristic implementation on the given state - pub fn run( + pub async fn run<'a>( &self, - function: &mut AnalyzedFunction, - state: &State, - analyzer_state: &mut AnalyzerState, + function: &'a mut AnalyzedFunction, + state: &'a State, + analyzer_state: &'a mut AnalyzerState, ) -> Result<(), Error> { - (self.implementation)(function, state, analyzer_state) + (self.implementation)(function, state, analyzer_state).await } } diff --git a/crates/decompile/src/utils/heuristics/modifiers.rs b/crates/decompile/src/utils/heuristics/modifiers.rs index 074ff11f..118a9ef4 100644 --- a/crates/decompile/src/utils/heuristics/modifiers.rs +++ b/crates/decompile/src/utils/heuristics/modifiers.rs @@ -1,3 +1,4 @@ +use futures::future::BoxFuture; use heimdall_vm::{ core::{ opcodes::{OpCodeInfo, JUMPI}, @@ -9,45 +10,47 @@ use tracing::debug; use crate::{core::analyze::AnalyzerState, interfaces::AnalyzedFunction, Error}; -pub fn modifier_heuristic( - function: &mut AnalyzedFunction, - state: &State, - _: &mut AnalyzerState, -) -> Result<(), Error> { - 
let opcode_info = OpCodeInfo::from(state.last_instruction.opcode); +pub fn modifier_heuristic<'a>( + function: &'a mut AnalyzedFunction, + state: &'a State, + _: &'a mut AnalyzerState, +) -> BoxFuture<'a, Result<(), Error>> { + Box::pin(async move { + let opcode_info = OpCodeInfo::from(state.last_instruction.opcode); - // if any instruction is non-pure, the function is non-pure - if function.pure && !opcode_info.is_pure() { - debug!( - "instruction {} ({}) indicates a non-pure function", - state.last_instruction.instruction, - opcode_info.name() - ); - function.pure = false; - } + // if any instruction is non-pure, the function is non-pure + if function.pure && !opcode_info.is_pure() { + debug!( + "instruction {} ({}) indicates a non-pure function", + state.last_instruction.instruction, + opcode_info.name() + ); + function.pure = false; + } - // if any instruction is non-view, the function is non-view - if function.view && !opcode_info.is_view() { - debug!( - "instruction {} ({}) indicates a non-view function", - state.last_instruction.instruction, - opcode_info.name() - ); - function.view = false; - } + // if any instruction is non-view, the function is non-view + if function.view && !opcode_info.is_view() { + debug!( + "instruction {} ({}) indicates a non-view function", + state.last_instruction.instruction, + opcode_info.name() + ); + function.view = false; + } - // if the instruction is a JUMPI with non-zero CALLVALUE requirement, the function is - // non-payable exactly: ISZERO(CALLVALUE()) - if function.payable && - state.last_instruction.opcode == JUMPI && - state.last_instruction.input_operations[1] == w_iszero!(w_callvalue!()) - { - debug!( - "conditional at instruction {} indicates a non-payable function", - state.last_instruction.instruction - ); - function.payable = false; - } + // if the instruction is a JUMPI with non-zero CALLVALUE requirement, the function is + // non-payable exactly: ISZERO(CALLVALUE()) + if function.payable && + 
state.last_instruction.opcode == JUMPI && + state.last_instruction.input_operations[1] == w_iszero!(w_callvalue!()) + { + debug!( + "conditional at instruction {} indicates a non-payable function", + state.last_instruction.instruction + ); + function.payable = false; + } - Ok(()) + Ok(()) + }) } diff --git a/crates/decompile/src/utils/heuristics/solidity.rs b/crates/decompile/src/utils/heuristics/solidity.rs index 21062d0d..d2707861 100644 --- a/crates/decompile/src/utils/heuristics/solidity.rs +++ b/crates/decompile/src/utils/heuristics/solidity.rs @@ -1,5 +1,6 @@ use alloy::primitives::U256; use alloy_dyn_abi::{DynSolType, DynSolValue}; +use futures::future::BoxFuture; use heimdall_common::utils::strings::encode_hex_reduced; use heimdall_vm::core::vm::State; @@ -10,239 +11,242 @@ use crate::{ Error, }; -pub fn solidity_heuristic( - function: &mut AnalyzedFunction, - state: &State, - analyzer_state: &mut AnalyzerState, -) -> Result<(), Error> { - let instruction = &state.last_instruction; - - match instruction.opcode { - // CALLDATACOPY - 0x37 => { - let memory_offset = &instruction.input_operations[0]; - let source_offset = instruction.inputs[1]; - let size_bytes = instruction.inputs[2]; - - // add the mstore to the function's memory map - function.logic.push(format!( - "memory[{}] = msg.data[{}:{}];", - memory_offset.solidify(), - source_offset, - source_offset.saturating_add(size_bytes) - )); - } - - // CODECOPY - 0x39 => { - let memory_offset = &instruction.input_operations[0]; - let source_offset = instruction.inputs[1]; - let size_bytes = instruction.inputs[2]; - - // add the mstore to the function's memory map - function.logic.push(format!( - "memory[{}] = this.code[{}:{}];", - memory_offset.solidify(), - source_offset, - source_offset.saturating_add(size_bytes) - )); - } - - // EXTCODECOPY - 0x3C => { - let address = &instruction.input_operations[0]; - let memory_offset = &instruction.input_operations[1]; - let source_offset = instruction.inputs[2]; - let 
size_bytes = instruction.inputs[3]; - - // add the mstore to the function's memory map - function.logic.push(format!( - "memory[{}] = address({}).code[{}:{}]", - memory_offset.solidify(), - address.solidify(), - source_offset, - source_offset.saturating_add(size_bytes) - )); - } - - // MSTORE / MSTORE8 - 0x52 | 0x53 => { - let key = instruction.inputs[0]; - let value = instruction.inputs[1]; - let operation = instruction.input_operations[1].to_owned(); - - // add the mstore to the function's memory map - function.memory.insert(key, StorageFrame { operation, value }); - function.logic.push(format!( - "memory[{}] = {};", - encode_hex_reduced(key), - instruction.input_operations[1].solidify() - )); - } - - // SSTORE - 0x55 => { - function.logic.push(format!( - "storage[{}] = {};", - instruction.input_operations[0].solidify(), - instruction.input_operations[1].solidify(), - )); - } - - // JUMPI - 0x57 => { - // this is an if conditional for the children branches - let conditional = instruction.input_operations[1].solidify(); - - // perform a series of checks to determine if the condition - // is added by the compiler and can be ignored - if (conditional.contains("msg.data.length") && conditional.contains("0x04")) || - VARIABLE_SIZE_CHECK_REGEX.is_match(&conditional).unwrap_or(false) || - (conditional.replace('!', "") == "success") || - (conditional == "!msg.value") - { - return Ok(()); +pub fn solidity_heuristic<'a>( + function: &'a mut AnalyzedFunction, + state: &'a State, + analyzer_state: &'a mut AnalyzerState, +) -> BoxFuture<'a, Result<(), Error>> { + Box::pin(async move { + let instruction = &state.last_instruction; + + match instruction.opcode { + // CALLDATACOPY + 0x37 => { + let memory_offset = &instruction.input_operations[0]; + let source_offset = instruction.inputs[1]; + let size_bytes = instruction.inputs[2]; + + // add the mstore to the function's memory map + function.logic.push(format!( + "memory[{}] = msg.data[{}:{}];", + memory_offset.solidify(), + 
source_offset, + source_offset.saturating_add(size_bytes) + )); } - function.logic.push(format!("if ({conditional}) {{").to_string()); - - // save a copy of the conditional and add it to the conditional map - analyzer_state.jumped_conditional = Some(conditional.clone()); - analyzer_state.conditional_stack.push(conditional); - } - - // TSTORE - 0x5d => { - function.logic.push(format!( - "transient[{}] = {};", - instruction.input_operations[0].solidify(), - instruction.input_operations[1].solidify(), - )); - } - - // CREATE / CREATE2 - 0xf0 | 0xf5 => { - function.logic.push(format!( - "assembly {{ addr := create({}) }}", - instruction - .input_operations - .iter() - .map(|x| x.solidify()) - .collect::>() - .join(", ") - )); - } - - // REVERT - 0xfd => { - // Safely convert U256 to usize - let offset: usize = instruction.inputs[0].try_into().unwrap_or(0); - let size: usize = instruction.inputs[1].try_into().unwrap_or(0); - let revert_data = state.memory.read(offset, size); - - // (1) if revert_data starts with 0x08c379a0, the folling is an error string - // abiencoded (2) if revert_data starts with 0x4e487b71, the - // following is a compiler panic (3) if revert_data starts with any - // other 4byte selector, it is a custom error and should - // be resolved and added to the generated ABI - // (4) if revert_data is empty, it is an empty revert. Ex: - // - if (true != false) { revert() }; - // - require(true != false) - let revert_logic; - - // handle case with error string abiencoded - if revert_data.starts_with(&[0x08, 0xc3, 0x79, 0xa0]) { - let revert_string = match revert_data.get(4..) 
{ - Some(hex_data) => match DynSolType::String.abi_decode(hex_data) { - Ok(revert) => match revert { - DynSolValue::String(revert) => revert, - _ => "decoding error".to_string(), + // CODECOPY + 0x39 => { + let memory_offset = &instruction.input_operations[0]; + let source_offset = instruction.inputs[1]; + let size_bytes = instruction.inputs[2]; + + // add the mstore to the function's memory map + function.logic.push(format!( + "memory[{}] = this.code[{}:{}];", + memory_offset.solidify(), + source_offset, + source_offset.saturating_add(size_bytes) + )); + } + + // EXTCODECOPY + 0x3C => { + let address = &instruction.input_operations[0]; + let memory_offset = &instruction.input_operations[1]; + let source_offset = instruction.inputs[2]; + let size_bytes = instruction.inputs[3]; + + // add the mstore to the function's memory map + function.logic.push(format!( + "memory[{}] = address({}).code[{}:{}]", + memory_offset.solidify(), + address.solidify(), + source_offset, + source_offset.saturating_add(size_bytes) + )); + } + + // MSTORE / MSTORE8 + 0x52 | 0x53 => { + let key = instruction.inputs[0]; + let value = instruction.inputs[1]; + let operation = instruction.input_operations[1].to_owned(); + + // add the mstore to the function's memory map + function.memory.insert(key, StorageFrame { operation, value }); + function.logic.push(format!( + "memory[{}] = {};", + encode_hex_reduced(key), + instruction.input_operations[1].solidify() + )); + } + + // SSTORE + 0x55 => { + function.logic.push(format!( + "storage[{}] = {};", + instruction.input_operations[0].solidify(), + instruction.input_operations[1].solidify(), + )); + } + + // JUMPI + 0x57 => { + // this is an if conditional for the children branches + let conditional = instruction.input_operations[1].solidify(); + + // perform a series of checks to determine if the condition + // is added by the compiler and can be ignored + if (conditional.contains("msg.data.length") && conditional.contains("0x04")) || + 
VARIABLE_SIZE_CHECK_REGEX.is_match(&conditional).unwrap_or(false) || + (conditional.replace('!', "") == "success") || + (conditional == "!msg.value") + { + return Ok(()); + } + + function.logic.push(format!("if ({conditional}) {{").to_string()); + + // save a copy of the conditional and add it to the conditional map + analyzer_state.jumped_conditional = Some(conditional.clone()); + analyzer_state.conditional_stack.push(conditional); + } + + // TSTORE + 0x5d => { + function.logic.push(format!( + "transient[{}] = {};", + instruction.input_operations[0].solidify(), + instruction.input_operations[1].solidify(), + )); + } + + // CREATE / CREATE2 + 0xf0 | 0xf5 => { + function.logic.push(format!( + "assembly {{ addr := create({}) }}", + instruction + .input_operations + .iter() + .map(|x| x.solidify()) + .collect::>() + .join(", ") + )); + } + + // REVERT + 0xfd => { + // Safely convert U256 to usize + let offset: usize = instruction.inputs[0].try_into().unwrap_or(0); + let size: usize = instruction.inputs[1].try_into().unwrap_or(0); + let revert_data = state.memory.read(offset, size); + + // (1) if revert_data starts with 0x08c379a0, the folling is an error string + // abiencoded (2) if revert_data starts with 0x4e487b71, the + // following is a compiler panic (3) if revert_data starts with any + // other 4byte selector, it is a custom error and should + // be resolved and added to the generated ABI + // (4) if revert_data is empty, it is an empty revert. Ex: + // - if (true != false) { revert() }; + // - require(true != false) + let revert_logic; + + // handle case with error string abiencoded + if revert_data.starts_with(&[0x08, 0xc3, 0x79, 0xa0]) { + let revert_string = match revert_data.get(4..) 
{ + Some(hex_data) => match DynSolType::String.abi_decode(hex_data) { + Ok(revert) => match revert { + DynSolValue::String(revert) => revert, + _ => "decoding error".to_string(), + }, + Err(_) => "decoding error".to_string(), }, - Err(_) => "decoding error".to_string(), - }, - None => "decoding error".to_string(), - }; - revert_logic = match analyzer_state.jumped_conditional.clone() { - Some(condition) => { - analyzer_state.jumped_conditional = None; - format!("require({condition}, \"{revert_string}\");") - } - None => { - // loop backwards through logic to find the last IF statement - for i in (0..function.logic.len()).rev() { - if function.logic[i].starts_with("if") { - let conditional = match analyzer_state.conditional_stack.pop() { - Some(condition) => condition, - None => break, - }; - - function.logic[i] = - format!("require({conditional}, \"{revert_string}\");"); + None => "decoding error".to_string(), + }; + revert_logic = match analyzer_state.jumped_conditional.clone() { + Some(condition) => { + analyzer_state.jumped_conditional = None; + format!("require({condition}, \"{revert_string}\");") + } + None => { + // loop backwards through logic to find the last IF statement + for i in (0..function.logic.len()).rev() { + if function.logic[i].starts_with("if") { + let conditional = match analyzer_state.conditional_stack.pop() { + Some(condition) => condition, + None => break, + }; + + function.logic[i] = + format!("require({conditional}, \"{revert_string}\");"); + } } + return Ok(()); } - return Ok(()); } } - } - // handle case with custom error OR empty revert - else if !revert_data.starts_with(&[0x4e, 0x48, 0x7b, 0x71]) { - let custom_error_placeholder = match revert_data.get(0..4) { - Some(selector) => { - function.errors.insert(U256::from_be_slice(selector)); - format!( - "CustomError_{}()", - encode_hex_reduced(U256::from_be_slice(selector)).replacen("0x", "", 1) - ) - } - None => "()".to_string(), - }; - - revert_logic = match 
analyzer_state.jumped_conditional.clone() { - Some(condition) => { - analyzer_state.jumped_conditional = None; - if custom_error_placeholder == *"()" { - format!("require({condition});",) - } else { - format!("require({condition}, {custom_error_placeholder});") + // handle case with custom error OR empty revert + else if !revert_data.starts_with(&[0x4e, 0x48, 0x7b, 0x71]) { + let custom_error_placeholder = match revert_data.get(0..4) { + Some(selector) => { + function.errors.insert(U256::from_be_slice(selector)); + format!( + "CustomError_{}()", + encode_hex_reduced(U256::from_be_slice(selector)) + .replacen("0x", "", 1) + ) } - } - None => { - // loop backwards through logic to find the last IF statement - for i in (0..function.logic.len()).rev() { - if function.logic[i].starts_with("if") { - let conditional = match analyzer_state.conditional_stack.pop() { - Some(condition) => condition, - None => break, - }; - - if custom_error_placeholder == *"()" { - function.logic[i] = format!("require({conditional});",); - } else { - function.logic[i] = format!( - "require({conditional}, {custom_error_placeholder});" - ); + None => "()".to_string(), + }; + + revert_logic = match analyzer_state.jumped_conditional.clone() { + Some(condition) => { + analyzer_state.jumped_conditional = None; + if custom_error_placeholder == *"()" { + format!("require({condition});",) + } else { + format!("require({condition}, {custom_error_placeholder});") + } + } + None => { + // loop backwards through logic to find the last IF statement + for i in (0..function.logic.len()).rev() { + if function.logic[i].starts_with("if") { + let conditional = match analyzer_state.conditional_stack.pop() { + Some(condition) => condition, + None => break, + }; + + if custom_error_placeholder == *"()" { + function.logic[i] = format!("require({conditional});",); + } else { + function.logic[i] = format!( + "require({conditional}, {custom_error_placeholder});" + ); + } } } + return Ok(()); } - return Ok(()); } + } 
else { + return Ok(()); } - } else { - return Ok(()); - } - function.logic.push(revert_logic); - } + function.logic.push(revert_logic); + } - // SELFDESTRUCT - 0xff => { - function - .logic - .push(format!("selfdestruct({});", instruction.input_operations[0].solidify())); - } + // SELFDESTRUCT + 0xff => { + function + .logic + .push(format!("selfdestruct({});", instruction.input_operations[0].solidify())); + } - _ => {} - }; + _ => {} + }; - Ok(()) + Ok(()) + }) } diff --git a/crates/decompile/src/utils/heuristics/yul.rs b/crates/decompile/src/utils/heuristics/yul.rs index ac498cfd..ba845089 100644 --- a/crates/decompile/src/utils/heuristics/yul.rs +++ b/crates/decompile/src/utils/heuristics/yul.rs @@ -1,3 +1,4 @@ +use futures::future::BoxFuture; use heimdall_common::utils::strings::encode_hex_reduced; use heimdall_vm::core::{opcodes::opcode_name, vm::State}; @@ -7,92 +8,95 @@ use crate::{ Error, }; -pub fn yul_heuristic( - function: &mut AnalyzedFunction, - state: &State, - analyzer_state: &mut AnalyzerState, -) -> Result<(), Error> { - let instruction = &state.last_instruction; +pub fn yul_heuristic<'a>( + function: &'a mut AnalyzedFunction, + state: &'a State, + analyzer_state: &'a mut AnalyzerState, +) -> BoxFuture<'a, Result<(), Error>> { + Box::pin(async move { + let instruction = &state.last_instruction; - match instruction.opcode { - // MSTORE / MSTORE8 - 0x52 | 0x53 => { - let key = instruction.inputs[0]; - let value = instruction.inputs[1]; - let operation = instruction.input_operations[1].clone(); + match instruction.opcode { + // MSTORE / MSTORE8 + 0x52 | 0x53 => { + let key = instruction.inputs[0]; + let value = instruction.inputs[1]; + let operation = instruction.input_operations[1].clone(); - // add the mstore to the function's memory map - function.memory.insert(key, StorageFrame { operation, value }); - function.logic.push(format!( - "{}({}, {})", - opcode_name(instruction.opcode).to_lowercase(), - encode_hex_reduced(key), - 
instruction.input_operations[1].yulify() - )); - } + // add the mstore to the function's memory map + function.memory.insert(key, StorageFrame { operation, value }); + function.logic.push(format!( + "{}({}, {})", + opcode_name(instruction.opcode).to_lowercase(), + encode_hex_reduced(key), + instruction.input_operations[1].yulify() + )); + } - // JUMPI - 0x57 => { - let conditional = instruction.input_operations[1].yulify(); + // JUMPI + 0x57 => { + let conditional = instruction.input_operations[1].yulify(); - function.logic.push(format!("if {conditional} {{").to_string()); - analyzer_state.jumped_conditional = Some(conditional.clone()); - analyzer_state.conditional_stack.push(conditional); - } + function.logic.push(format!("if {conditional} {{").to_string()); + analyzer_state.jumped_conditional = Some(conditional.clone()); + analyzer_state.conditional_stack.push(conditional); + } - // REVERT - 0xfd => { - let revert_data = state.memory.read( - instruction.inputs[0].try_into().unwrap_or(0), - instruction.inputs[1].try_into().unwrap_or(0), - ); + // REVERT + 0xfd => { + let revert_data = state.memory.read( + instruction.inputs[0].try_into().unwrap_or(0), + instruction.inputs[1].try_into().unwrap_or(0), + ); - // ignore compiler panics, we will reach these due to symbolic execution - if revert_data.starts_with(&[0x4e, 0x48, 0x7b, 0x71]) { - return Ok(()); - } + // ignore compiler panics, we will reach these due to symbolic execution + if revert_data.starts_with(&[0x4e, 0x48, 0x7b, 0x71]) { + return Ok(()); + } - // find the if statement that caused this revert, and update it to include the revert - for i in (0..function.logic.len()).rev() { - if function.logic[i].starts_with("if") { - // get matching conditional - let conditional = function.logic[i].split("if ").collect::>()[1] - .split(" {") - .collect::>()[0] - .to_string(); + // find the if statement that caused this revert, and update it to include the + // revert + for i in (0..function.logic.len()).rev() { + if 
function.logic[i].starts_with("if") { + // get matching conditional + let conditional = function.logic[i].split("if ").collect::>()[1] + .split(" {") + .collect::>()[0] + .to_string(); - // we can negate the conditional to get the revert logic - function.logic[i] = format!( - "if {conditional} {{ revert({}, {}); }} else {{", - instruction.input_operations[0].yulify(), - instruction.input_operations[1].yulify() - ); + // we can negate the conditional to get the revert logic + function.logic[i] = format!( + "if {conditional} {{ revert({}, {}); }} else {{", + instruction.input_operations[0].yulify(), + instruction.input_operations[1].yulify() + ); - break; + break; + } } } - } - // STATICCALL, CALL, CALLCODE, DELEGATECALL, CREATE, CREATE2 - // CALLDATACOPY, CODECOPY, EXTCODECOPY, RETURNDATACOPY, TSTORE, - // SSTORE, RETURN, SELFDESTRUCT, LOG0, LOG1, LOG2, LOG3, LOG4 - // we simply want to add the operation to the function's logic - 0x37 | 0x39 | 0x3c | 0x3e | 0x55 | 0x5d | 0xf0 | 0xf1 | 0xf2 | 0xf4 | 0xf5 | 0xfa | - 0xff | 0xA0 | 0xA1 | 0xA2 | 0xA3 | 0xA4 => { - function.logic.push(format!( - "{}({})", - opcode_name(instruction.opcode).to_lowercase(), - instruction - .input_operations - .iter() - .map(|x| x.yulify()) - .collect::>() - .join(", ") - )); - } + // STATICCALL, CALL, CALLCODE, DELEGATECALL, CREATE, CREATE2 + // CALLDATACOPY, CODECOPY, EXTCODECOPY, RETURNDATACOPY, TSTORE, + // SSTORE, RETURN, SELFDESTRUCT, LOG0, LOG1, LOG2, LOG3, LOG4 + // we simply want to add the operation to the function's logic + 0x37 | 0x39 | 0x3c | 0x3e | 0x55 | 0x5d | 0xf0 | 0xf1 | 0xf2 | 0xf4 | 0xf5 | 0xfa | + 0xff | 0xA0 | 0xA1 | 0xA2 | 0xA3 | 0xA4 => { + function.logic.push(format!( + "{}({})", + opcode_name(instruction.opcode).to_lowercase(), + instruction + .input_operations + .iter() + .map(|x| x.yulify()) + .collect::>() + .join(", ") + )); + } - _ => {} - }; + _ => {} + }; - Ok(()) + Ok(()) + }) } From 9a6aceb0379da881532e285420248fffb5eded51 Mon Sep 17 00:00:00 2001 From: 
Jon-Becker Date: Fri, 6 Dec 2024 16:02:15 -0500 Subject: [PATCH 09/14] no need to block because async now --- .../decompile/src/utils/heuristics/extcall.rs | 46 +++++++------------ 1 file changed, 17 insertions(+), 29 deletions(-) diff --git a/crates/decompile/src/utils/heuristics/extcall.rs b/crates/decompile/src/utils/heuristics/extcall.rs index 5921d5ab..a0280869 100644 --- a/crates/decompile/src/utils/heuristics/extcall.rs +++ b/crates/decompile/src/utils/heuristics/extcall.rs @@ -1,5 +1,5 @@ use futures::future::BoxFuture; -use heimdall_common::utils::{hex::ToLowerHex, sync::blocking_await}; +use heimdall_common::utils::hex::ToLowerHex; use heimdall_vm::{ core::{opcodes::opcode_name, vm::State}, w_gas, w_push0, @@ -48,20 +48,14 @@ pub fn extcall_heuristic<'a>( } let extcalldata_clone = extcalldata.clone(); - let decoded = blocking_await(move || { - let rt = tokio::runtime::Runtime::new().expect("failed to get runtime"); - - rt.block_on(async { - decode( - DecodeArgsBuilder::new() - .target(extcalldata_clone) - .raw(true) - .build() - .expect("Failed to build DecodeArgs"), - ) - .await - }) - }) + let decoded = decode( + DecodeArgsBuilder::new() + .target(extcalldata_clone) + .raw(true) + .build() + .expect("Failed to build DecodeArgs"), + ) + .await .ok(); // build modifiers @@ -129,20 +123,14 @@ pub fn extcall_heuristic<'a>( .join(""); let extcalldata_clone = extcalldata.clone(); - let decoded = blocking_await(move || { - let rt = tokio::runtime::Runtime::new().expect("failed to get runtime"); - - rt.block_on(async { - decode( - DecodeArgsBuilder::new() - .target(extcalldata_clone) - .raw(true) - .build() - .expect("Failed to build DecodeArgs"), - ) - .await - }) - }) + let decoded = decode( + DecodeArgsBuilder::new() + .target(extcalldata_clone) + .raw(true) + .build() + .expect("Failed to build DecodeArgs"), + ) + .await .ok(); // build the modifier w/ gas From c21281893bc76e82b0d4aa689751343a50c01cea Mon Sep 17 00:00:00 2001 From: Jon-Becker Date: Fri, 6 
Dec 2024 18:51:53 -0500 Subject: [PATCH 10/14] wip --- crates/core/tests/test_decompile.rs | 3 +- crates/decompile/src/core/out/source.rs | 58 ++++++++++--------- .../decompile/src/utils/heuristics/extcall.rs | 19 +++++- 3 files changed, 50 insertions(+), 30 deletions(-) diff --git a/crates/core/tests/test_decompile.rs b/crates/core/tests/test_decompile.rs index 8827777a..50db61b2 100644 --- a/crates/core/tests/test_decompile.rs +++ b/crates/core/tests/test_decompile.rs @@ -168,7 +168,8 @@ mod integration_tests { include_yul: false, output: String::from(""), name: String::from(""), - timeout: 10000, abi: None, + timeout: 10000, + abi: None, }) .await diff --git a/crates/decompile/src/core/out/source.rs b/crates/decompile/src/core/out/source.rs index cc915795..44162aa9 100644 --- a/crates/decompile/src/core/out/source.rs +++ b/crates/decompile/src/core/out/source.rs @@ -67,9 +67,9 @@ pub fn build_source( functions .iter() .filter(|f| { - !f.fallback && - (analyzer_type == AnalyzerType::Yul || - (f.maybe_getter_for.is_none() && !f.is_constant())) + !f.fallback + && (analyzer_type == AnalyzerType::Yul + || (f.maybe_getter_for.is_none() && !f.is_constant())) }) .for_each(|f| { let mut function_source = Vec::new(); @@ -174,29 +174,35 @@ fn get_function_header(f: &AnalyzedFunction) -> Vec { None => format!("Unresolved_{}", f.selector), }; - let function_signature = format!( - "{}({}) {}", - function_name, - f.sorted_arguments() - .iter() - .enumerate() - .map(|(i, (_, arg))| { - format!( - "{} arg{i}", - match f.resolved_function { - Some(ref sig) => sig.inputs()[i].to_string(), - None => arg - .potential_types() - .first() - .unwrap_or(&"bytes32".to_string()) - .to_string(), - } - ) - }) - .collect::>() - .join(", "), - function_modifiers.join(" ") - ); + let function_signature = match f.resolved_function { + Some(ref sig) => format!( + "{}({}) {}", + function_name, + sig.inputs() + .iter() + .enumerate() + .map(|(i, arg)| { format!("{} arg{i}", arg.to_string()) }) + 
.collect::>() + .join(", "), + function_modifiers.join(" ") + ), + None => format!( + "{}({}) {}", + function_name, + f.sorted_arguments() + .iter() + .enumerate() + .map(|(i, (_, arg))| { + format!( + "{} arg{i}", + arg.potential_types().first().unwrap_or(&"bytes32".to_string()).to_string() + ) + }) + .collect::>() + .join(", "), + function_modifiers.join(" ") + ), + }; match f.analyzer_type { AnalyzerType::Solidity => { diff --git a/crates/decompile/src/utils/heuristics/extcall.rs b/crates/decompile/src/utils/heuristics/extcall.rs index a0280869..9dcd2fd6 100644 --- a/crates/decompile/src/utils/heuristics/extcall.rs +++ b/crates/decompile/src/utils/heuristics/extcall.rs @@ -41,9 +41,22 @@ pub fn extcall_heuristic<'a>( instruction.instruction, opcode_name(instruction.opcode) ); - function - .logic - .push(format!("address({}).transfer({});", address, value_solidified)); + function.logic.push(format!( + "(bool success, bytes memory ret0) = address({}).transfer({});", + address, value_solidified + )); + return Ok(()); + } + if extcalldata.is_empty() { + trace!( + "instruction {} ({}) with no calldata indicates a value transfer", + instruction.instruction, + opcode_name(instruction.opcode) + ); + function.logic.push(format!( + "(bool success, bytes memory ret0) = address({}).transfer({});", + address, value_solidified + )); return Ok(()); } From 26b3fe671b7e4ba1796ff4b673a064a2ab060290 Mon Sep 17 00:00:00 2001 From: Jon-Becker Date: Sat, 7 Dec 2024 11:48:02 -0500 Subject: [PATCH 11/14] wip --- crates/decompile/src/utils/heuristics/extcall.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/decompile/src/utils/heuristics/extcall.rs b/crates/decompile/src/utils/heuristics/extcall.rs index 9dcd2fd6..d7a7a654 100644 --- a/crates/decompile/src/utils/heuristics/extcall.rs +++ b/crates/decompile/src/utils/heuristics/extcall.rs @@ -163,10 +163,11 @@ pub fn extcall_heuristic<'a>( function.logic.push(precompile_logic); } else if let Some(decoded) = 
decoded { function.logic.push(format!( - "(bool success, bytes memory ret0) = address({}).{}{}(...); // {}", + "(bool success, bytes memory ret0) = address({}).{}{}({}); // {}", address, modifier, decoded.decoded.name, + decoded.decoded.inputs.join(", "), opcode_name(instruction.opcode).to_lowercase(), )); } else { From bf84317c3cc7d7c35ec247ff942dedbdb9693049 Mon Sep 17 00:00:00 2001 From: Jon-Becker Date: Sat, 7 Dec 2024 14:04:16 -0500 Subject: [PATCH 12/14] wip --- crates/decompile/src/core/postprocess.rs | 4 ++-- .../decompile/src/utils/heuristics/extcall.rs | 23 ++++++++++++++++--- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/crates/decompile/src/core/postprocess.rs b/crates/decompile/src/core/postprocess.rs index 6219dd26..fff35e49 100644 --- a/crates/decompile/src/core/postprocess.rs +++ b/crates/decompile/src/core/postprocess.rs @@ -130,8 +130,8 @@ impl PostprocessOrchestrator { // Note: this can't be done with a postprocessor because it needs all lines if !function.payable && (function.pure || function.view) && function.arguments.is_empty() { // check for RLP encoding. 
very naive check, but it works for now - if function.logic.iter().any(|line| line.contains("0x0100 *")) && - function.logic.iter().any(|line| line.contains("0x01) &")) + if function.logic.iter().any(|line| line.contains("0x0100 *")) + && function.logic.iter().any(|line| line.contains("0x01) &")) { // find any storage accesses let joined = function.logic.join(" "); diff --git a/crates/decompile/src/utils/heuristics/extcall.rs b/crates/decompile/src/utils/heuristics/extcall.rs index d7a7a654..168f179d 100644 --- a/crates/decompile/src/utils/heuristics/extcall.rs +++ b/crates/decompile/src/utils/heuristics/extcall.rs @@ -1,5 +1,8 @@ +use std::fmt::format; + +use alloy::primitives::U256; use futures::future::BoxFuture; -use heimdall_common::utils::hex::ToLowerHex; +use heimdall_common::utils::{hex::ToLowerHex, strings::encode_hex_reduced}; use heimdall_vm::{ core::{opcodes::opcode_name, vm::State}, w_gas, w_push0, @@ -162,17 +165,31 @@ pub fn extcall_heuristic<'a>( ) { function.logic.push(precompile_logic); } else if let Some(decoded) = decoded { + let start_slot = instruction.inputs[2] + U256::from(4); + function.logic.push(format!( "(bool success, bytes memory ret0) = address({}).{}{}({}); // {}", address, modifier, decoded.decoded.name, - decoded.decoded.inputs.join(", "), + decoded + .decoded + .inputs + .iter() + .enumerate() + .map(|(i, _)| { + format!( + "memory[{}]", + encode_hex_reduced(start_slot + U256::from(i * 32)) + ) + }) + .collect::>() + .join(", "), opcode_name(instruction.opcode).to_lowercase(), )); } else { function.logic.push(format!( - "(bool success, bytes memory ret0) = address({}).Unresolved_{}{}(msg.data[{}:{}]); // {}", + "(bool success, bytes memory ret0) = address({}).Unresolved_{}{}(memory[{}:{}]); // {}", address, extcalldata.get(2..10).unwrap_or(""), modifier, From 523ca3e56197c2c7eb47b119ee9ad05bf6fe1b0d Mon Sep 17 00:00:00 2001 From: Jon-Becker Date: Sat, 7 Dec 2024 14:07:47 -0500 Subject: [PATCH 13/14] wip --- 
.../decompile/src/utils/heuristics/extcall.rs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/crates/decompile/src/utils/heuristics/extcall.rs b/crates/decompile/src/utils/heuristics/extcall.rs index 168f179d..f4961519 100644 --- a/crates/decompile/src/utils/heuristics/extcall.rs +++ b/crates/decompile/src/utils/heuristics/extcall.rs @@ -106,11 +106,26 @@ pub fn extcall_heuristic<'a>( ) { function.logic.push(precompile_logic); } else if let Some(decoded) = decoded { + let start_slot = instruction.inputs[3] + U256::from(4); + function.logic.push(format!( - "(bool success, bytes memory ret0) = address({}).{}{}(...); // {}", + "(bool success, bytes memory ret0) = address({}).{}{}({}); // {}", address, modifier, decoded.decoded.name, + decoded + .decoded + .inputs + .iter() + .enumerate() + .map(|(i, _)| { + format!( + "memory[{}]", + encode_hex_reduced(start_slot + U256::from(i * 32)) + ) + }) + .collect::>() + .join(", "), opcode_name(instruction.opcode).to_lowercase(), )); } else { From a702a09aadbbf1b5469dfdec3eefee4b734768e9 Mon Sep 17 00:00:00 2001 From: Jon-Becker Date: Sat, 7 Dec 2024 14:10:35 -0500 Subject: [PATCH 14/14] wip --- crates/decompile/src/core/out/source.rs | 8 ++++---- crates/decompile/src/core/postprocess.rs | 4 ++-- crates/decompile/src/utils/heuristics/extcall.rs | 2 -- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/crates/decompile/src/core/out/source.rs b/crates/decompile/src/core/out/source.rs index 44162aa9..573a3134 100644 --- a/crates/decompile/src/core/out/source.rs +++ b/crates/decompile/src/core/out/source.rs @@ -67,9 +67,9 @@ pub fn build_source( functions .iter() .filter(|f| { - !f.fallback - && (analyzer_type == AnalyzerType::Yul - || (f.maybe_getter_for.is_none() && !f.is_constant())) + !f.fallback && + (analyzer_type == AnalyzerType::Yul || + (f.maybe_getter_for.is_none() && !f.is_constant())) }) .for_each(|f| { let mut function_source = Vec::new(); @@ -195,7 +195,7 @@ fn 
get_function_header(f: &AnalyzedFunction) -> Vec { .map(|(i, (_, arg))| { format!( "{} arg{i}", - arg.potential_types().first().unwrap_or(&"bytes32".to_string()).to_string() + arg.potential_types().first().unwrap_or(&"bytes32".to_string()) ) }) .collect::>() diff --git a/crates/decompile/src/core/postprocess.rs b/crates/decompile/src/core/postprocess.rs index fff35e49..6219dd26 100644 --- a/crates/decompile/src/core/postprocess.rs +++ b/crates/decompile/src/core/postprocess.rs @@ -130,8 +130,8 @@ impl PostprocessOrchestrator { // Note: this can't be done with a postprocessor because it needs all lines if !function.payable && (function.pure || function.view) && function.arguments.is_empty() { // check for RLP encoding. very naive check, but it works for now - if function.logic.iter().any(|line| line.contains("0x0100 *")) - && function.logic.iter().any(|line| line.contains("0x01) &")) + if function.logic.iter().any(|line| line.contains("0x0100 *")) && + function.logic.iter().any(|line| line.contains("0x01) &")) { // find any storage accesses let joined = function.logic.join(" "); diff --git a/crates/decompile/src/utils/heuristics/extcall.rs b/crates/decompile/src/utils/heuristics/extcall.rs index f4961519..b4c51305 100644 --- a/crates/decompile/src/utils/heuristics/extcall.rs +++ b/crates/decompile/src/utils/heuristics/extcall.rs @@ -1,5 +1,3 @@ -use std::fmt::format; - use alloy::primitives::U256; use futures::future::BoxFuture; use heimdall_common::utils::{hex::ToLowerHex, strings::encode_hex_reduced};