diff --git a/Cargo.lock b/Cargo.lock index c675b131..ac0397e5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2212,6 +2212,7 @@ dependencies = [ "serde", "serde_json", "thiserror", + "tokio", "tracing", ] diff --git a/crates/common/src/ether/calldata.rs b/crates/common/src/ether/calldata.rs index 80cc2cf3..1c43b5d5 100644 --- a/crates/common/src/ether/calldata.rs +++ b/crates/common/src/ether/calldata.rs @@ -4,13 +4,16 @@ use alloy::primitives::TxHash; use eyre::{bail, eyre, Result}; /// Given a target, return calldata of the target. -pub async fn get_calldata_from_target(target: &str, rpc_url: &str) -> Result> { +pub async fn get_calldata_from_target(target: &str, raw: bool, rpc_url: &str) -> Result> { // If the target is a transaction hash, fetch the calldata from the RPC provider. if let Ok(address) = target.parse::() { - return get_transaction(address, rpc_url) - .await - .map(|tx| tx.input.to_vec()) - .map_err(|_| eyre!("failed to fetch transaction from RPC provider")); + // if raw is true, the user specified that the target is raw calldata. skip fetching the transaction. + if !raw { + return get_transaction(address, rpc_url) + .await + .map(|tx| tx.input.to_vec()) + .map_err(|_| eyre!("failed to fetch transaction from RPC provider")); + } } // If the target is not a transaction hash, it could be calldata. @@ -34,6 +37,7 @@ mod tests { let calldata = get_calldata_from_target( "0x317907eeece00619fd4418c18a4ec4ebe5c87cdbff808f4b01cc2c6384799837", + false, &rpc_url, ) .await @@ -51,6 +55,7 @@ mod tests { let calldata = get_calldata_from_target( "0xf14fcbc8bf9eac48d61719f80efb268ef1099a248fa332ed639041337954647ec6583f2e", + false, &rpc_url, ) .await @@ -66,10 +71,31 @@ mod tests { std::process::exit(0); }); - let calldata = - get_calldata_from_target("asfnsdalkfasdlfnlasdkfnalkdsfndaskljfnasldkjfnasf", &rpc_url) - .await; + let calldata = get_calldata_from_target( + "asfnsdalkfasdlfnlasdkfnalkdsfndaskljfnasldkjfnasf", + false, + &rpc_url, + ) + .await; assert!(calldata.is_err()); } + + #[tokio::test] + async fn test_get_calldata_when_target_is_calldata_that_is_exactly_32_bytes() { + let rpc_url = std::env::var("RPC_URL").unwrap_or_else(|_| { + println!("RPC_URL not set, skipping test"); + std::process::exit(0); + }); + + let calldata = get_calldata_from_target( + "0x317907eeece00619fd4418c18a4ec4ebe5c87cdbff808f4b01cc2c6384799837", + true, + &rpc_url, + ) + .await + .expect("failed to get calldata from target"); + + assert!(calldata.len() == 32); + } } diff --git a/crates/core/tests/test_decode.rs b/crates/core/tests/test_decode.rs index a8338347..c59da7eb 100644 --- a/crates/core/tests/test_decode.rs +++ b/crates/core/tests/test_decode.rs @@ -14,6 +14,7 @@ mod integration_tests { constructor: false, truncate_calldata: false, skip_resolving: false, + raw: false, }; let _ = heimdall_decoder::decode(args).await; } @@ -29,6 +30,7 @@ mod integration_tests { constructor: false, truncate_calldata: false, skip_resolving: false, + raw: false, }; let _ = heimdall_decoder::decode(args).await; } diff --git a/crates/decode/src/interfaces/args.rs b/crates/decode/src/interfaces/args.rs index e13c05bd..b1c3a114 100644 --- a/crates/decode/src/interfaces/args.rs +++ b/crates/decode/src/interfaces/args.rs @@ -43,11 +43,15 @@ pub struct DecodeArgs { /// Whether to skip resolving selectors. Heimdall will attempt to guess types. #[clap(long = "skip-resolving")] pub skip_resolving: bool, + + /// Whether to treat the target as a raw calldata string. Useful if the target is exactly 32 bytes. + #[clap(long, short)] + pub raw: bool, } impl DecodeArgs { pub async fn get_calldata(&self) -> Result> { - get_calldata_from_target(&self.target, &self.rpc_url).await + get_calldata_from_target(&self.target, self.raw, &self.rpc_url).await } } @@ -62,6 +66,7 @@ impl DecodeArgsBuilder { constructor: Some(false), truncate_calldata: Some(false), skip_resolving: Some(false), + raw: Some(false), } } } diff --git a/crates/decompile/Cargo.toml b/crates/decompile/Cargo.toml index e5b2d504..eb5815a5 100644 --- a/crates/decompile/Cargo.toml +++ b/crates/decompile/Cargo.toml @@ -30,8 +30,13 @@ fancy-regex = "0.11.0" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" alloy-dyn-abi = "0.8.3" -alloy = { version = "0.3.3", features = ["full", "rpc-types-debug", "rpc-types-trace"] } +alloy = { version = "0.3.3", features = [ + "full", + "rpc-types-debug", + "rpc-types-trace", +] } hashbrown = "0.14.5" +tokio = { version = "1", features = ["full"] } heimdall-disassembler.workspace = true heimdall-vm.workspace = true diff --git a/crates/decompile/src/core/analyze.rs b/crates/decompile/src/core/analyze.rs index 7bc62def..fcf51b8e 100644 --- a/crates/decompile/src/core/analyze.rs +++ b/crates/decompile/src/core/analyze.rs @@ -157,8 +157,8 @@ impl Analyzer { } // check if the ending brackets are needed - if analyzer_state.jumped_conditional.is_some() && - analyzer_state.conditional_stack.contains( + if analyzer_state.jumped_conditional.is_some() + && analyzer_state.conditional_stack.contains( analyzer_state .jumped_conditional .as_ref() @@ -167,8 +167,8 @@ impl Analyzer { { // remove the conditional for (i, conditional) in analyzer_state.conditional_stack.iter().enumerate() { - if conditional == - analyzer_state.jumped_conditional.as_ref().expect( + if conditional + == analyzer_state.jumped_conditional.as_ref().expect( "impossible case: should have short-circuited in previous conditional", ) { diff --git a/crates/decompile/src/utils/heuristics/extcall.rs b/crates/decompile/src/utils/heuristics/extcall.rs index f24d51e0..29a20a06 100644 --- a/crates/decompile/src/utils/heuristics/extcall.rs +++ b/crates/decompile/src/utils/heuristics/extcall.rs @@ -1,29 +1,28 @@ use alloy::primitives::U256; -use alloy_dyn_abi::{DynSolType, DynSolValue}; -use heimdall_common::utils::{ - hex::ToLowerHex, - strings::{encode_hex, encode_hex_reduced}, +use eyre::eyre; +use heimdall_common::utils::{hex::ToLowerHex, sync::blocking_await}; +use heimdall_vm::{ + core::{opcodes::opcode_name, vm::State}, + w_gas, }; -use heimdall_vm::core::{opcodes::opcode_name, vm::State}; use crate::{ core::analyze::AnalyzerState, interfaces::AnalyzedFunction, utils::precompile::decode_precompile, Error, }; +use heimdall_decoder::{decode, DecodeArgsBuilder}; pub fn extcall_heuristic( function: &mut AnalyzedFunction, state: &State, - analyzer_state: &mut AnalyzerState, + _: &mut AnalyzerState, ) -> Result<(), Error> { let instruction = &state.last_instruction; match instruction.opcode { // CALL / CALLCODE 0xf1 | 0xf2 => { - let gas = format!("gas: {}", instruction.input_operations[0].solidify()); let address = instruction.input_operations[1].solidify(); - let value = format!("value: {}", instruction.input_operations[2].solidify()); let memory = function.get_memory_range(instruction.inputs[3], instruction.inputs[4]); let extcalldata = memory .iter() @@ -31,24 +30,51 @@ pub fn extcall_heuristic( .collect::>() .join(""); - // build the modifier w/ gas and value - let modifier = format!("{{ {}, {} }}", gas, value); + let decoded = blocking_await(move || { + let rt = tokio::runtime::Runtime::new().expect("failed to get runtime"); + + rt.block_on(async { + decode( + DecodeArgsBuilder::new() + .target(extcalldata) + .raw(true) + .build() + .expect("Failed to build DecodeArgs"), + ) + .await + }) + }) + .map_err(|e| eyre!("Failed to decode extcalldata: {}", e))?; + + // build modifiers + // - if gas is just the default (GAS()), we don't need to include it + // - if value is just the default (0), we don't need to include it + let mut modifiers = vec![]; + if instruction.input_operations[0] != w_gas!() { + modifiers.push(format!("gas: {}", instruction.input_operations[0].solidify())); + } + if instruction.inputs[2] != U256::ZERO { + modifiers.push(format!("value: {}", instruction.input_operations[2].solidify())); + } + let modifier = if modifiers.is_empty() { + "".to_string() + } else { + format!("{{ {} }}", modifiers.join(", ")) + }; // check if the external call is a precompiled contract - match decode_precompile( - instruction.inputs[1], - &memory, - &instruction.input_operations[5], - ) { - (true, precompile_logic) => { - function.logic.push(precompile_logic); - } - _ => { - function.logic.push(format!( - "(bool success, bytes memory ret0) = address({}).call{}(abi.encode({}));", - address, modifier, extcalldata - )); - } + if let Some(precompile_logic) = + decode_precompile(instruction.inputs[1], &memory, &instruction.input_operations[5]) + { + function.logic.push(precompile_logic); + } else { + function.logic.push(format!( + "(bool success, bytes memory ret0) = address({}).{}{}(...); // {}", + address, + modifier, + decoded.decoded.name, + opcode_name(instruction.opcode).to_lowercase(), + )); } } @@ -63,136 +89,44 @@ pub fn extcall_heuristic( .collect::>() .join(""); + let decoded = blocking_await(move || { + let rt = tokio::runtime::Runtime::new().expect("failed to get runtime"); + + rt.block_on(async { + decode( + DecodeArgsBuilder::new() + .target(extcalldata) + .raw(true) + .build() + .expect("Failed to build DecodeArgs"), + ) + .await + }) + }) + .map_err(|e| eyre!("Failed to decode extcalldata: {}", e))?; + // build the modifier w/ gas - let modifier = format!("{{ {} }}", gas); + // if the modifier is just the default (GAS()), we don't need to include it + let modifier = if instruction.input_operations[0] != w_gas!() { + format!("{{ {} }}", gas) + } else { + "".to_string() + }; // check if the external call is a precompiled contract - match decode_precompile( - instruction.inputs[1], - &memory, - &instruction.input_operations[4], - ) { - (true, precompile_logic) => { - function.logic.push(precompile_logic); - } - _ => { - function.logic.push(format!( - "(bool success, bytes memory ret0) = address({}).{}{}(abi.encode({}));", - address, - opcode_name(instruction.opcode).to_lowercase(), - modifier, - extcalldata - )); - } - } - } - - // REVERT - 0xfd => { - // Safely convert U256 to usize - let offset: usize = instruction.inputs[0].try_into().unwrap_or(0); - let size: usize = instruction.inputs[1].try_into().unwrap_or(0); - let revert_data = state.memory.read(offset, size); - - // (1) if revert_data starts with 0x08c379a0, the folling is an error string - // abiencoded (2) if revert_data starts with 0x4e487b71, the - // following is a compiler panic (3) if revert_data starts with any - // other 4byte selector, it is a custom error and should - // be resolved and added to the generated ABI - // (4) if revert_data is empty, it is an empty revert. Ex: - // - if (true != false) { revert() }; - // - require(true != false) - let revert_logic; - - // handle case with error string abiencoded - if revert_data.starts_with(&[0x08, 0xc3, 0x79, 0xa0]) { - let revert_string = match revert_data.get(4..) { - Some(hex_data) => match DynSolType::String.abi_decode(hex_data) { - Ok(revert) => match revert { - DynSolValue::String(revert) => revert, - _ => "decoding error".to_string(), - }, - Err(_) => "decoding error".to_string(), - }, - None => "decoding error".to_string(), - }; - revert_logic = match analyzer_state.jumped_conditional.clone() { - Some(condition) => { - analyzer_state.jumped_conditional = None; - format!("require({condition}, \"{revert_string}\");") - } - None => { - // loop backwards through logic to find the last IF statement - for i in (0..function.logic.len()).rev() { - if function.logic[i].starts_with("if") { - let conditional = match analyzer_state.conditional_stack.pop() { - Some(condition) => condition, - None => break, - }; - - function.logic[i] = - format!("require({conditional}, \"{revert_string}\");"); - } - } - return Ok(()); - } - } - } - // handle case with custom error OR empty revert - else if !revert_data.starts_with(&[0x4e, 0x48, 0x7b, 0x71]) { - let custom_error_placeholder = match revert_data.get(0..4) { - Some(selector) => { - function.errors.insert(U256::from_be_slice(selector)); - format!( - "CustomError_{}()", - encode_hex_reduced(U256::from_be_slice(selector)).replacen("0x", "", 1) - ) - } - None => "()".to_string(), - }; - - revert_logic = match analyzer_state.jumped_conditional.clone() { - Some(condition) => { - analyzer_state.jumped_conditional = None; - if custom_error_placeholder == *"()" { - format!("require({condition});",) - } else { - format!("require({condition}, {custom_error_placeholder});") - } - } - None => { - // loop backwards through logic to find the last IF statement - for i in (0..function.logic.len()).rev() { - if function.logic[i].starts_with("if") { - let conditional = match analyzer_state.conditional_stack.pop() { - Some(condition) => condition, - None => break, - }; - - if custom_error_placeholder == *"()" { - function.logic[i] = format!("require({conditional});",); - } else { - function.logic[i] = format!( - "require({conditional}, {custom_error_placeholder});" - ); - } - } - } - return Ok(()); - } - } + if let Some(precompile_logic) = + decode_precompile(instruction.inputs[1], &memory, &instruction.input_operations[4]) + { + function.logic.push(precompile_logic); } else { - return Ok(()); + function.logic.push(format!( + "(bool success, bytes memory ret0) = address({}).{}{}(...); // {}", + address, + modifier, + decoded.decoded.name, + opcode_name(instruction.opcode).to_lowercase(), + )); } - - function.logic.push(revert_logic); - } - - // SELFDESTRUCT - 0xff => { - function - .logic - .push(format!("selfdestruct({});", instruction.input_operations[0].solidify())); } _ => {} @@ -200,3 +134,6 @@ pub fn extcall_heuristic( Ok(()) } + +// TODO: handle skip_resolving (need to fix in inspect mod too) +// TODO: handle case where decoding fails diff --git a/crates/decompile/src/utils/precompile.rs b/crates/decompile/src/utils/precompile.rs index b63aa30c..014acb49 100644 --- a/crates/decompile/src/utils/precompile.rs +++ b/crates/decompile/src/utils/precompile.rs @@ -14,54 +14,40 @@ pub fn decode_precompile( precompile_address: U256, extcalldata_memory: &[StorageFrame], return_data_offset: &WrappedOpcode, -) -> (bool, String) { +) -> Option { // safely convert the precompile address to a usize. let address: usize = match precompile_address.try_into() { Ok(x) => x, Err(_) => usize::MAX, }; - let mut is_ext_call_precompile = false; - let mut ext_call_logic = String::new(); - match address { - 1 => { - is_ext_call_precompile = true; - ext_call_logic = format!( - "address memory[{}] = ecrecover({});", - return_data_offset.solidify(), - extcalldata_memory - .iter() - .map(|x| x.operation.solidify()) - .collect::>() - .join(", ") - ); - } - 2 => { - is_ext_call_precompile = true; - ext_call_logic = format!( - "bytes memory[{}] = sha256({});", - return_data_offset.solidify(), - extcalldata_memory - .iter() - .map(|x| x.operation.solidify()) - .collect::>() - .join(", ") - ); - } - 3 => { - is_ext_call_precompile = true; - ext_call_logic = format!( - "bytes memory[{}] = ripemd160({});", - return_data_offset.solidify(), - extcalldata_memory - .iter() - .map(|x| x.operation.solidify()) - .collect::>() - .join(", ") - ); - } - _ => {} + 1 => Some(format!( + "address memory[{}] = ecrecover({});", + return_data_offset.solidify(), + extcalldata_memory + .iter() + .map(|x| x.operation.solidify()) + .collect::>() + .join(", ") + )), + 2 => Some(format!( + "bytes memory[{}] = sha256({});", + return_data_offset.solidify(), + extcalldata_memory + .iter() + .map(|x| x.operation.solidify()) + .collect::>() + .join(", ") + )), + 3 => Some(format!( + "bytes memory[{}] = ripemd160({});", + return_data_offset.solidify(), + extcalldata_memory + .iter() + .map(|x| x.operation.solidify()) + .collect::>() + .join(", ") + )), + _ => None, } - - (is_ext_call_precompile, ext_call_logic) }