diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 4d9a8d0b..80751592 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -68,7 +68,7 @@ jobs: - uses: dtolnay/rust-toolchain@nightly with: components: rustfmt - - run: cargo fmt --check --all + - run: cargo +nightly fmt --check --all check: runs-on: ubuntu-latest diff --git a/Cargo.lock b/Cargo.lock index 7f9ad740..d949ec72 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2223,6 +2223,7 @@ dependencies = [ "serde", "serde_json", "thiserror", + "tokio", "tracing", ] diff --git a/crates/cache/src/lib.rs b/crates/cache/src/lib.rs index 3eca410d..a564499c 100644 --- a/crates/cache/src/lib.rs +++ b/crates/cache/src/lib.rs @@ -215,8 +215,7 @@ pub fn delete_cache(key: &str) -> Result<(), Error> { #[allow(deprecated)] pub fn read_cache(key: &str) -> Result, Error> where - T: 'static + DeserializeOwned, -{ + T: 'static + DeserializeOwned, { let home = home_dir().ok_or(Error::Generic( "failed to get home directory. does your os support `std::env::home_dir()`?".to_string(), ))?; @@ -239,8 +238,8 @@ where .map_err(|e| Error::Generic(format!("failed to deserialize cache object: {:?}", e)))?; // check if the cache has expired, if so, delete it and return None - if cache.expiry - < std::time::SystemTime::now() + if cache.expiry < + std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) .map_err(|e| Error::Generic(format!("failed to get current time: {:?}", e)))? .as_secs() @@ -267,8 +266,7 @@ where #[allow(deprecated)] pub fn store_cache(key: &str, value: T, expiry: Option) -> Result<(), Error> where - T: Serialize, -{ + T: Serialize, { let home = home_dir().ok_or(Error::Generic( "failed to get home directory. does your os support `std::env::home_dir()`?".to_string(), ))?; @@ -280,8 +278,8 @@ where std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) .map_err(|e| Error::Generic(format!("failed to get current time: {:?}", e)))? 
- .as_secs() - + 60 * 60 * 24 * 90, + .as_secs() + + 60 * 60 * 24 * 90, ); let cache = Cache { value, expiry }; @@ -306,8 +304,7 @@ pub async fn with_cache(key: &str, func: F) -> eyre::Result where T: 'static + Serialize + DeserializeOwned + Send + Sync, F: FnOnce() -> Fut + Send, - Fut: std::future::Future> + Send, -{ + Fut: std::future::Future> + Send, { // Try to read from cache match read_cache::(key) { Ok(Some(cached_value)) => { diff --git a/crates/cfg/src/core/graph.rs b/crates/cfg/src/core/graph.rs index 165e8814..702d31a4 100644 --- a/crates/cfg/src/core/graph.rs +++ b/crates/cfg/src/core/graph.rs @@ -61,8 +61,8 @@ pub fn build_cfg( .first() .ok_or_eyre("failed to get first operation")? .last_instruction - .opcode - == JUMPDEST, + .opcode == + JUMPDEST, )?; } diff --git a/crates/cfg/src/core/mod.rs b/crates/cfg/src/core/mod.rs index 9ac86621..054cbf96 100644 --- a/crates/cfg/src/core/mod.rs +++ b/crates/cfg/src/core/mod.rs @@ -8,17 +8,17 @@ use heimdall_vm::core::vm::VM; use petgraph::{dot::Dot, Graph}; use std::time::{Duration, Instant}; -use super::CFGArgs; +use super::CfgArgs; use crate::{core::graph::build_cfg, error::Error}; use tracing::{debug, info}; #[derive(Debug, Clone)] -pub struct CFGResult { +pub struct CfgResult { pub graph: Graph, } -impl CFGResult { +impl CfgResult { pub fn as_dot(&self, color_edges: bool) -> String { let output = format!("{}", Dot::with_config(&self.graph, &[])); @@ -44,7 +44,7 @@ impl CFGResult { } } -pub async fn cfg(args: CFGArgs) -> Result { +pub async fn cfg(args: CfgArgs) -> Result { // init let start_time = Instant::now(); @@ -99,5 +99,5 @@ pub async fn cfg(args: CFGArgs) -> Result { debug!("cfg generated in {:?}", start_time.elapsed()); info!("generated cfg successfully"); - Ok(CFGResult { graph: contract_cfg }) + Ok(CfgResult { graph: contract_cfg }) } diff --git a/crates/cfg/src/interfaces/args.rs b/crates/cfg/src/interfaces/args.rs index 3338ad5a..9e9cc5d8 100644 --- a/crates/cfg/src/interfaces/args.rs +++ 
b/crates/cfg/src/interfaces/args.rs @@ -10,8 +10,8 @@ use heimdall_config::parse_url_arg; after_help = "For more information, read the wiki: https://jbecker.dev/r/heimdall-rs/wiki", override_usage = "heimdall cfg [OPTIONS]" )] -pub struct CFGArgs { - /// The target to generate a CFG for, either a file, bytecode, contract address, or ENS name. +pub struct CfgArgs { + /// The target to generate a CFG for, either a file, bytecode, contract address, or ENS name. #[clap(required = true)] pub target: String, @@ -42,13 +42,13 @@ pub struct CFGArgs { pub timeout: u64, } -impl CFGArgs { +impl CfgArgs { pub async fn get_bytecode(&self) -> Result> { get_bytecode_from_target(&self.target, &self.rpc_url).await } } -impl CFGArgsBuilder { +impl CfgArgsBuilder { pub fn new() -> Self { Self { target: Some(String::new()), diff --git a/crates/cfg/src/interfaces/mod.rs b/crates/cfg/src/interfaces/mod.rs index 3ead88ea..8471a6dd 100644 --- a/crates/cfg/src/interfaces/mod.rs +++ b/crates/cfg/src/interfaces/mod.rs @@ -1,4 +1,4 @@ mod args; // re-export the public interface -pub use args::{CFGArgs, CFGArgsBuilder}; +pub use args::{CfgArgs, CfgArgsBuilder}; diff --git a/crates/cfg/src/lib.rs b/crates/cfg/src/lib.rs index f909564c..a5a0bc8f 100644 --- a/crates/cfg/src/lib.rs +++ b/crates/cfg/src/lib.rs @@ -4,6 +4,6 @@ mod core; mod interfaces; // re-export the public interface -pub use core::{cfg, CFGResult}; +pub use core::{cfg, CfgResult}; pub use error::Error; -pub use interfaces::{CFGArgs, CFGArgsBuilder}; +pub use interfaces::{CfgArgs, CfgArgsBuilder}; diff --git a/crates/cli/src/args.rs b/crates/cli/src/args.rs index 248283b6..e0a614b1 100644 --- a/crates/cli/src/args.rs +++ b/crates/cli/src/args.rs @@ -4,7 +4,7 @@ use clap::{ArgAction, Args, ValueEnum}; use heimdall_cache::CacheArgs; use heimdall_config::ConfigArgs; use heimdall_core::{ - heimdall_cfg::CFGArgs, heimdall_decoder::DecodeArgs, heimdall_decompiler::DecompilerArgs, + heimdall_cfg::CfgArgs, heimdall_decoder::DecodeArgs,
heimdall_decompiler::DecompilerArgs, heimdall_disassembler::DisassemblerArgs, heimdall_dump::DumpArgs, heimdall_inspect::InspectArgs, }; @@ -42,7 +42,7 @@ pub enum Subcommands { Decompile(DecompilerArgs), #[clap(name = "cfg", about = "Generate a visual control flow graph for EVM bytecode")] - CFG(CFGArgs), + Cfg(CfgArgs), #[clap(name = "decode", about = "Decode calldata into readable types")] Decode(DecodeArgs), diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs index 942cafcf..362a5031 100644 --- a/crates/cli/src/main.rs +++ b/crates/cli/src/main.rs @@ -160,7 +160,7 @@ async fn main() -> Result<()> { result.display() } - Subcommands::CFG(mut cmd) => { + Subcommands::Cfg(mut cmd) => { // if the user has not specified a rpc url, use the default if cmd.rpc_url.as_str() == "" { cmd.rpc_url = configuration.rpc_url; diff --git a/crates/common/src/ether/calldata.rs b/crates/common/src/ether/calldata.rs index 80cc2cf3..bd559a07 100644 --- a/crates/common/src/ether/calldata.rs +++ b/crates/common/src/ether/calldata.rs @@ -4,13 +4,17 @@ use alloy::primitives::TxHash; use eyre::{bail, eyre, Result}; /// Given a target, return calldata of the target. -pub async fn get_calldata_from_target(target: &str, rpc_url: &str) -> Result> { +pub async fn get_calldata_from_target(target: &str, raw: bool, rpc_url: &str) -> Result> { // If the target is a transaction hash, fetch the calldata from the RPC provider. if let Ok(address) = target.parse::() { - return get_transaction(address, rpc_url) - .await - .map(|tx| tx.input.to_vec()) - .map_err(|_| eyre!("failed to fetch transaction from RPC provider")); + // if raw is true, the user specified that the target is raw calldata. skip fetching the + // transaction. + if !raw { + return get_transaction(address, rpc_url) + .await + .map(|tx| tx.input.to_vec()) + .map_err(|_| eyre!("failed to fetch transaction from RPC provider")); + } } // If the target is not a transaction hash, it could be calldata. 
@@ -34,6 +38,7 @@ mod tests { let calldata = get_calldata_from_target( "0x317907eeece00619fd4418c18a4ec4ebe5c87cdbff808f4b01cc2c6384799837", + false, &rpc_url, ) .await @@ -51,6 +56,7 @@ mod tests { let calldata = get_calldata_from_target( "0xf14fcbc8bf9eac48d61719f80efb268ef1099a248fa332ed639041337954647ec6583f2e", + false, &rpc_url, ) .await @@ -66,10 +72,31 @@ mod tests { std::process::exit(0); }); - let calldata = - get_calldata_from_target("asfnsdalkfasdlfnlasdkfnalkdsfndaskljfnasldkjfnasf", &rpc_url) - .await; + let calldata = get_calldata_from_target( + "asfnsdalkfasdlfnlasdkfnalkdsfndaskljfnasldkjfnasf", + false, + &rpc_url, + ) + .await; assert!(calldata.is_err()); } + + #[tokio::test] + async fn test_get_calldata_when_target_is_calldata_that_is_exactly_32_bytes() { + let rpc_url = std::env::var("RPC_URL").unwrap_or_else(|_| { + println!("RPC_URL not set, skipping test"); + std::process::exit(0); + }); + + let calldata = get_calldata_from_target( + "0x317907eeece00619fd4418c18a4ec4ebe5c87cdbff808f4b01cc2c6384799837", + true, + &rpc_url, + ) + .await + .expect("failed to get calldata from target"); + + assert!(calldata.len() == 32); + } } diff --git a/crates/common/src/ether/signatures.rs b/crates/common/src/ether/signatures.rs index 704c6007..8bc507fa 100644 --- a/crates/common/src/ether/signatures.rs +++ b/crates/common/src/ether/signatures.rs @@ -339,8 +339,8 @@ pub fn score_signature(signature: &str, num_words: Option) -> u32 { // prioritize signatures with less numbers score -= (signature.split('(').next().unwrap_or("").matches(|c: char| c.is_numeric()).count() - as u32) - * 3; + as u32) * + 3; // prioritize signatures with parameters let num_params = signature.matches(',').count() + 1; @@ -348,9 +348,9 @@ pub fn score_signature(signature: &str, num_words: Option) -> u32 { // count the number of parameters in the signature, if enabled if let Some(num_words) = num_words { - let num_dyn_params = signature.matches("bytes").count() - + 
signature.matches("string").count() - + signature.matches('[').count(); + let num_dyn_params = signature.matches("bytes").count() + + signature.matches("string").count() + + signature.matches('[').count(); let num_static_params = num_params - num_dyn_params; // reduce the score if the signature has less static parameters than there are words in the diff --git a/crates/common/src/ether/tokenize.rs b/crates/common/src/ether/tokenize.rs index 6a81820b..e2d3f37c 100644 --- a/crates/common/src/ether/tokenize.rs +++ b/crates/common/src/ether/tokenize.rs @@ -130,17 +130,17 @@ pub fn tokenize(s: &str) -> Token { let mut op = ch.to_string(); iter.next(); if let Some(&next_ch) = iter.peek() { - if (ch == '=' && (next_ch == '=' || next_ch == '>')) - || (ch == '&' && next_ch == '&') - || (ch == '|' && next_ch == '|') - || (ch == '<' && next_ch == '=') - || (ch == '>' && next_ch == '=') - || (ch == '!' && next_ch == '=') - || (ch == '+' && next_ch == '+') - || (ch == '-' && next_ch == '-') - || (ch == '*' && next_ch == '*') - || (ch == '>' && next_ch == '>') - || (ch == '<' && next_ch == '<') + if (ch == '=' && (next_ch == '=' || next_ch == '>')) || + (ch == '&' && next_ch == '&') || + (ch == '|' && next_ch == '|') || + (ch == '<' && next_ch == '=') || + (ch == '>' && next_ch == '=') || + (ch == '!' 
&& next_ch == '=') || + (ch == '+' && next_ch == '+') || + (ch == '-' && next_ch == '-') || + (ch == '*' && next_ch == '*') || + (ch == '>' && next_ch == '>') || + (ch == '<' && next_ch == '<') { op.push(next_ch); iter.next(); @@ -188,9 +188,9 @@ fn parse_literal(iter: &mut std::iter::Peekable) -> String { } // literal validation - if literal.starts_with("0x") - && literal.len() > 2 - && literal[2..].chars().all(|c| c.is_ascii_hexdigit()) + if literal.starts_with("0x") && + literal.len() > 2 && + literal[2..].chars().all(|c| c.is_ascii_hexdigit()) { return literal; } diff --git a/crates/common/src/utils/hex.rs b/crates/common/src/utils/hex.rs index b40edecf..67b0d473 100644 --- a/crates/common/src/utils/hex.rs +++ b/crates/common/src/utils/hex.rs @@ -1,5 +1,5 @@ use super::strings::encode_hex; -use alloy::primitives::{Address, Bytes, FixedBytes, I256, U256}; +use alloy::primitives::{Address, Bytes, FixedBytes, U256}; /// A convenience function which encodes a given EVM type into a sized, lowercase hex string. 
pub trait ToLowerHex { @@ -20,13 +20,7 @@ impl ToLowerHex for bytes::Bytes { impl ToLowerHex for U256 { fn to_lower_hex(&self) -> String { - format!("{:#032x}", self) - } -} - -impl ToLowerHex for I256 { - fn to_lower_hex(&self) -> String { - format!("{:#032x}", self) + encode_hex(&self.to_be_bytes_vec()) } } diff --git a/crates/common/src/utils/strings.rs b/crates/common/src/utils/strings.rs index 4e663775..d9327a9e 100644 --- a/crates/common/src/utils/strings.rs +++ b/crates/common/src/utils/strings.rs @@ -330,8 +330,8 @@ pub fn tokenize(s: &str) -> Vec { // Check if current character and last character form a compound operator (like "==", // ">=", "&&", "||") if let Some(last) = last_char { - if compound_operator_first_chars.contains(&last) - && (c == '=' || c == '&' || c == '|') + if compound_operator_first_chars.contains(&last) && + (c == '=' || c == '&' || c == '|') { // Remove the last character as a single token tokens.pop(); diff --git a/crates/common/src/utils/sync.rs b/crates/common/src/utils/sync.rs index 151f8a36..db307ee7 100644 --- a/crates/common/src/utils/sync.rs +++ b/crates/common/src/utils/sync.rs @@ -1,7 +1,10 @@ +use std::{future::Future, pin::Pin}; + /// Take in a non-async function and await it. This functions should be blocking. 
pub fn blocking_await(f: F) -> T where - F: FnOnce() -> T, -{ + F: FnOnce() -> T, { tokio::task::block_in_place(f) } + +pub type BoxFuture<'a, T> = Pin + 'a>>; diff --git a/crates/common/src/utils/version.rs b/crates/common/src/utils/version.rs index a72e42fd..54b6d0a0 100644 --- a/crates/common/src/utils/version.rs +++ b/crates/common/src/utils/version.rs @@ -93,46 +93,46 @@ impl Display for Version { impl Version { /// greater than pub fn gt(&self, other: &Version) -> bool { - self.major > other.major - || (self.major == other.major && self.minor > other.minor) - || (self.major == other.major && self.minor == other.minor && self.patch > other.patch) + self.major > other.major || + (self.major == other.major && self.minor > other.minor) || + (self.major == other.major && self.minor == other.minor && self.patch > other.patch) } /// greater than or equal to pub fn gte(&self, other: &Version) -> bool { - self.major > other.major - || (self.major == other.major && self.minor > other.minor) - || (self.major == other.major && self.minor == other.minor && self.patch >= other.patch) + self.major > other.major || + (self.major == other.major && self.minor > other.minor) || + (self.major == other.major && self.minor == other.minor && self.patch >= other.patch) } /// less than pub fn lt(&self, other: &Version) -> bool { - self.major < other.major - || (self.major == other.major && self.minor < other.minor) - || (self.major == other.major && self.minor == other.minor && self.patch < other.patch) + self.major < other.major || + (self.major == other.major && self.minor < other.minor) || + (self.major == other.major && self.minor == other.minor && self.patch < other.patch) } /// less than or equal to pub fn lte(&self, other: &Version) -> bool { - self.major < other.major - || (self.major == other.major && self.minor < other.minor) - || (self.major == other.major && self.minor == other.minor && self.patch <= other.patch) + self.major < other.major || + (self.major == other.major 
&& self.minor < other.minor) || + (self.major == other.major && self.minor == other.minor && self.patch <= other.patch) } #[allow(clippy::should_implement_trait)] pub fn eq(&self, other: &Version) -> bool { - self.major == other.major - && self.minor == other.minor - && self.patch == other.patch - && self.channel == other.channel + self.major == other.major && + self.minor == other.minor && + self.patch == other.patch && + self.channel == other.channel } /// not equal to pub fn ne(&self, other: &Version) -> bool { - self.major != other.major - || self.minor != other.minor - || self.patch != other.patch - || self.channel != other.channel + self.major != other.major || + self.minor != other.minor || + self.patch != other.patch || + self.channel != other.channel } /// if the version is a nightly version diff --git a/crates/core/benches/bench_cfg.rs b/crates/core/benches/bench_cfg.rs index f53e0e7a..e97ce26e 100644 --- a/crates/core/benches/bench_cfg.rs +++ b/crates/core/benches/bench_cfg.rs @@ -1,5 +1,5 @@ use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; -use heimdall_cfg::{cfg, CFGArgsBuilder}; +use heimdall_cfg::{cfg, CfgArgsBuilder}; use tokio::runtime::Runtime; fn test_cfg(c: &mut Criterion) { @@ -17,10 +17,10 @@ fn test_cfg(c: &mut Criterion) { group.bench_with_input(BenchmarkId::from_parameter(name), &contract, |b, c| { b.to_async::(Runtime::new().unwrap()).iter(|| async { let start = std::time::Instant::now(); - let args = CFGArgsBuilder::new() + let args = CfgArgsBuilder::new() .target(c.to_string()) .build() - .expect("Failed to build CFGArgs"); + .expect("Failed to build CfgArgs"); let _ = cfg(args).await; start.elapsed() }); diff --git a/crates/core/tests/test_cfg.rs b/crates/core/tests/test_cfg.rs index 0ed78ba4..50a4adad 100644 --- a/crates/core/tests/test_cfg.rs +++ b/crates/core/tests/test_cfg.rs @@ -3,7 +3,7 @@ mod integration_tests { use memory_stats::memory_stats; use std::path::PathBuf; - use heimdall_cfg::{cfg, CFGArgs, 
CFGArgsBuilder}; + use heimdall_cfg::{cfg, CfgArgs, CfgArgsBuilder}; use petgraph::dot::Dot; use serde_json::Value; @@ -14,7 +14,7 @@ mod integration_tests { std::process::exit(0); }); - let result = heimdall_cfg::cfg(CFGArgs { + let result = heimdall_cfg::cfg(CfgArgs { target: String::from("0x1bf797219482a29013d804ad96d1c6f84fba4c45"), rpc_url, default: true, @@ -43,7 +43,7 @@ mod integration_tests { std::process::exit(0); }); - let result = heimdall_cfg::cfg(CFGArgs { + let result = heimdall_cfg::cfg(CfgArgs { target: String::from("0xE90d8Fb7B79C8930B5C8891e61c298b412a6e81a"), rpc_url, default: true, @@ -110,8 +110,8 @@ mod integration_tests { let mut fail_count = 0; for (contract_address, bytecode) in contracts { - println!("Generating CFG for contract {contract_address}"); - let args = CFGArgsBuilder::new() + println!("Generating Cfg for contract {contract_address}"); + let args = CfgArgsBuilder::new() .target(bytecode) .timeout(10000) .build() diff --git a/crates/core/tests/test_decode.rs b/crates/core/tests/test_decode.rs index 3f254eb6..c0ca0b6f 100644 --- a/crates/core/tests/test_decode.rs +++ b/crates/core/tests/test_decode.rs @@ -15,6 +15,7 @@ mod integration_tests { constructor: false, truncate_calldata: false, skip_resolving: false, + raw: false, }; let _ = heimdall_decoder::decode(args).await; } @@ -31,6 +32,7 @@ mod integration_tests { constructor: false, truncate_calldata: false, skip_resolving: false, + raw: false, }; let _ = heimdall_decoder::decode(args).await; } diff --git a/crates/core/tests/test_decompile.rs b/crates/core/tests/test_decompile.rs index 8827777a..50db61b2 100644 --- a/crates/core/tests/test_decompile.rs +++ b/crates/core/tests/test_decompile.rs @@ -168,7 +168,8 @@ mod integration_tests { include_yul: false, output: String::from(""), name: String::from(""), - timeout: 10000, abi: None, + timeout: 10000, + abi: None, }) .await diff --git a/crates/decode/src/interfaces/args.rs b/crates/decode/src/interfaces/args.rs index 
c960a280..32f3b408 100644 --- a/crates/decode/src/interfaces/args.rs +++ b/crates/decode/src/interfaces/args.rs @@ -44,6 +44,11 @@ pub struct DecodeArgs { #[clap(long = "skip-resolving")] pub skip_resolving: bool, + /// Whether to treat the target as a raw calldata string. Useful if the target is exactly 32 + /// bytes. + #[clap(long, short)] + pub raw: bool, + /// Path to an optional ABI file to use for resolving errors, functions, and events. #[clap(long, short, default_value = None, hide_default_value = true)] pub abi: Option, @@ -51,7 +56,7 @@ pub struct DecodeArgs { impl DecodeArgs { pub async fn get_calldata(&self) -> Result> { - get_calldata_from_target(&self.target, &self.rpc_url).await + get_calldata_from_target(&self.target, self.raw, &self.rpc_url).await } } @@ -66,6 +71,7 @@ impl DecodeArgsBuilder { constructor: Some(false), truncate_calldata: Some(false), skip_resolving: Some(false), + raw: Some(false), abi: Some(None), } } diff --git a/crates/decode/src/utils/abi.rs b/crates/decode/src/utils/abi.rs index c1f44cc7..b8321a13 100644 --- a/crates/decode/src/utils/abi.rs +++ b/crates/decode/src/utils/abi.rs @@ -185,8 +185,8 @@ fn try_decode_dynamic_parameter_bytes( // (5) we've covered all words from `data_start_word_offset` to `data_end_word_offset`, // so add them to `word_coverages`. coverages.extend( - (word_offset.try_into().unwrap_or(usize::MAX) - ..data_end_word_offset.try_into().unwrap_or(usize::MAX)) + (word_offset.try_into().unwrap_or(usize::MAX).. + data_end_word_offset.try_into().unwrap_or(usize::MAX)) .collect::>(), ); @@ -211,8 +211,8 @@ fn try_decode_dynamic_parameter_array( // (1) join all words from `data_start_word_offset` to `data_end_word_offset`. This is where // the encoded data may be stored. - let data_words = &calldata_words[data_start_word_offset.try_into().unwrap_or(usize::MAX) - ..data_end_word_offset.try_into().unwrap_or(usize::MAX)]; + let data_words = &calldata_words[data_start_word_offset.try_into().unwrap_or(usize::MAX).. 
+ data_end_word_offset.try_into().unwrap_or(usize::MAX)]; trace!("potential array items: {:#?}", data_words); // (2) first, check if this is a `string` type, since some string encodings may appear to be @@ -234,8 +234,8 @@ fn try_decode_dynamic_parameter_array( // `word_coverages` with the indices of all words from `data_start_word_offset` to // `data_end_word_offset`, since we've now covered all words in the ABI-encoded type. coverages.extend( - (word_offset.try_into().unwrap_or(usize::MAX) - ..data_end_word_offset.try_into().unwrap_or(usize::MAX)) + (word_offset.try_into().unwrap_or(usize::MAX).. + data_end_word_offset.try_into().unwrap_or(usize::MAX)) .collect::>(), ); @@ -296,8 +296,8 @@ fn try_decode_dynamic_parameter_string( trace!( "with data: {:#?}", encode_hex( - &calldata_words[data_start_word_offset.try_into().unwrap_or(usize::MAX) - ..data_end_word_offset.try_into().unwrap_or(usize::MAX)] + &calldata_words[data_start_word_offset.try_into().unwrap_or(usize::MAX).. + data_end_word_offset.try_into().unwrap_or(usize::MAX)] .concat() ) ); @@ -321,8 +321,8 @@ fn try_decode_dynamic_parameter_string( // (5) we've covered all words from `data_start_word_offset` to `data_end_word_offset`, // so add them to `word_coverages`. coverages.extend( - (word_offset.try_into().unwrap_or(usize::MAX) - ..data_end_word_offset.try_into().unwrap_or(usize::MAX)) + (word_offset.try_into().unwrap_or(usize::MAX).. 
+ data_end_word_offset.try_into().unwrap_or(usize::MAX)) .collect::>(), ); diff --git a/crates/decode/src/utils/constructor.rs b/crates/decode/src/utils/constructor.rs index 881968b3..836ba2f9 100644 --- a/crates/decode/src/utils/constructor.rs +++ b/crates/decode/src/utils/constructor.rs @@ -42,16 +42,16 @@ pub fn parse_deployment_bytecode(input: Vec) -> Result { let constructor_offset = 0; let metadata_length = u32::from_str_radix( - &input[(contract_offset + contract_length - 4) as usize - ..(contract_offset + contract_length) as usize], + &input[(contract_offset + contract_length - 4) as usize.. + (contract_offset + contract_length) as usize], 16, - )? * 2 - + 4; + )? * 2 + + 4; let constructor = &input[constructor_offset as usize..contract_offset as usize]; let contract = &input[contract_offset as usize..(contract_offset + contract_length) as usize]; - let metadata = &input[(contract_offset + contract_length - metadata_length) as usize - ..(contract_offset + contract_length) as usize]; + let metadata = &input[(contract_offset + contract_length - metadata_length) as usize.. 
+ (contract_offset + contract_length) as usize]; let arguments = &input[(contract_offset + contract_length) as usize..]; Ok(Constructor { diff --git a/crates/decompile/Cargo.toml b/crates/decompile/Cargo.toml index e5b2d504..eb5815a5 100644 --- a/crates/decompile/Cargo.toml +++ b/crates/decompile/Cargo.toml @@ -30,8 +30,13 @@ fancy-regex = "0.11.0" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" alloy-dyn-abi = "0.8.3" -alloy = { version = "0.3.3", features = ["full", "rpc-types-debug", "rpc-types-trace"] } +alloy = { version = "0.3.3", features = [ + "full", + "rpc-types-debug", + "rpc-types-trace", +] } hashbrown = "0.14.5" +tokio = { version = "1", features = ["full"] } heimdall-disassembler.workspace = true heimdall-vm.workspace = true diff --git a/crates/decompile/src/core/analyze.rs b/crates/decompile/src/core/analyze.rs index 9e70150b..6178f1fd 100644 --- a/crates/decompile/src/core/analyze.rs +++ b/crates/decompile/src/core/analyze.rs @@ -1,13 +1,14 @@ use std::{fmt::Display, time::Instant}; +use futures::future::BoxFuture; use heimdall_vm::ext::exec::VMTrace; use tracing::debug; use crate::{ interfaces::AnalyzedFunction, utils::heuristics::{ - argument_heuristic, event_heuristic, modifier_heuristic, solidity_heuristic, yul_heuristic, - Heuristic, + argument_heuristic, event_heuristic, extcall_heuristic, modifier_heuristic, + solidity_heuristic, yul_heuristic, Heuristic, }, Error, }; @@ -85,6 +86,7 @@ impl Analyzer { self.heuristics.push(Heuristic::new(solidity_heuristic)); self.heuristics.push(Heuristic::new(argument_heuristic)); self.heuristics.push(Heuristic::new(modifier_heuristic)); + self.heuristics.push(Heuristic::new(extcall_heuristic)); } AnalyzerType::Yul => { self.heuristics.push(Heuristic::new(event_heuristic)); @@ -103,7 +105,7 @@ impl Analyzer { } /// Performs analysis - pub fn analyze(&mut self, trace_root: VMTrace) -> Result { + pub async fn analyze(&mut self, trace_root: VMTrace) -> Result { debug!( "analzying symbolic 
execution trace for '{}' with the {} analyzer", self.function.selector, self.typ @@ -122,7 +124,7 @@ impl Analyzer { }; // Perform analysis - self.analyze_inner(&trace_root, &mut analyzer_state)?; + self.analyze_inner(&trace_root, &mut analyzer_state).await?; debug!( "analysis for '{}' completed in {:?}", @@ -134,51 +136,52 @@ impl Analyzer { } /// Inner analysis implementation - fn analyze_inner( - &mut self, - branch: &VMTrace, - analyzer_state: &mut AnalyzerState, - ) -> Result<(), Error> { - // reset jumped conditional, we dont propagate conditionals across branches - analyzer_state.jumped_conditional = None; - - // for each operation in the current trace branch, peform analysis with registerred - // heuristics - for operation in &branch.operations { - for heuristic in &self.heuristics { - heuristic.run(&mut self.function, operation, analyzer_state)?; + fn analyze_inner<'a>( + &'a mut self, + branch: &'a VMTrace, + analyzer_state: &'a mut AnalyzerState, + ) -> BoxFuture<'a, Result<(), Error>> { + Box::pin(async move { + // reset jumped conditional, we don't propagate conditionals across branches + analyzer_state.jumped_conditional = None; + + // for each operation in the current trace branch, perform analysis with registered + // heuristics + for operation in &branch.operations { + for heuristic in &self.heuristics { + heuristic.run(&mut self.function, operation, analyzer_state).await?; } } - } - // recurse into the children of the current trace branch - for child in &branch.children { - self.analyze_inner(child, analyzer_state)?; - } + // recurse into the children of the current trace branch + for child in &branch.children { + self.analyze_inner(child, analyzer_state).await?; + } - // check if the ending brackets are needed - if analyzer_state.jumped_conditional.is_some() - && analyzer_state.conditional_stack.contains( - analyzer_state - .jumped_conditional - .as_ref() - .expect("impossible case: should have short-circuited in previous conditional"), - ) - {
- // remove the conditional - for (i, conditional) in analyzer_state.conditional_stack.iter().enumerate() { - if conditional - == analyzer_state.jumped_conditional.as_ref().expect( + // check if the ending brackets are needed + if analyzer_state.jumped_conditional.is_some() && + analyzer_state.conditional_stack.contains( + analyzer_state.jumped_conditional.as_ref().expect( "impossible case: should have short-circuited in previous conditional", - ) - { - analyzer_state.conditional_stack.remove(i); - break; + ), + ) + { + // remove the conditional + for (i, conditional) in analyzer_state.conditional_stack.iter().enumerate() { + if conditional == + analyzer_state.jumped_conditional.as_ref().expect( + "impossible case: should have short-circuited in previous conditional", + ) + { + analyzer_state.conditional_stack.remove(i); + break; + } } - } - self.function.logic.push("}".to_string()); - } + self.function.logic.push("}".to_string()); + } - Ok(()) + Ok(()) + }) } } diff --git a/crates/decompile/src/core/mod.rs b/crates/decompile/src/core/mod.rs index ae0cff47..4234fcc1 100644 --- a/crates/decompile/src/core/mod.rs +++ b/crates/decompile/src/core/mod.rs @@ -156,21 +156,21 @@ pub async fn decompile(args: DecompilerArgs) -> Result { info!("symbolically executed {} selectors", symbolic_execution_maps.len()); let start_analysis_time = Instant::now(); - let mut analyzed_functions = symbolic_execution_maps - .into_iter() - .map(|(selector, trace_root)| { + let handles = symbolic_execution_maps.into_iter().map(|(selector, trace_root)| { + let mut evm_clone = evm.clone(); + async move { let mut analyzer = Analyzer::new( analyzer_type, AnalyzedFunction::new(&selector, selector == "fallback"), ); // analyze the symbolic execution trace - let mut analyzed_function = analyzer.analyze(trace_root)?; + let mut analyzed_function = analyzer.analyze(trace_root).await?; // if the function is constant, we can get the exact val if analyzed_function.is_constant() && 
!analyzed_function.fallback { - evm.reset(); - let x = evm.call(&decode_hex(&selector).expect("invalid selector"), 0)?; + evm_clone.reset(); + let x = evm_clone.call(&decode_hex(&selector).expect("invalid selector"), 0)?; let returns_param_type = analyzed_function .returns @@ -192,8 +192,9 @@ pub async fn decompile(args: DecompilerArgs) -> Result { } Ok::<_, Error>(analyzed_function) - }) - .collect::, Error>>()?; + } + }); + let mut analyzed_functions = futures::future::try_join_all(handles).await?; debug!("analyzing symbolic execution results took {:?}", start_analysis_time.elapsed()); info!("analyzed {} symbolic execution traces", analyzed_functions.len()); diff --git a/crates/decompile/src/core/out/source.rs b/crates/decompile/src/core/out/source.rs index 5f81a0a4..573a3134 100644 --- a/crates/decompile/src/core/out/source.rs +++ b/crates/decompile/src/core/out/source.rs @@ -67,9 +67,9 @@ pub fn build_source( functions .iter() .filter(|f| { - !f.fallback - && (analyzer_type == AnalyzerType::Yul - || (f.maybe_getter_for.is_none() && !f.is_constant())) + !f.fallback && + (analyzer_type == AnalyzerType::Yul || + (f.maybe_getter_for.is_none() && !f.is_constant())) }) .for_each(|f| { let mut function_source = Vec::new(); @@ -174,29 +174,35 @@ fn get_function_header(f: &AnalyzedFunction) -> Vec { None => format!("Unresolved_{}", f.selector), }; - let function_signature = format!( - "{}({}) {}", - function_name, - f.sorted_arguments() - .iter() - .enumerate() - .map(|(i, (_, arg))| { - format!( - "{} arg{i}", - match f.resolved_function { - Some(ref sig) => sig.inputs()[i].to_string(), - None => arg - .potential_types() - .first() - .unwrap_or(&"bytes32".to_string()) - .to_string(), - } - ) - }) - .collect::>() - .join(", "), - function_modifiers.join(" ") - ); + let function_signature = match f.resolved_function { + Some(ref sig) => format!( + "{}({}) {}", + function_name, + sig.inputs() + .iter() + .enumerate() + .map(|(i, arg)| { format!("{} arg{i}", 
arg.to_string()) }) + .collect::>() + .join(", "), + function_modifiers.join(" ") + ), + None => format!( + "{}({}) {}", + function_name, + f.sorted_arguments() + .iter() + .enumerate() + .map(|(i, (_, arg))| { + format!( + "{} arg{i}", + arg.potential_types().first().unwrap_or(&"bytes32".to_string()) + ) + }) + .collect::>() + .join(", "), + function_modifiers.join(" ") + ), + }; match f.analyzer_type { AnalyzerType::Solidity => { diff --git a/crates/decompile/src/core/postprocess.rs b/crates/decompile/src/core/postprocess.rs index fff35e49..6219dd26 100644 --- a/crates/decompile/src/core/postprocess.rs +++ b/crates/decompile/src/core/postprocess.rs @@ -130,8 +130,8 @@ impl PostprocessOrchestrator { // Note: this can't be done with a postprocessor because it needs all lines if !function.payable && (function.pure || function.view) && function.arguments.is_empty() { // check for RLP encoding. very naive check, but it works for now - if function.logic.iter().any(|line| line.contains("0x0100 *")) - && function.logic.iter().any(|line| line.contains("0x01) &")) + if function.logic.iter().any(|line| line.contains("0x0100 *")) && + function.logic.iter().any(|line| line.contains("0x01) &")) { // find any storage accesses let joined = function.logic.join(" "); diff --git a/crates/decompile/src/interfaces/function.rs b/crates/decompile/src/interfaces/function.rs index 5a1a3ff0..64f0367d 100644 --- a/crates/decompile/src/interfaces/function.rs +++ b/crates/decompile/src/interfaces/function.rs @@ -61,6 +61,7 @@ pub struct AnalyzedFunction { #[derive(Clone, Debug)] pub struct StorageFrame { pub operation: WrappedOpcode, + pub value: U256, } #[derive(Clone, Debug)] diff --git a/crates/decompile/src/utils/heuristics/arguments.rs b/crates/decompile/src/utils/heuristics/arguments.rs index fce28e2c..54a5aa82 100644 --- a/crates/decompile/src/utils/heuristics/arguments.rs +++ b/crates/decompile/src/utils/heuristics/arguments.rs @@ -1,3 +1,4 @@ +use futures::future::BoxFuture; use 
hashbrown::HashSet; use alloy::primitives::U256; @@ -17,152 +18,150 @@ use crate::{ Error, }; -pub fn argument_heuristic( - function: &mut AnalyzedFunction, - state: &State, - analyzer_state: &mut AnalyzerState, -) -> Result<(), Error> { - match state.last_instruction.opcode { - // CALLDATALOAD - 0x35 => { - // calculate the argument index, with the 4byte signature padding removed - // for example, CALLDATALOAD(4) -> (4-4)/32 = 0 - // CALLDATALOAD(36) -> (36-4)/32 = 1 - let arg_index = (state.last_instruction.inputs[0].saturating_sub(U256::from(4)) - / U256::from(32)) - .try_into() - .unwrap_or(usize::MAX); +pub fn argument_heuristic<'a>( + function: &'a mut AnalyzedFunction, + state: &'a State, + analyzer_state: &'a mut AnalyzerState, +) -> BoxFuture<'a, Result<(), Error>> { + Box::pin(async move { + match state.last_instruction.opcode { + // CALLDATALOAD + 0x35 => { + // calculate the argument index, with the 4byte signature padding removed + // for example, CALLDATALOAD(4) -> (4-4)/32 = 0 + // CALLDATALOAD(36) -> (36-4)/32 = 1 + let arg_index = (state.last_instruction.inputs[0].saturating_sub(U256::from(4)) / + U256::from(32)) + .try_into() + .unwrap_or(usize::MAX); - // insert only if this argument is not already in the hashmap - function.arguments.entry(arg_index).or_insert_with(|| { - debug!( - "discovered new argument at index {} from CALLDATALOAD({})", - arg_index, state.last_instruction.inputs[0] - ); - CalldataFrame { - arg_op: state.last_instruction.input_operations[0].to_string(), - mask_size: 32, // init to 32 because all CALLDATALOADs are 32 bytes - heuristics: HashSet::new(), - } - }); - } - - // CALLDATACOPY - 0x37 => { - // TODO: implement CALLDATACOPY support - trace!("CALLDATACOPY detected; not implemented"); - } + // insert only if this argument is not already in the hashmap + function.arguments.entry(arg_index).or_insert_with(|| { + debug!( + "discovered new argument at index {} from CALLDATALOAD({})", + arg_index, 
state.last_instruction.inputs[0] + ); + CalldataFrame { + arg_op: state.last_instruction.input_operations[0].to_string(), + mask_size: 32, // init to 32 because all CALLDATALOADs are 32 bytes + heuristics: HashSet::new(), + } + }); + } - // AND | OR - 0x16 | 0x17 => { - // if this is a bitwise mask operation on CALLDATALOAD, we can use it to determine the - // size (and consequently type) of the variable - if let Some(calldataload_op) = - state.last_instruction.input_operations.iter().find(|op| op.opcode == CALLDATALOAD) - { - // this is a bitwise mask, we can use it to determine the size of the variable - let (mask_size_bytes, _potential_types) = convert_bitmask(&state.last_instruction); + // CALLDATACOPY + 0x37 => { + // TODO: implement CALLDATACOPY support + trace!("CALLDATACOPY detected; not implemented"); + } - // yulify the calldataload operation, and find the associated argument index - // this MUST exist, as we have already inserted it in the CALLDATALOAD heuristic - let arg_op = calldataload_op.inputs[0].to_string(); - if let Some((arg_index, frame)) = - function.arguments.iter_mut().find(|(_, frame)| frame.arg_op == arg_op) + // AND | OR + 0x16 | 0x17 => { + // if this is a bitwise mask operation on CALLDATALOAD, we can use it to determine + // the size (and consequently type) of the variable + if let Some(calldataload_op) = state + .last_instruction + .input_operations + .iter() + .find(|op| op.opcode == CALLDATALOAD) { - debug!( - "instruction {} ({}) indicates argument {} is masked to {} bytes", - state.last_instruction.instruction, - opcode_name(state.last_instruction.opcode), - arg_index, - mask_size_bytes - ); + // this is a bitwise mask, we can use it to determine the size of the variable + let (mask_size_bytes, _potential_types) = + convert_bitmask(&state.last_instruction); + + // yulify the calldataload operation, and find the associated argument index + // this MUST exist, as we have already inserted it in the CALLDATALOAD heuristic + let arg_op 
= calldataload_op.inputs[0].to_string(); + if let Some((arg_index, frame)) = + function.arguments.iter_mut().find(|(_, frame)| frame.arg_op == arg_op) + { + debug!( + "instruction {} ({}) indicates argument {} is masked to {} bytes", + state.last_instruction.instruction, + opcode_name(state.last_instruction.opcode), + arg_index, + mask_size_bytes + ); - frame.mask_size = mask_size_bytes; + frame.mask_size = mask_size_bytes; + } } } - } - // RETURN - 0xf3 => { - // Safely convert U256 to usize - let size: usize = state.last_instruction.inputs[1].try_into().unwrap_or(0); + // RETURN + 0xf3 => { + // Safely convert U256 to usize + let size: usize = state.last_instruction.inputs[1].try_into().unwrap_or(0); - let return_memory_operations = function.get_memory_range( - state.last_instruction.inputs[0], - state.last_instruction.inputs[1], - ); - let return_memory_operations_solidified = return_memory_operations - .iter() - .map(|x| x.operation.solidify()) - .collect::>() - .join(", "); + let return_memory_operations = function.get_memory_range( + state.last_instruction.inputs[0], + state.last_instruction.inputs[1], + ); + let return_memory_operations_solidified = return_memory_operations + .iter() + .map(|x| x.operation.solidify()) + .collect::>() + .join(", "); - // add the return statement to the function logic - if analyzer_state.analyzer_type == AnalyzerType::Solidity { - if return_memory_operations.len() <= 1 { - function.logic.push(format!("return {return_memory_operations_solidified};")); - } else { + // add the return statement to the function logic + if analyzer_state.analyzer_type == AnalyzerType::Solidity { + if return_memory_operations.len() <= 1 { + function + .logic + .push(format!("return {return_memory_operations_solidified};")); + } else { + function.logic.push(format!( + "return abi.encodePacked({return_memory_operations_solidified});" + )); + } + } else if analyzer_state.analyzer_type == AnalyzerType::Yul { function.logic.push(format!( - "return 
abi.encodePacked({return_memory_operations_solidified});" + "return({}, {})", + state.last_instruction.input_operations[0].yulify(), + state.last_instruction.input_operations[1].yulify() )); } - } else if analyzer_state.analyzer_type == AnalyzerType::Yul { - function.logic.push(format!( - "return({}, {})", - state.last_instruction.input_operations[0].yulify(), - state.last_instruction.input_operations[1].yulify() - )); - } - // if we've already determined a return type, we don't want to do it again. - // we use bytes32 as a default return type - if function.returns.is_some() && function.returns.as_deref() != Some("bytes32") { - return Ok(()); - } + // if we've already determined a return type, we don't want to do it again. + // we use bytes32 as a default return type + if function.returns.is_some() && function.returns.as_deref() != Some("bytes32") { + return Ok(()); + } - // if the any input op is ISZERO(x), this is a boolean return - if return_memory_operations.iter().any(|x| x.operation.opcode == ISZERO) { - function.returns = Some(String::from("bool")); - } - // if the input op is any of the following, it is a uint256 return - // this is because these push numeric values onto the stack - else if return_memory_operations.iter().any(|x| { - [0x31, 0x34, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x58, 0x5a] - .contains(&x.operation.opcode) - }) { - function.returns = Some(String::from("uint256")); - } - // if the input op is any of the following, it is an address return - // this is because these push address values onto the stack - else if return_memory_operations - .iter() - .any(|x| [0x30, 0x32, 0x33, 0x41].contains(&x.operation.opcode)) - { - function.returns = Some(String::from("address")); - } - // if the size of returndata is > 32, it must be a bytes or string return. 
- else if size > 32 { - // some hardcoded function selectors where the return type is known to be a string - if ["06fdde03", "95d89b41", "6a98de4c", "9d2b0822", "1a0d4bca"] - .contains(&function.selector.as_str()) - { - function.returns = Some(String::from("string memory")); - } else { - function.returns = Some(String::from("bytes memory")); + // if the any input op is ISZERO(x), this is a boolean return + if return_memory_operations.iter().any(|x| x.operation.opcode == ISZERO) { + function.returns = Some(String::from("bool")); } - } else { - // attempt to find a return type within the return memory operations - let byte_size = match AND_BITMASK_REGEX - .find(&return_memory_operations_solidified) - .ok() - .flatten() + // if the input op is any of the following, it is a uint256 return + // this is because these push numeric values onto the stack + else if return_memory_operations.iter().any(|x| { + [0x31, 0x34, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x58, 0x5a] + .contains(&x.operation.opcode) + }) { + function.returns = Some(String::from("uint256")); + } + // if the input op is any of the following, it is an address return + // this is because these push address values onto the stack + else if return_memory_operations + .iter() + .any(|x| [0x30, 0x32, 0x33, 0x41].contains(&x.operation.opcode)) { - Some(bitmask) => { - let cast = bitmask.as_str(); - - cast.matches("ff").count() + function.returns = Some(String::from("address")); + } + // if the size of returndata is > 32, it must be a bytes or string return. 
+ else if size > 32 { + // some hardcoded function selectors where the return type is known to be a + // string + if ["06fdde03", "95d89b41", "6a98de4c", "9d2b0822", "1a0d4bca"] + .contains(&function.selector.as_str()) + { + function.returns = Some(String::from("string memory")); + } else { + function.returns = Some(String::from("bytes memory")); } - None => match AND_BITMASK_REGEX_2 + } else { + // attempt to find a return type within the return memory operations + let byte_size = match AND_BITMASK_REGEX .find(&return_memory_operations_solidified) .ok() .flatten() @@ -172,109 +171,130 @@ pub fn argument_heuristic( cast.matches("ff").count() } - None => 32, - }, - }; + None => match AND_BITMASK_REGEX_2 + .find(&return_memory_operations_solidified) + .ok() + .flatten() + { + Some(bitmask) => { + let cast = bitmask.as_str(); - // convert the cast size to a string - let (_, cast_types) = byte_size_to_type(byte_size); - function.returns = Some(cast_types[0].to_string()); - } - - // check if this is a state getter - if function.arguments.is_empty() { - if let Some(storage_access) = - STORAGE_ACCESS_REGEX.find(&return_memory_operations_solidified).unwrap_or(None) - { - let storage_access = storage_access.as_str(); - let access_range = find_balanced_encapsulator(storage_access, ('[', ']')) - .map_err(|e| eyre!("failed to find access range: {e}"))?; + cast.matches("ff").count() + } + None => 32, + }, + }; - function.maybe_getter_for = - Some(format!("storage[{}]", &storage_access[access_range])); + // convert the cast size to a string + let (_, cast_types) = byte_size_to_type(byte_size); + function.returns = Some(cast_types[0].to_string()); } - } - debug!( - "return type determined to be '{:?}' from ops '{}'", - function.returns, return_memory_operations_solidified - ); - } + // check if this is a state getter + if function.arguments.is_empty() { + if let Some(storage_access) = STORAGE_ACCESS_REGEX + .find(&return_memory_operations_solidified) + .unwrap_or(None) + { + let 
storage_access = storage_access.as_str(); + let access_range = + find_balanced_encapsulator(storage_access, ('[', ']')) + .map_err(|e| eyre!("failed to find access range: {e}"))?; + + function.maybe_getter_for = + Some(format!("storage[{}]", &storage_access[access_range])); + } + } - // integer type heuristics - 0x02 | 0x04 | 0x05 | 0x06 | 0x07 | 0x08 | 0x09 | 0x0b | 0x10 | 0x11 | 0x12 | 0x13 => { - // check if this instruction is operating on a known argument. - // if it is, add 'integer' to the list of heuristics - // TODO: we probably want to use an enum for heuristics - if let Some((arg_index, frame)) = function.arguments.iter_mut().find(|(_, frame)| { - state - .last_instruction - .output_operations - .iter() - .any(|operation| operation.to_string().contains(frame.arg_op.as_str())) - }) { debug!( - "instruction {} ({}) indicates argument {} may be a numeric type", - state.last_instruction.instruction, - opcode_name(state.last_instruction.opcode), - arg_index + "return type determined to be '{:?}' from ops '{}'", + function.returns, return_memory_operations_solidified ); - - frame.heuristics.insert(TypeHeuristic::Numeric); } - } - // bytes type heuristics - 0x18 | 0x1a | 0x1b | 0x1c | 0x1d | 0x20 => { - // check if this instruction is operating on a known argument. - // if it is, add 'bytes' to the list of heuristics - // TODO: we probably want to use an enum for heuristics - if let Some((arg_index, frame)) = function.arguments.iter_mut().find(|(_, frame)| { - state - .last_instruction - .output_operations - .iter() - .any(|operation| operation.to_string().contains(frame.arg_op.as_str())) - }) { - debug!( - "instruction {} ({}) indicates argument {} may be a bytes type", - state.last_instruction.instruction, - opcode_name(state.last_instruction.opcode), - arg_index - ); + // integer type heuristics + 0x02 | 0x04 | 0x05 | 0x06 | 0x07 | 0x08 | 0x09 | 0x0b | 0x10 | 0x11 | 0x12 | 0x13 => { + // check if this instruction is operating on a known argument. 
+ // if it is, add 'integer' to the list of heuristics + // TODO: we probably want to use an enum for heuristics + if let Some((arg_index, frame)) = + function.arguments.iter_mut().find(|(_, frame)| { + state + .last_instruction + .output_operations + .iter() + .any(|operation| operation.to_string().contains(frame.arg_op.as_str())) + }) + { + debug!( + "instruction {} ({}) indicates argument {} may be a numeric type", + state.last_instruction.instruction, + opcode_name(state.last_instruction.opcode), + arg_index + ); - frame.heuristics.insert(TypeHeuristic::Bytes); + frame.heuristics.insert(TypeHeuristic::Numeric); + } } - } - // boolean type heuristics - 0x15 => { - // if this is a boolean check on CALLDATALOAD, we can add boolean to the potential types - if let Some(calldataload_op) = - state.last_instruction.input_operations.iter().find(|op| op.opcode == CALLDATALOAD) - { - // yulify the calldataload operation, and find the associated argument index - // this MUST exist, as we have already inserted it in the CALLDATALOAD heuristic - let arg_op = calldataload_op.inputs[0].to_string(); + // bytes type heuristics + 0x18 | 0x1a | 0x1b | 0x1c | 0x1d | 0x20 => { + // check if this instruction is operating on a known argument. 
+ // if it is, add 'bytes' to the list of heuristics + // TODO: we probably want to use an enum for heuristics if let Some((arg_index, frame)) = - function.arguments.iter_mut().find(|(_, frame)| frame.arg_op == arg_op) + function.arguments.iter_mut().find(|(_, frame)| { + state + .last_instruction + .output_operations + .iter() + .any(|operation| operation.to_string().contains(frame.arg_op.as_str())) + }) { debug!( - "instruction {} ({}) indicates argument {} may be a boolean", + "instruction {} ({}) indicates argument {} may be a bytes type", state.last_instruction.instruction, opcode_name(state.last_instruction.opcode), arg_index ); - // NOTE: we don't want to update mask_size here, as we are only adding potential - // types - frame.heuristics.insert(TypeHeuristic::Boolean); + frame.heuristics.insert(TypeHeuristic::Bytes); + } + } + + // boolean type heuristics + 0x15 => { + // if this is a boolean check on CALLDATALOAD, we can add boolean to the potential + // types + if let Some(calldataload_op) = state + .last_instruction + .input_operations + .iter() + .find(|op| op.opcode == CALLDATALOAD) + { + // yulify the calldataload operation, and find the associated argument index + // this MUST exist, as we have already inserted it in the CALLDATALOAD heuristic + let arg_op = calldataload_op.inputs[0].to_string(); + if let Some((arg_index, frame)) = + function.arguments.iter_mut().find(|(_, frame)| frame.arg_op == arg_op) + { + debug!( + "instruction {} ({}) indicates argument {} may be a boolean", + state.last_instruction.instruction, + opcode_name(state.last_instruction.opcode), + arg_index + ); + + // NOTE: we don't want to update mask_size here, as we are only adding + // potential types + frame.heuristics.insert(TypeHeuristic::Boolean); + } } } - } - _ => {} - }; + _ => {} + }; - Ok(()) + Ok(()) + }) } diff --git a/crates/decompile/src/utils/heuristics/events.rs b/crates/decompile/src/utils/heuristics/events.rs index 3d630fcc..29676b68 100644 --- 
a/crates/decompile/src/utils/heuristics/events.rs +++ b/crates/decompile/src/utils/heuristics/events.rs @@ -1,5 +1,6 @@ use alloy::primitives::U256; use eyre::OptionExt; +use futures::future::BoxFuture; use heimdall_common::utils::hex::ToLowerHex; use heimdall_vm::core::vm::State; @@ -9,64 +10,71 @@ use crate::{ Error, }; -pub fn event_heuristic( - function: &mut AnalyzedFunction, - state: &State, - analyzer_state: &mut AnalyzerState, -) -> Result<(), Error> { - if (0xA0..=0xA4).contains(&state.last_instruction.opcode) { - // this should be the last event in state - let event = state.events.last().ok_or_eyre("no events in state")?; - let selector = event.topics.first().unwrap_or(&U256::ZERO).to_owned(); - let anonymous = selector == U256::ZERO; +pub fn event_heuristic<'a>( + function: &'a mut AnalyzedFunction, + state: &'a State, + analyzer_state: &'a mut AnalyzerState, +) -> BoxFuture<'a, Result<(), Error>> { + Box::pin(async move { + if (0xA0..=0xA4).contains(&state.last_instruction.opcode) { + // this should be the last event in state + let event = state.events.last().ok_or_eyre("no events in state")?; + let selector = event.topics.first().unwrap_or(&U256::ZERO).to_owned(); + let anonymous = selector == U256::ZERO; - // insert this selector into events - function.events.insert(selector); + // insert this selector into events + function.events.insert(selector); - // decode the data field - let data_mem_ops = function - .get_memory_range(state.last_instruction.inputs[0], state.last_instruction.inputs[1]); - let data_mem_ops_solidified = - data_mem_ops.iter().map(|x| x.operation.solidify()).collect::>().join(", "); + // decode the data field + let data_mem_ops = function.get_memory_range( + state.last_instruction.inputs[0], + state.last_instruction.inputs[1], + ); + let data_mem_ops_solidified = data_mem_ops + .iter() + .map(|x| x.operation.solidify()) + .collect::>() + .join(", "); - // add the event emission to the function's logic - if 
analyzer_state.analyzer_type == AnalyzerType::Solidity { - function.logic.push(format!( - "emit Event_{}({}{});{}", - &event - .topics - .first() - .unwrap_or(&U256::from(0)) - .to_lower_hex() - .replacen("0x", "", 1)[0..8], - event - .topics - .get(1..) - .map(|topics| { - if !event.data.is_empty() && !topics.is_empty() { - let mut solidified_topics: Vec = Vec::new(); - for (i, _) in topics.iter().enumerate() { - solidified_topics.push( - state.last_instruction.input_operations[i + 3].solidify(), - ); + // add the event emission to the function's logic + if analyzer_state.analyzer_type == AnalyzerType::Solidity { + function.logic.push(format!( + "emit Event_{}({}{});{}", + &event + .topics + .first() + .unwrap_or(&U256::from(0)) + .to_lower_hex() + .replacen("0x", "", 1)[0..8], + event + .topics + .get(1..) + .map(|topics| { + if !event.data.is_empty() && !topics.is_empty() { + let mut solidified_topics: Vec = Vec::new(); + for (i, _) in topics.iter().enumerate() { + solidified_topics.push( + state.last_instruction.input_operations[i + 3].solidify(), + ); + } + format!("{}, ", solidified_topics.join(", ")) + } else { + let mut solidified_topics: Vec = Vec::new(); + for (i, _) in topics.iter().enumerate() { + solidified_topics.push( + state.last_instruction.input_operations[i + 3].solidify(), + ); + } + solidified_topics.join(", ") } - format!("{}, ", solidified_topics.join(", ")) - } else { - let mut solidified_topics: Vec = Vec::new(); - for (i, _) in topics.iter().enumerate() { - solidified_topics.push( - state.last_instruction.input_operations[i + 3].solidify(), - ); - } - solidified_topics.join(", ") - } - }) - .unwrap_or("".to_string()), - data_mem_ops_solidified, - if anonymous { " // anonymous event" } else { "" } - )); + }) + .unwrap_or("".to_string()), + data_mem_ops_solidified, + if anonymous { " // anonymous event" } else { "" } + )); + } } - } - Ok(()) + Ok(()) + }) } diff --git a/crates/decompile/src/utils/heuristics/extcall.rs 
b/crates/decompile/src/utils/heuristics/extcall.rs new file mode 100644 index 00000000..b4c51305 --- /dev/null +++ b/crates/decompile/src/utils/heuristics/extcall.rs @@ -0,0 +1,223 @@ +use alloy::primitives::U256; +use futures::future::BoxFuture; +use heimdall_common::utils::{hex::ToLowerHex, strings::encode_hex_reduced}; +use heimdall_vm::{ + core::{opcodes::opcode_name, vm::State}, + w_gas, w_push0, +}; +use tracing::trace; + +use crate::{ + core::analyze::AnalyzerState, interfaces::AnalyzedFunction, + utils::precompile::decode_precompile, Error, +}; +use heimdall_decoder::{decode, DecodeArgsBuilder}; + +pub fn extcall_heuristic<'a>( + function: &'a mut AnalyzedFunction, + state: &'a State, + _: &'a mut AnalyzerState, +) -> BoxFuture<'a, Result<(), Error>> { + Box::pin(async move { + let instruction = &state.last_instruction; + + match instruction.opcode { + // CALL / CALLCODE + 0xf1 | 0xf2 => { + let address = instruction.input_operations[1].solidify(); + let memory = + function.get_memory_range(instruction.inputs[3], instruction.inputs[4]); + let extcalldata = memory + .iter() + .map(|x| x.value.to_lower_hex().to_owned()) + .collect::>() + .join(""); + let gas_solidified = instruction.input_operations[0].solidify(); + let value_solidified = instruction.input_operations[2].solidify(); + + // if gas is 2,300, this is a value transfer + if gas_solidified.contains("0x08fc") { + trace!( + "instruction {} ({}) with 2300 gas indicates a value transfer", + instruction.instruction, + opcode_name(instruction.opcode) + ); + function.logic.push(format!( + "(bool success, bytes memory ret0) = address({}).transfer({});", + address, value_solidified + )); + return Ok(()); + } + if extcalldata.is_empty() { + trace!( + "instruction {} ({}) with no calldata indicates a value transfer", + instruction.instruction, + opcode_name(instruction.opcode) + ); + function.logic.push(format!( + "(bool success, bytes memory ret0) = address({}).transfer({});", + address, value_solidified + 
)); + return Ok(()); + } + + let extcalldata_clone = extcalldata.clone(); + let decoded = decode( + DecodeArgsBuilder::new() + .target(extcalldata_clone) + .raw(true) + .build() + .expect("Failed to build DecodeArgs"), + ) + .await + .ok(); + + // build modifiers + // - if gas is just the default (GAS()), we don't need to include it + // - if value is just the default (0), we don't need to include it + let mut modifiers = vec![]; + if instruction.input_operations[0] != w_gas!() { + modifiers.push(format!("gas: {}", gas_solidified)); + } + if instruction.input_operations[2] != w_push0!() { + // if the value is just a hex string, we can parse it as ether for readability + if let Ok(value) = + u128::from_str_radix(value_solidified.trim_start_matches("0x"), 16) + { + let ether_value = value as f64 / 10_f64.powi(18); + modifiers.push(format!("value: {} ether", ether_value)); + } else { + modifiers.push(format!("value: {}", value_solidified)); + } + } + let modifier = if modifiers.is_empty() { + "".to_string() + } else { + format!("{{ {} }}", modifiers.join(", ")) + }; + + // check if the external call is a precompiled contract + if let Some(precompile_logic) = decode_precompile( + instruction.inputs[1], + &memory, + &instruction.input_operations[5], + ) { + function.logic.push(precompile_logic); + } else if let Some(decoded) = decoded { + let start_slot = instruction.inputs[3] + U256::from(4); + + function.logic.push(format!( + "(bool success, bytes memory ret0) = address({}).{}{}({}); // {}", + address, + modifier, + decoded.decoded.name, + decoded + .decoded + .inputs + .iter() + .enumerate() + .map(|(i, _)| { + format!( + "memory[{}]", + encode_hex_reduced(start_slot + U256::from(i * 32)) + ) + }) + .collect::>() + .join(", "), + opcode_name(instruction.opcode).to_lowercase(), + )); + } else { + function.logic.push(format!( + "(bool success, bytes memory ret0) = address({}).Unresolved_{}{}(msg.data[{}:{}]); // {}", + address, + extcalldata.get(2..10).unwrap_or(""), + 
modifier, + instruction.input_operations[3].solidify(), + instruction.input_operations[4].solidify(), + opcode_name(instruction.opcode).to_lowercase(), + )); + } + } + + // STATICCALL / DELEGATECALL + 0xfa | 0xf4 => { + let gas = format!("gas: {}", instruction.input_operations[0].solidify()); + let address = instruction.input_operations[1].solidify(); + let memory = + function.get_memory_range(instruction.inputs[2], instruction.inputs[3]); + let extcalldata = memory + .iter() + .map(|x| x.value.to_lower_hex().trim_start_matches("0x").to_owned()) + .collect::>() + .join(""); + + let extcalldata_clone = extcalldata.clone(); + let decoded = decode( + DecodeArgsBuilder::new() + .target(extcalldata_clone) + .raw(true) + .build() + .expect("Failed to build DecodeArgs"), + ) + .await + .ok(); + + // build the modifier w/ gas + // if the modifier is just the default (GAS()), we don't need to include it + let modifier = if instruction.input_operations[0] != w_gas!() { + format!("{{ {} }}", gas) + } else { + "".to_string() + }; + + // check if the external call is a precompiled contract + if let Some(precompile_logic) = decode_precompile( + instruction.inputs[1], + &memory, + &instruction.input_operations[4], + ) { + function.logic.push(precompile_logic); + } else if let Some(decoded) = decoded { + let start_slot = instruction.inputs[2] + U256::from(4); + + function.logic.push(format!( + "(bool success, bytes memory ret0) = address({}).{}{}({}); // {}", + address, + modifier, + decoded.decoded.name, + decoded + .decoded + .inputs + .iter() + .enumerate() + .map(|(i, _)| { + format!( + "memory[{}]", + encode_hex_reduced(start_slot + U256::from(i * 32)) + ) + }) + .collect::>() + .join(", "), + opcode_name(instruction.opcode).to_lowercase(), + )); + } else { + function.logic.push(format!( + "(bool success, bytes memory ret0) = address({}).Unresolved_{}{}(memory[{}:{}]); // {}", + address, + extcalldata.get(2..10).unwrap_or(""), + modifier, + 
instruction.input_operations[2].solidify(), + instruction.input_operations[3].solidify(), + opcode_name(instruction.opcode).to_lowercase(), + )); + } + } + + _ => {} + }; + + Ok(()) + }) +} + +// TODO: handle skip_resolving (need to fix in inspect mod too) diff --git a/crates/decompile/src/utils/heuristics/mod.rs b/crates/decompile/src/utils/heuristics/mod.rs index bc7881bc..604ec163 100644 --- a/crates/decompile/src/utils/heuristics/mod.rs +++ b/crates/decompile/src/utils/heuristics/mod.rs @@ -1,3 +1,4 @@ +use futures::future::BoxFuture; use heimdall_vm::core::vm::State; use crate::{core::analyze::AnalyzerState, interfaces::AnalyzedFunction, Error}; @@ -5,6 +6,7 @@ use crate::{core::analyze::AnalyzerState, interfaces::AnalyzedFunction, Error}; // import heuristics mod arguments; mod events; +mod extcall; mod modifiers; mod solidity; mod yul; @@ -12,30 +14,34 @@ mod yul; // re-export heuristics pub use arguments::argument_heuristic; pub use events::event_heuristic; +pub use extcall::extcall_heuristic; pub use modifiers::modifier_heuristic; pub use solidity::solidity_heuristic; pub use yul::yul_heuristic; /// A heuristic is a function that takes a function and a state and modifies the function based on /// the state +type HeuristicFn = for<'a> fn( + &'a mut AnalyzedFunction, + &'a State, + &'a mut AnalyzerState, +) -> BoxFuture<'a, Result<(), Error>>; + pub(crate) struct Heuristic { - implementation: fn(&mut AnalyzedFunction, &State, &mut AnalyzerState) -> Result<(), Error>, + implementation: HeuristicFn, } impl Heuristic { - pub fn new( - implementation: fn(&mut AnalyzedFunction, &State, &mut AnalyzerState) -> Result<(), Error>, - ) -> Self { + pub fn new(implementation: HeuristicFn) -> Self { Self { implementation } } - /// Run the heuristic implementation on the given state - pub fn run( + pub async fn run<'a>( &self, - function: &mut AnalyzedFunction, - state: &State, - analyzer_state: &mut AnalyzerState, + function: &'a mut AnalyzedFunction, + state: &'a State, 
+ analyzer_state: &'a mut AnalyzerState, ) -> Result<(), Error> { - (self.implementation)(function, state, analyzer_state) + (self.implementation)(function, state, analyzer_state).await } } diff --git a/crates/decompile/src/utils/heuristics/modifiers.rs b/crates/decompile/src/utils/heuristics/modifiers.rs index a66c7b50..118a9ef4 100644 --- a/crates/decompile/src/utils/heuristics/modifiers.rs +++ b/crates/decompile/src/utils/heuristics/modifiers.rs @@ -1,3 +1,4 @@ +use futures::future::BoxFuture; use heimdall_vm::{ core::{ opcodes::{OpCodeInfo, JUMPI}, @@ -9,45 +10,47 @@ use tracing::debug; use crate::{core::analyze::AnalyzerState, interfaces::AnalyzedFunction, Error}; -pub fn modifier_heuristic( - function: &mut AnalyzedFunction, - state: &State, - _: &mut AnalyzerState, -) -> Result<(), Error> { - let opcode_info = OpCodeInfo::from(state.last_instruction.opcode); +pub fn modifier_heuristic<'a>( + function: &'a mut AnalyzedFunction, + state: &'a State, + _: &'a mut AnalyzerState, +) -> BoxFuture<'a, Result<(), Error>> { + Box::pin(async move { + let opcode_info = OpCodeInfo::from(state.last_instruction.opcode); - // if any instruction is non-pure, the function is non-pure - if function.pure && !opcode_info.is_pure() { - debug!( - "instruction {} ({}) indicates a non-pure function", - state.last_instruction.instruction, - opcode_info.name() - ); - function.pure = false; - } + // if any instruction is non-pure, the function is non-pure + if function.pure && !opcode_info.is_pure() { + debug!( + "instruction {} ({}) indicates a non-pure function", + state.last_instruction.instruction, + opcode_info.name() + ); + function.pure = false; + } - // if any instruction is non-view, the function is non-view - if function.view && !opcode_info.is_view() { - debug!( - "instruction {} ({}) indicates a non-view function", - state.last_instruction.instruction, - opcode_info.name() - ); - function.view = false; - } + // if any instruction is non-view, the function is non-view + if 
function.view && !opcode_info.is_view() { + debug!( + "instruction {} ({}) indicates a non-view function", + state.last_instruction.instruction, + opcode_info.name() + ); + function.view = false; + } - // if the instruction is a JUMPI with non-zero CALLVALUE requirement, the function is - // non-payable exactly: ISZERO(CALLVALUE()) - if function.payable - && state.last_instruction.opcode == JUMPI - && state.last_instruction.input_operations[1] == w_iszero!(w_callvalue!()) - { - debug!( - "conditional at instruction {} indicates a non-payable function", - state.last_instruction.instruction - ); - function.payable = false; - } + // if the instruction is a JUMPI with non-zero CALLVALUE requirement, the function is + // non-payable exactly: ISZERO(CALLVALUE()) + if function.payable && + state.last_instruction.opcode == JUMPI && + state.last_instruction.input_operations[1] == w_iszero!(w_callvalue!()) + { + debug!( + "conditional at instruction {} indicates a non-payable function", + state.last_instruction.instruction + ); + function.payable = false; + } - Ok(()) + Ok(()) + }) } diff --git a/crates/decompile/src/utils/heuristics/solidity.rs b/crates/decompile/src/utils/heuristics/solidity.rs index f3a70e59..d2707861 100644 --- a/crates/decompile/src/utils/heuristics/solidity.rs +++ b/crates/decompile/src/utils/heuristics/solidity.rs @@ -1,315 +1,252 @@ use alloy::primitives::U256; use alloy_dyn_abi::{DynSolType, DynSolValue}; +use futures::future::BoxFuture; use heimdall_common::utils::strings::encode_hex_reduced; -use heimdall_vm::core::{opcodes::opcode_name, vm::State}; +use heimdall_vm::core::vm::State; use crate::{ core::analyze::AnalyzerState, interfaces::{AnalyzedFunction, StorageFrame}, - utils::{constants::VARIABLE_SIZE_CHECK_REGEX, precompile::decode_precompile}, + utils::constants::VARIABLE_SIZE_CHECK_REGEX, Error, }; -pub fn solidity_heuristic( - function: &mut AnalyzedFunction, - state: &State, - analyzer_state: &mut AnalyzerState, -) -> Result<(), Error> { 
- let instruction = &state.last_instruction; - - match instruction.opcode { - // CALLDATACOPY - 0x37 => { - let memory_offset = &instruction.input_operations[0]; - let source_offset = instruction.inputs[1]; - let size_bytes = instruction.inputs[2]; - - // add the mstore to the function's memory map - function.logic.push(format!( - "memory[{}] = msg.data[{}:{}];", - memory_offset.solidify(), - source_offset, - source_offset.saturating_add(size_bytes) - )); - } - - // CODECOPY - 0x39 => { - let memory_offset = &instruction.input_operations[0]; - let source_offset = instruction.inputs[1]; - let size_bytes = instruction.inputs[2]; - - // add the mstore to the function's memory map - function.logic.push(format!( - "memory[{}] = this.code[{}:{}];", - memory_offset.solidify(), - source_offset, - source_offset.saturating_add(size_bytes) - )); - } - - // EXTCODECOPY - 0x3C => { - let address = &instruction.input_operations[0]; - let memory_offset = &instruction.input_operations[1]; - let source_offset = instruction.inputs[2]; - let size_bytes = instruction.inputs[3]; - - // add the mstore to the function's memory map - function.logic.push(format!( - "memory[{}] = address({}).code[{}:{}]", - memory_offset.solidify(), - address.solidify(), - source_offset, - source_offset.saturating_add(size_bytes) - )); - } - - // MSTORE / MSTORE8 - 0x52 | 0x53 => { - let key = instruction.inputs[0]; - let operation = instruction.input_operations[1].to_owned(); - - // add the mstore to the function's memory map - function.memory.insert(key, StorageFrame { operation }); - function.logic.push(format!( - "memory[{}] = {};", - encode_hex_reduced(key), - instruction.input_operations[1].solidify() - )); - } - - // SSTORE - 0x55 => { - function.logic.push(format!( - "storage[{}] = {};", - instruction.input_operations[0].solidify(), - instruction.input_operations[1].solidify(), - )); - } - - // JUMPI - 0x57 => { - // this is an if conditional for the children branches - let conditional = 
instruction.input_operations[1].solidify(); - - // perform a series of checks to determine if the condition - // is added by the compiler and can be ignored - if (conditional.contains("msg.data.length") && conditional.contains("0x04")) - || VARIABLE_SIZE_CHECK_REGEX.is_match(&conditional).unwrap_or(false) - || (conditional.replace('!', "") == "success") - || (conditional == "!msg.value") - { - return Ok(()); +pub fn solidity_heuristic<'a>( + function: &'a mut AnalyzedFunction, + state: &'a State, + analyzer_state: &'a mut AnalyzerState, +) -> BoxFuture<'a, Result<(), Error>> { + Box::pin(async move { + let instruction = &state.last_instruction; + + match instruction.opcode { + // CALLDATACOPY + 0x37 => { + let memory_offset = &instruction.input_operations[0]; + let source_offset = instruction.inputs[1]; + let size_bytes = instruction.inputs[2]; + + // add the mstore to the function's memory map + function.logic.push(format!( + "memory[{}] = msg.data[{}:{}];", + memory_offset.solidify(), + source_offset, + source_offset.saturating_add(size_bytes) + )); } - function.logic.push(format!("if ({conditional}) {{").to_string()); - - // save a copy of the conditional and add it to the conditional map - analyzer_state.jumped_conditional = Some(conditional.clone()); - analyzer_state.conditional_stack.push(conditional); - } - - // TSTORE - 0x5d => { - function.logic.push(format!( - "transient[{}] = {};", - instruction.input_operations[0].solidify(), - instruction.input_operations[1].solidify(), - )); - } - - // CREATE / CREATE2 - 0xf0 | 0xf5 => { - function.logic.push(format!( - "assembly {{ addr := create({}) }}", - instruction - .input_operations - .iter() - .map(|x| x.solidify()) - .collect::>() - .join(", ") - )); - } + // CODECOPY + 0x39 => { + let memory_offset = &instruction.input_operations[0]; + let source_offset = instruction.inputs[1]; + let size_bytes = instruction.inputs[2]; + + // add the mstore to the function's memory map + function.logic.push(format!( + 
"memory[{}] = this.code[{}:{}];", + memory_offset.solidify(), + source_offset, + source_offset.saturating_add(size_bytes) + )); + } - // CALL / CALLCODE - 0xf1 | 0xf2 => { - let gas = format!("gas: {}", instruction.input_operations[0].solidify()); - let address = instruction.input_operations[1].solidify(); - let value = format!("value: {}", instruction.input_operations[2].solidify()); - let calldata = function.get_memory_range(instruction.inputs[3], instruction.inputs[4]); + // EXTCODECOPY + 0x3C => { + let address = &instruction.input_operations[0]; + let memory_offset = &instruction.input_operations[1]; + let source_offset = instruction.inputs[2]; + let size_bytes = instruction.inputs[3]; + + // add the mstore to the function's memory map + function.logic.push(format!( + "memory[{}] = address({}).code[{}:{}]", + memory_offset.solidify(), + address.solidify(), + source_offset, + source_offset.saturating_add(size_bytes) + )); + } - // build the modifier w/ gas and value - let modifier = format!("{{ {}, {} }}", gas, value); + // MSTORE / MSTORE8 + 0x52 | 0x53 => { + let key = instruction.inputs[0]; + let value = instruction.inputs[1]; + let operation = instruction.input_operations[1].to_owned(); + + // add the mstore to the function's memory map + function.memory.insert(key, StorageFrame { operation, value }); + function.logic.push(format!( + "memory[{}] = {};", + encode_hex_reduced(key), + instruction.input_operations[1].solidify() + )); + } - // check if the external call is a precompiled contract - match decode_precompile( - instruction.inputs[1], - &calldata, - &instruction.input_operations[5], - ) { - (true, precompile_logic) => { - function.logic.push(precompile_logic); - } - _ => { - function.logic.push(format!( - "(bool success, bytes memory ret0) = address({}).call{}(abi.encode({}));", - address, - modifier, - calldata - .iter() - .map(|x| x.operation.solidify()) - .collect::>() - .join(", ") - )); - } + // SSTORE + 0x55 => { + function.logic.push(format!( 
+ "storage[{}] = {};", + instruction.input_operations[0].solidify(), + instruction.input_operations[1].solidify(), + )); } - } - // STATICCALL / DELEGATECALL - 0xfa | 0xf4 => { - let gas = format!("gas: {}", instruction.input_operations[0].solidify()); - let address = instruction.input_operations[1].solidify(); - let calldata = function.get_memory_range(instruction.inputs[2], instruction.inputs[3]); + // JUMPI + 0x57 => { + // this is an if conditional for the children branches + let conditional = instruction.input_operations[1].solidify(); + + // perform a series of checks to determine if the condition + // is added by the compiler and can be ignored + if (conditional.contains("msg.data.length") && conditional.contains("0x04")) || + VARIABLE_SIZE_CHECK_REGEX.is_match(&conditional).unwrap_or(false) || + (conditional.replace('!', "") == "success") || + (conditional == "!msg.value") + { + return Ok(()); + } - // build the modifier w/ gas - let modifier = format!("{{ {} }}", gas); + function.logic.push(format!("if ({conditional}) {{").to_string()); - // check if the external call is a precompiled contract - match decode_precompile( - instruction.inputs[1], - &calldata, - &instruction.input_operations[4], - ) { - (true, precompile_logic) => { - function.logic.push(precompile_logic); - } - _ => { - function.logic.push(format!( - "(bool success, bytes memory ret0) = address({}).{}{}(abi.encode({}));", - address, - opcode_name(instruction.opcode).to_lowercase(), - modifier, - calldata - .iter() - .map(|x| x.operation.solidify()) - .collect::>() - .join(", ") - )); - } + // save a copy of the conditional and add it to the conditional map + analyzer_state.jumped_conditional = Some(conditional.clone()); + analyzer_state.conditional_stack.push(conditional); } - } - // REVERT - 0xfd => { - // Safely convert U256 to usize - let offset: usize = instruction.inputs[0].try_into().unwrap_or(0); - let size: usize = instruction.inputs[1].try_into().unwrap_or(0); - let revert_data = 
state.memory.read(offset, size); + // TSTORE + 0x5d => { + function.logic.push(format!( + "transient[{}] = {};", + instruction.input_operations[0].solidify(), + instruction.input_operations[1].solidify(), + )); + } - // (1) if revert_data starts with 0x08c379a0, the folling is an error string - // abiencoded (2) if revert_data starts with 0x4e487b71, the - // following is a compiler panic (3) if revert_data starts with any - // other 4byte selector, it is a custom error and should - // be resolved and added to the generated ABI - // (4) if revert_data is empty, it is an empty revert. Ex: - // - if (true != false) { revert() }; - // - require(true != false) - let revert_logic; + // CREATE / CREATE2 + 0xf0 | 0xf5 => { + function.logic.push(format!( + "assembly {{ addr := create({}) }}", + instruction + .input_operations + .iter() + .map(|x| x.solidify()) + .collect::>() + .join(", ") + )); + } - // handle case with error string abiencoded - if revert_data.starts_with(&[0x08, 0xc3, 0x79, 0xa0]) { - let revert_string = match revert_data.get(4..) { - Some(hex_data) => match DynSolType::String.abi_decode(hex_data) { - Ok(revert) => match revert { - DynSolValue::String(revert) => revert, - _ => "decoding error".to_string(), + // REVERT + 0xfd => { + // Safely convert U256 to usize + let offset: usize = instruction.inputs[0].try_into().unwrap_or(0); + let size: usize = instruction.inputs[1].try_into().unwrap_or(0); + let revert_data = state.memory.read(offset, size); + + // (1) if revert_data starts with 0x08c379a0, the folling is an error string + // abiencoded (2) if revert_data starts with 0x4e487b71, the + // following is a compiler panic (3) if revert_data starts with any + // other 4byte selector, it is a custom error and should + // be resolved and added to the generated ABI + // (4) if revert_data is empty, it is an empty revert. 
Ex: + // - if (true != false) { revert() }; + // - require(true != false) + let revert_logic; + + // handle case with error string abiencoded + if revert_data.starts_with(&[0x08, 0xc3, 0x79, 0xa0]) { + let revert_string = match revert_data.get(4..) { + Some(hex_data) => match DynSolType::String.abi_decode(hex_data) { + Ok(revert) => match revert { + DynSolValue::String(revert) => revert, + _ => "decoding error".to_string(), + }, + Err(_) => "decoding error".to_string(), }, - Err(_) => "decoding error".to_string(), - }, - None => "decoding error".to_string(), - }; - revert_logic = match analyzer_state.jumped_conditional.clone() { - Some(condition) => { - analyzer_state.jumped_conditional = None; - format!("require({condition}, \"{revert_string}\");") - } - None => { - // loop backwards through logic to find the last IF statement - for i in (0..function.logic.len()).rev() { - if function.logic[i].starts_with("if") { - let conditional = match analyzer_state.conditional_stack.pop() { - Some(condition) => condition, - None => break, - }; - - function.logic[i] = - format!("require({conditional}, \"{revert_string}\");"); + None => "decoding error".to_string(), + }; + revert_logic = match analyzer_state.jumped_conditional.clone() { + Some(condition) => { + analyzer_state.jumped_conditional = None; + format!("require({condition}, \"{revert_string}\");") + } + None => { + // loop backwards through logic to find the last IF statement + for i in (0..function.logic.len()).rev() { + if function.logic[i].starts_with("if") { + let conditional = match analyzer_state.conditional_stack.pop() { + Some(condition) => condition, + None => break, + }; + + function.logic[i] = + format!("require({conditional}, \"{revert_string}\");"); + } } + return Ok(()); } - return Ok(()); } } - } - // handle case with custom error OR empty revert - else if !revert_data.starts_with(&[0x4e, 0x48, 0x7b, 0x71]) { - let custom_error_placeholder = match revert_data.get(0..4) { - Some(selector) => { - 
function.errors.insert(U256::from_be_slice(selector)); - format!( - "CustomError_{}()", - encode_hex_reduced(U256::from_be_slice(selector)).replacen("0x", "", 1) - ) - } - None => "()".to_string(), - }; - - revert_logic = match analyzer_state.jumped_conditional.clone() { - Some(condition) => { - analyzer_state.jumped_conditional = None; - if custom_error_placeholder == *"()" { - format!("require({condition});",) - } else { - format!("require({condition}, {custom_error_placeholder});") + // handle case with custom error OR empty revert + else if !revert_data.starts_with(&[0x4e, 0x48, 0x7b, 0x71]) { + let custom_error_placeholder = match revert_data.get(0..4) { + Some(selector) => { + function.errors.insert(U256::from_be_slice(selector)); + format!( + "CustomError_{}()", + encode_hex_reduced(U256::from_be_slice(selector)) + .replacen("0x", "", 1) + ) } - } - None => { - // loop backwards through logic to find the last IF statement - for i in (0..function.logic.len()).rev() { - if function.logic[i].starts_with("if") { - let conditional = match analyzer_state.conditional_stack.pop() { - Some(condition) => condition, - None => break, - }; - - if custom_error_placeholder == *"()" { - function.logic[i] = format!("require({conditional});",); - } else { - function.logic[i] = format!( - "require({conditional}, {custom_error_placeholder});" - ); + None => "()".to_string(), + }; + + revert_logic = match analyzer_state.jumped_conditional.clone() { + Some(condition) => { + analyzer_state.jumped_conditional = None; + if custom_error_placeholder == *"()" { + format!("require({condition});",) + } else { + format!("require({condition}, {custom_error_placeholder});") + } + } + None => { + // loop backwards through logic to find the last IF statement + for i in (0..function.logic.len()).rev() { + if function.logic[i].starts_with("if") { + let conditional = match analyzer_state.conditional_stack.pop() { + Some(condition) => condition, + None => break, + }; + + if 
custom_error_placeholder == *"()" { + function.logic[i] = format!("require({conditional});",); + } else { + function.logic[i] = format!( + "require({conditional}, {custom_error_placeholder});" + ); + } } } + return Ok(()); } - return Ok(()); } + } else { + return Ok(()); } - } else { - return Ok(()); - } - function.logic.push(revert_logic); - } + function.logic.push(revert_logic); + } - // SELFDESTRUCT - 0xff => { - function - .logic - .push(format!("selfdestruct({});", instruction.input_operations[0].solidify())); - } + // SELFDESTRUCT + 0xff => { + function + .logic + .push(format!("selfdestruct({});", instruction.input_operations[0].solidify())); + } - _ => {} - }; + _ => {} + }; - Ok(()) + Ok(()) + }) } diff --git a/crates/decompile/src/utils/heuristics/yul.rs b/crates/decompile/src/utils/heuristics/yul.rs index 18805609..ba845089 100644 --- a/crates/decompile/src/utils/heuristics/yul.rs +++ b/crates/decompile/src/utils/heuristics/yul.rs @@ -1,3 +1,4 @@ +use futures::future::BoxFuture; use heimdall_common::utils::strings::encode_hex_reduced; use heimdall_vm::core::{opcodes::opcode_name, vm::State}; @@ -7,91 +8,95 @@ use crate::{ Error, }; -pub fn yul_heuristic( - function: &mut AnalyzedFunction, - state: &State, - analyzer_state: &mut AnalyzerState, -) -> Result<(), Error> { - let instruction = &state.last_instruction; +pub fn yul_heuristic<'a>( + function: &'a mut AnalyzedFunction, + state: &'a State, + analyzer_state: &'a mut AnalyzerState, +) -> BoxFuture<'a, Result<(), Error>> { + Box::pin(async move { + let instruction = &state.last_instruction; - match instruction.opcode { - // MSTORE / MSTORE8 - 0x52 | 0x53 => { - let key = instruction.inputs[0]; - let operation = instruction.input_operations[1].clone(); + match instruction.opcode { + // MSTORE / MSTORE8 + 0x52 | 0x53 => { + let key = instruction.inputs[0]; + let value = instruction.inputs[1]; + let operation = instruction.input_operations[1].clone(); - // add the mstore to the function's memory map - 
function.memory.insert(key, StorageFrame { operation }); - function.logic.push(format!( - "{}({}, {})", - opcode_name(instruction.opcode).to_lowercase(), - encode_hex_reduced(key), - instruction.input_operations[1].yulify() - )); - } + // add the mstore to the function's memory map + function.memory.insert(key, StorageFrame { operation, value }); + function.logic.push(format!( + "{}({}, {})", + opcode_name(instruction.opcode).to_lowercase(), + encode_hex_reduced(key), + instruction.input_operations[1].yulify() + )); + } - // JUMPI - 0x57 => { - let conditional = instruction.input_operations[1].yulify(); + // JUMPI + 0x57 => { + let conditional = instruction.input_operations[1].yulify(); - function.logic.push(format!("if {conditional} {{").to_string()); - analyzer_state.jumped_conditional = Some(conditional.clone()); - analyzer_state.conditional_stack.push(conditional); - } + function.logic.push(format!("if {conditional} {{").to_string()); + analyzer_state.jumped_conditional = Some(conditional.clone()); + analyzer_state.conditional_stack.push(conditional); + } - // REVERT - 0xfd => { - let revert_data = state.memory.read( - instruction.inputs[0].try_into().unwrap_or(0), - instruction.inputs[1].try_into().unwrap_or(0), - ); + // REVERT + 0xfd => { + let revert_data = state.memory.read( + instruction.inputs[0].try_into().unwrap_or(0), + instruction.inputs[1].try_into().unwrap_or(0), + ); - // ignore compiler panics, we will reach these due to symbolic execution - if revert_data.starts_with(&[0x4e, 0x48, 0x7b, 0x71]) { - return Ok(()); - } + // ignore compiler panics, we will reach these due to symbolic execution + if revert_data.starts_with(&[0x4e, 0x48, 0x7b, 0x71]) { + return Ok(()); + } - // find the if statement that caused this revert, and update it to include the revert - for i in (0..function.logic.len()).rev() { - if function.logic[i].starts_with("if") { - // get matching conditional - let conditional = function.logic[i].split("if ").collect::>()[1] - .split(" 
{") - .collect::>()[0] - .to_string(); + // find the if statement that caused this revert, and update it to include the + // revert + for i in (0..function.logic.len()).rev() { + if function.logic[i].starts_with("if") { + // get matching conditional + let conditional = function.logic[i].split("if ").collect::>()[1] + .split(" {") + .collect::>()[0] + .to_string(); - // we can negate the conditional to get the revert logic - function.logic[i] = format!( - "if {conditional} {{ revert({}, {}); }} else {{", - instruction.input_operations[0].yulify(), - instruction.input_operations[1].yulify() - ); + // we can negate the conditional to get the revert logic + function.logic[i] = format!( + "if {conditional} {{ revert({}, {}); }} else {{", + instruction.input_operations[0].yulify(), + instruction.input_operations[1].yulify() + ); - break; + break; + } } } - } - // STATICCALL, CALL, CALLCODE, DELEGATECALL, CREATE, CREATE2 - // CALLDATACOPY, CODECOPY, EXTCODECOPY, RETURNDATACOPY, TSTORE, - // SSTORE, RETURN, SELFDESTRUCT, LOG0, LOG1, LOG2, LOG3, LOG4 - // we simply want to add the operation to the function's logic - 0x37 | 0x39 | 0x3c | 0x3e | 0x55 | 0x5d | 0xf0 | 0xf1 | 0xf2 | 0xf4 | 0xf5 | 0xfa - | 0xff | 0xA0 | 0xA1 | 0xA2 | 0xA3 | 0xA4 => { - function.logic.push(format!( - "{}({})", - opcode_name(instruction.opcode).to_lowercase(), - instruction - .input_operations - .iter() - .map(|x| x.yulify()) - .collect::>() - .join(", ") - )); - } + // STATICCALL, CALL, CALLCODE, DELEGATECALL, CREATE, CREATE2 + // CALLDATACOPY, CODECOPY, EXTCODECOPY, RETURNDATACOPY, TSTORE, + // SSTORE, RETURN, SELFDESTRUCT, LOG0, LOG1, LOG2, LOG3, LOG4 + // we simply want to add the operation to the function's logic + 0x37 | 0x39 | 0x3c | 0x3e | 0x55 | 0x5d | 0xf0 | 0xf1 | 0xf2 | 0xf4 | 0xf5 | 0xfa | + 0xff | 0xA0 | 0xA1 | 0xA2 | 0xA3 | 0xA4 => { + function.logic.push(format!( + "{}({})", + opcode_name(instruction.opcode).to_lowercase(), + instruction + .input_operations + .iter() + .map(|x| 
x.yulify()) + .collect::>() + .join(", ") + )); + } - _ => {} - }; + _ => {} + }; - Ok(()) + Ok(()) + }) } diff --git a/crates/decompile/src/utils/postprocessors/memory.rs b/crates/decompile/src/utils/postprocessors/memory.rs index 538ebfad..052133fc 100644 --- a/crates/decompile/src/utils/postprocessors/memory.rs +++ b/crates/decompile/src/utils/postprocessors/memory.rs @@ -52,9 +52,9 @@ pub fn memory_postprocessor( // infer the type from args and vars in the expression for (var, var_type) in state.memory_type_map.iter() { - if line.contains(var) - && !state.memory_type_map.contains_key(&var_name) - && !var_type.is_empty() + if line.contains(var) && + !state.memory_type_map.contains_key(&var_name) && + !var_type.is_empty() { *line = format!("{var_type} {line}"); state.memory_type_map.insert(var_name.to_string(), var_type.to_string()); @@ -78,8 +78,8 @@ pub fn memory_postprocessor( } // we can do some type inference here - if ["+", "-", "/", "*", "int", ">=", "<="].iter().any(|op| line.contains(op)) - || assignment[1].replace(';', "").parse::().is_ok() + if ["+", "-", "/", "*", "int", ">=", "<="].iter().any(|op| line.contains(op)) || + assignment[1].replace(';', "").parse::().is_ok() { *line = format!("uint256 {line}"); state.memory_type_map.insert(var_name.to_string(), "uint256".to_string()); diff --git a/crates/decompile/src/utils/postprocessors/storage.rs b/crates/decompile/src/utils/postprocessors/storage.rs index 74ec98e1..d5620a36 100644 --- a/crates/decompile/src/utils/postprocessors/storage.rs +++ b/crates/decompile/src/utils/postprocessors/storage.rs @@ -64,8 +64,8 @@ pub fn storage_postprocessor( } // if there is an assignment to a memory variable, save it to variable_map - if (line.trim().starts_with("store_") || line.trim().starts_with("storage_map_")) - && line.contains(" = ") + if (line.trim().starts_with("store_") || line.trim().starts_with("storage_map_")) && + line.contains(" = ") { let assignment: Vec = line.split(" = 
").collect::>().iter().map(|x| x.to_string()).collect(); diff --git a/crates/decompile/src/utils/postprocessors/transient.rs b/crates/decompile/src/utils/postprocessors/transient.rs index eb9ebe76..93f7e28f 100644 --- a/crates/decompile/src/utils/postprocessors/transient.rs +++ b/crates/decompile/src/utils/postprocessors/transient.rs @@ -64,8 +64,8 @@ pub fn transient_postprocessor( } // if there is an assignment to a memory variable, save it to variable_map - if (line.trim().starts_with("tstore_") || line.trim().starts_with("transient_map_")) - && line.contains(" = ") + if (line.trim().starts_with("tstore_") || line.trim().starts_with("transient_map_")) && + line.contains(" = ") { let assignment: Vec = line.split(" = ").collect::>().iter().map(|x| x.to_string()).collect(); diff --git a/crates/decompile/src/utils/postprocessors/variable.rs b/crates/decompile/src/utils/postprocessors/variable.rs index 01dca862..97f88da8 100644 --- a/crates/decompile/src/utils/postprocessors/variable.rs +++ b/crates/decompile/src/utils/postprocessors/variable.rs @@ -12,8 +12,8 @@ pub fn variable_postprocessor( .chain(state.transient_map.iter()) .for_each(|(variable, expr)| { // skip exprs that are already variables - if !expr.contains(' ') - && ["store", "tstore", "transient", "storage", "var"] + if !expr.contains(' ') && + ["store", "tstore", "transient", "storage", "var"] .iter() .any(|x| expr.starts_with(x)) { diff --git a/crates/decompile/src/utils/precompile.rs b/crates/decompile/src/utils/precompile.rs index b63aa30c..014acb49 100644 --- a/crates/decompile/src/utils/precompile.rs +++ b/crates/decompile/src/utils/precompile.rs @@ -14,54 +14,40 @@ pub fn decode_precompile( precompile_address: U256, extcalldata_memory: &[StorageFrame], return_data_offset: &WrappedOpcode, -) -> (bool, String) { +) -> Option { // safely convert the precompile address to a usize. 
let address: usize = match precompile_address.try_into() { Ok(x) => x, Err(_) => usize::MAX, }; - let mut is_ext_call_precompile = false; - let mut ext_call_logic = String::new(); - match address { - 1 => { - is_ext_call_precompile = true; - ext_call_logic = format!( - "address memory[{}] = ecrecover({});", - return_data_offset.solidify(), - extcalldata_memory - .iter() - .map(|x| x.operation.solidify()) - .collect::>() - .join(", ") - ); - } - 2 => { - is_ext_call_precompile = true; - ext_call_logic = format!( - "bytes memory[{}] = sha256({});", - return_data_offset.solidify(), - extcalldata_memory - .iter() - .map(|x| x.operation.solidify()) - .collect::>() - .join(", ") - ); - } - 3 => { - is_ext_call_precompile = true; - ext_call_logic = format!( - "bytes memory[{}] = ripemd160({});", - return_data_offset.solidify(), - extcalldata_memory - .iter() - .map(|x| x.operation.solidify()) - .collect::>() - .join(", ") - ); - } - _ => {} + 1 => Some(format!( + "address memory[{}] = ecrecover({});", + return_data_offset.solidify(), + extcalldata_memory + .iter() + .map(|x| x.operation.solidify()) + .collect::>() + .join(", ") + )), + 2 => Some(format!( + "bytes memory[{}] = sha256({});", + return_data_offset.solidify(), + extcalldata_memory + .iter() + .map(|x| x.operation.solidify()) + .collect::>() + .join(", ") + )), + 3 => Some(format!( + "bytes memory[{}] = ripemd160({});", + return_data_offset.solidify(), + extcalldata_memory + .iter() + .map(|x| x.operation.solidify()) + .collect::>() + .join(", ") + )), + _ => None, } - - (is_ext_call_precompile, ext_call_logic) } diff --git a/crates/vm/src/core/memory.rs b/crates/vm/src/core/memory.rs index 4b9eddcd..d439c2dc 100644 --- a/crates/vm/src/core/memory.rs +++ b/crates/vm/src/core/memory.rs @@ -191,7 +191,7 @@ impl Memory { /// Given an offset into memory, returns the opcode that last modified it (if it has been /// modified at all) /// - /// Due to the nature of `WrappedOpcode`, this allows the entire CFG branch to 
be traversed. + /// Due to the nature of `WrappedOpcode`, this allows the entire Cfg branch to be traversed. pub fn origin(&self, byte: usize) -> Option { self.bytes.get_by_offset(byte) } diff --git a/crates/vm/src/core/types.rs b/crates/vm/src/core/types.rs index 531b909f..2d67566c 100644 --- a/crates/vm/src/core/types.rs +++ b/crates/vm/src/core/types.rs @@ -189,8 +189,8 @@ pub fn get_padding(bytes: &[u8]) -> Padding { // we can avoid doing a full check if any of the following are true: // there are no null bytes OR // neither first nor last byte is a null byte, it is not padded - if null_byte_indices.is_empty() - || null_byte_indices[0] != 0 && null_byte_indices[null_byte_indices.len() - 1] != size - 1 + if null_byte_indices.is_empty() || + null_byte_indices[0] != 0 && null_byte_indices[null_byte_indices.len() - 1] != size - 1 { return Padding::None; } diff --git a/crates/vm/src/core/vm.rs b/crates/vm/src/core/vm.rs index 31367d47..20e26e4e 100644 --- a/crates/vm/src/core/vm.rs +++ b/crates/vm/src/core/vm.rs @@ -303,8 +303,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&a.operation.opcode) - && (0x5f..=0x7f).contains(&b.operation.opcode) + if (0x5f..=0x7f).contains(&a.operation.opcode) && + (0x5f..=0x7f).contains(&b.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result)]) } @@ -321,8 +321,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&a.operation.opcode) - && (0x5f..=0x7f).contains(&b.operation.opcode) + if (0x5f..=0x7f).contains(&a.operation.opcode) && + (0x5f..=0x7f).contains(&b.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result)]) } @@ -339,8 +339,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - 
if (0x5f..=0x7f).contains(&a.operation.opcode) - && (0x5f..=0x7f).contains(&b.operation.opcode) + if (0x5f..=0x7f).contains(&a.operation.opcode) && + (0x5f..=0x7f).contains(&b.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result)]) } @@ -360,8 +360,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&numerator.operation.opcode) - && (0x5f..=0x7f).contains(&denominator.operation.opcode) + if (0x5f..=0x7f).contains(&numerator.operation.opcode) && + (0x5f..=0x7f).contains(&denominator.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result)]) } @@ -381,8 +381,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&numerator.operation.opcode) - && (0x5f..=0x7f).contains(&denominator.operation.opcode) + if (0x5f..=0x7f).contains(&numerator.operation.opcode) && + (0x5f..=0x7f).contains(&denominator.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result.into_raw())]) @@ -403,8 +403,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&a.operation.opcode) - && (0x5f..=0x7f).contains(&modulus.operation.opcode) + if (0x5f..=0x7f).contains(&a.operation.opcode) && + (0x5f..=0x7f).contains(&modulus.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result)]) } @@ -424,8 +424,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&a.operation.opcode) - && (0x5f..=0x7f).contains(&modulus.operation.opcode) + if (0x5f..=0x7f).contains(&a.operation.opcode) && + (0x5f..=0x7f).contains(&modulus.operation.opcode) { simplified_operation = 
WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result.into_raw())]) @@ -447,8 +447,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&a.operation.opcode) - && (0x5f..=0x7f).contains(&b.operation.opcode) + if (0x5f..=0x7f).contains(&a.operation.opcode) && + (0x5f..=0x7f).contains(&b.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result)]) } @@ -469,8 +469,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&a.operation.opcode) - && (0x5f..=0x7f).contains(&b.operation.opcode) + if (0x5f..=0x7f).contains(&a.operation.opcode) && + (0x5f..=0x7f).contains(&b.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result)]) } @@ -487,8 +487,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&a.operation.opcode) - && (0x5f..=0x7f).contains(&exponent.operation.opcode) + if (0x5f..=0x7f).contains(&a.operation.opcode) && + (0x5f..=0x7f).contains(&exponent.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result)]) } @@ -591,8 +591,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&a.operation.opcode) - && (0x5f..=0x7f).contains(&b.operation.opcode) + if (0x5f..=0x7f).contains(&a.operation.opcode) && + (0x5f..=0x7f).contains(&b.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result)]) } @@ -609,8 +609,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&a.operation.opcode) - && (0x5f..=0x7f).contains(&b.operation.opcode) + if 
(0x5f..=0x7f).contains(&a.operation.opcode) && + (0x5f..=0x7f).contains(&b.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result)]) } @@ -627,8 +627,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&a.operation.opcode) - && (0x5f..=0x7f).contains(&b.operation.opcode) + if (0x5f..=0x7f).contains(&a.operation.opcode) && + (0x5f..=0x7f).contains(&b.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result)]) } @@ -677,8 +677,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&a.operation.opcode) - && (0x5f..=0x7f).contains(&b.operation.opcode) + if (0x5f..=0x7f).contains(&a.operation.opcode) && + (0x5f..=0x7f).contains(&b.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result)]) } @@ -697,8 +697,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&a.operation.opcode) - && (0x5f..=0x7f).contains(&b.operation.opcode) + if (0x5f..=0x7f).contains(&a.operation.opcode) && + (0x5f..=0x7f).contains(&b.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result)]) } @@ -721,8 +721,8 @@ impl VM { // if both inputs are PUSH instructions, simplify the operation let mut simplified_operation = operation; - if (0x5f..=0x7f).contains(&a.operation.opcode) - && (0x5f..=0x7f).contains(&b.operation.opcode) + if (0x5f..=0x7f).contains(&a.operation.opcode) && + (0x5f..=0x7f).contains(&b.operation.opcode) { simplified_operation = WrappedOpcode::new(0x7f, vec![WrappedInput::Raw(result.into_raw())]) @@ -1121,13 +1121,12 @@ impl VM { let pc: u128 = pc.try_into()?; // Check if JUMPDEST is valid and throw with 790 if not (invalid jump 
destination) - if (pc - <= self - .bytecode + if (pc <= + self.bytecode .len() .try_into() - .expect("impossible case: bytecode is larger than u128::MAX")) - && (self.bytecode[pc as usize] != 0x5b) + .expect("impossible case: bytecode is larger than u128::MAX")) && + (self.bytecode[pc as usize] != 0x5b) { self.exit(790, Vec::new()); return Ok(Instruction { @@ -1154,13 +1153,12 @@ impl VM { if !condition.eq(&U256::from(0u8)) { // Check if JUMPDEST is valid and throw with 790 if not (invalid jump // destination) - if (pc - <= self - .bytecode + if (pc <= + self.bytecode .len() .try_into() - .expect("impossible case: bytecode is larger than u128::MAX")) - && (self.bytecode[pc as usize] != 0x5b) + .expect("impossible case: bytecode is larger than u128::MAX")) && + (self.bytecode[pc as usize] != 0x5b) { self.exit(790, Vec::new()); return Ok(Instruction { @@ -1301,9 +1299,9 @@ impl VM { let data = self.memory.read(offset, size); // consume dynamic gas - let gas_cost = (375 * (topic_count as u128)) - + 8 * (size as u128) - + self.memory.expansion_cost(offset, size); + let gas_cost = (375 * (topic_count as u128)) + + 8 * (size as u128) + + self.memory.expansion_cost(offset, size); self.consume_gas(gas_cost); // no need for a panic check because the length of events should never be larger @@ -1496,9 +1494,9 @@ impl VM { let mut vm_clone = self.clone(); for _ in 0..n { - if vm_clone.bytecode.len() < vm_clone.instruction as usize - || vm_clone.exitcode != 255 - || !vm_clone.returndata.is_empty() + if vm_clone.bytecode.len() < vm_clone.instruction as usize || + vm_clone.exitcode != 255 || + !vm_clone.returndata.is_empty() { break; } diff --git a/crates/vm/src/ext/exec/util.rs b/crates/vm/src/ext/exec/util.rs index 9be91e11..38e86a41 100644 --- a/crates/vm/src/ext/exec/util.rs +++ b/crates/vm/src/ext/exec/util.rs @@ -69,13 +69,11 @@ pub fn jump_stack_depth_less_than_max_stack_depth( /// If the stack contains more than 16 of the same item (with the same sources), it is 
considered a /// loop. pub fn stack_contains_too_many_of_the_same_item(stack: &Stack) -> bool { - if stack.size() > 16 - && stack.stack.iter().any(|frame| { - let solidified_frame_source = frame.operation.solidify(); - stack.stack.iter().filter(|f| f.operation.solidify() == solidified_frame_source).count() - >= 16 - }) - { + if stack.size() > 16 && stack.stack.iter().any(|frame| { + let solidified_frame_source = frame.operation.solidify(); + stack.stack.iter().filter(|f| f.operation.solidify() == solidified_frame_source).count() >= + 16 + }) { trace!("jump matches loop-detection heuristic: 'stack_contains_too_many_of_the_same_item'",); return true; } diff --git a/crates/vm/src/ext/lexers/solidity.rs b/crates/vm/src/ext/lexers/solidity.rs index c009270c..8bdd7d41 100644 --- a/crates/vm/src/ext/lexers/solidity.rs +++ b/crates/vm/src/ext/lexers/solidity.rs @@ -219,8 +219,8 @@ impl WrappedOpcode { .push_str(format!("arg{}", (slot - 4) / 32).as_str()); } Err(_) => { - if solidified_slot.contains("0x04 + ") - || solidified_slot.contains("+ 0x04") + if solidified_slot.contains("0x04 + ") || + solidified_slot.contains("+ 0x04") { solidified_wrapped_opcode.push_str( solidified_slot diff --git a/crates/vm/src/ext/range_map.rs b/crates/vm/src/ext/range_map.rs index 872a8fea..834dc737 100644 --- a/crates/vm/src/ext/range_map.rs +++ b/crates/vm/src/ext/range_map.rs @@ -107,12 +107,12 @@ impl RangeMap { } fn range_collides(incoming: &Range, incumbent: &Range) -> bool { - !(incoming.start <= incumbent.start - && incoming.end < incumbent.end - && incoming.end < incumbent.start - || incoming.start > incumbent.start - && incoming.end >= incumbent.end - && incoming.start > incumbent.end) + !(incoming.start <= incumbent.start && + incoming.end < incumbent.end && + incoming.end < incumbent.start || + incoming.start > incumbent.start && + incoming.end >= incumbent.end && + incoming.start > incumbent.end) } } diff --git a/crates/vm/src/ext/selectors.rs b/crates/vm/src/ext/selectors.rs 
index b1b9f3d4..1b8cd239 100644 --- a/crates/vm/src/ext/selectors.rs +++ b/crates/vm/src/ext/selectors.rs @@ -113,10 +113,10 @@ pub fn resolve_entry_point(vm: &mut VM, selector: &str) -> u128 { let jump_condition = call.last_instruction.input_operations[1].solidify(); let jump_taken = call.last_instruction.inputs[1].try_into().unwrap_or(1); - if jump_condition.contains(selector) - && jump_condition.contains("msg.data[0]") - && jump_condition.contains(" == ") - && jump_taken == 1 + if jump_condition.contains(selector) && + jump_condition.contains("msg.data[0]") && + jump_condition.contains(" == ") && + jump_taken == 1 { return call.last_instruction.inputs[0].try_into().unwrap_or(0); } else if jump_taken == 1 { @@ -142,8 +142,7 @@ pub fn resolve_entry_point(vm: &mut VM, selector: &str) -> u128 { /// Resolve a list of selectors to their function signatures. pub async fn resolve_selectors(selectors: Vec) -> HashMap> where - T: ResolveSelector + Send + Clone + 'static, -{ + T: ResolveSelector + Send + Clone + 'static, { // short-circuit if there are no selectors if selectors.is_empty() { return HashMap::new(); diff --git a/examples/cfg/src/main.rs b/examples/cfg/src/main.rs index 66e7ac0f..48750964 100644 --- a/examples/cfg/src/main.rs +++ b/examples/cfg/src/main.rs @@ -1,15 +1,15 @@ -use heimdall_cfg::{cfg, CFGArgsBuilder}; +use heimdall_cfg::{cfg, CfgArgsBuilder}; #[tokio::main] async fn main() -> Result<(), Box> { - let args = CFGArgsBuilder::new() + let args = CfgArgsBuilder::new() .target("0x9f00c43700bc0000Ff91bE00841F8e04c0495000".to_string()) .rpc_url("https://eth.llamarpc.com".to_string()) .build()?; let result = cfg(args).await?; - println!("Contract CFG: {:#?}", result); + println!("Contract Cfg: {:#?}", result); Ok(()) }