From 61478ddfff36f5dc7c369798da7c13dca0600878 Mon Sep 17 00:00:00 2001
From: Jonathan Becker <jonathan@jbecker.dev>
Date: Tue, 26 Dec 2023 18:29:22 -0600
Subject: [PATCH] fix(symbolic-execution): add growth loop-detection heuristic
 (#251)

* chore(sym-exec): convert tuple to `JumpFrame`

* feat(sym-exec): add `jump_stack_depth_less_than_max_stack_depth` heuristic

* fix(sym-exec): handle `JUMP` loops as well

* fix(sym-exec): increase similarity threshold for jump detection

* chore(sym-exec): remove unused parameter `logger`
---
 common/src/ether/evm/ext/exec/jump_frame.rs |  15 ++
 common/src/ether/evm/ext/exec/mod.rs        | 156 +++++++++++++++-----
 common/src/ether/evm/ext/exec/util.rs       |   7 +-
 common/src/utils/range_map.rs               |   6 +-
 core/tests/test_decompile.rs                |   4 +
 5 files changed, 147 insertions(+), 41 deletions(-)
 create mode 100644 common/src/ether/evm/ext/exec/jump_frame.rs
diff --git a/common/src/ether/evm/ext/exec/jump_frame.rs b/common/src/ether/evm/ext/exec/jump_frame.rs
new file mode 100644
index 00000000..f41e1781
--- /dev/null
+++ b/common/src/ether/evm/ext/exec/jump_frame.rs
@@ -0,0 +1,15 @@
+use ethers::types::U256;
+
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+pub struct JumpFrame { // hashable key identifying one encountered JUMP / JUMPI
+    pub pc: u128, // position of the jump instruction itself
+    pub jumpdest: U256, // first input of the jump instruction (its destination)
+    pub stack_depth: usize, // VM stack size at the time the frame was built
+    pub jump_taken: bool, // JUMPI: condition input is non-zero; JUMP: always true
+}
+
+impl JumpFrame {
+    pub fn new(pc: u128, jumpdest: U256, stack_depth: usize, jump_taken: bool) -> Self {
+        Self { pc, jumpdest, stack_depth, jump_taken }
+    }
+}
diff --git a/common/src/ether/evm/ext/exec/mod.rs b/common/src/ether/evm/ext/exec/mod.rs
index c3607650..4ff6387e 100644
--- a/common/src/ether/evm/ext/exec/mod.rs
+++ b/common/src/ether/evm/ext/exec/mod.rs
@@ -1,10 +1,14 @@
+mod jump_frame;
 mod util;
 
-use self::util::{
-    jump_condition_appears_recursive, jump_condition_contains_mutated_memory_access,
-    jump_condition_contains_mutated_storage_access,
-    jump_condition_historical_diffs_approximately_equal, stack_contains_too_many_of_the_same_item,
-    stack_diff, stack_item_source_depth_too_deep,
+use self::{
+    jump_frame::JumpFrame,
+    util::{
+        jump_condition_appears_recursive, jump_condition_contains_mutated_memory_access,
+        jump_condition_contains_mutated_storage_access,
+        jump_condition_historical_diffs_approximately_equal,
+        stack_contains_too_many_of_the_same_item, stack_diff, stack_item_source_depth_too_deep,
+    },
 };
 use crate::{
     debug_max,
@@ -12,9 +16,8 @@ use crate::{
         stack::Stack,
         vm::{State, VM},
     },
-    utils::{io::logging::Logger, strings::decode_hex},
+    utils::strings::decode_hex,
 };
-use ethers::types::U256;
 use std::collections::HashMap;
 
 #[derive(Clone, Debug)]
@@ -41,33 +44,28 @@ impl VM {
             }
         }
 
-        // get a new logger
-        let logger = Logger::default();
         debug_max!("beginning symbolic execution for selector 0x{}", selector);
 
         // the VM is at the function entry point, begin tracing
         let mut branch_count = 0;
-        (self.recursive_map(&mut branch_count, &mut HashMap::new(), &logger), branch_count)
+        (self.recursive_map(&mut branch_count, &mut HashMap::new()), branch_count)
     }
 
     // build a map of function jump possibilities from the EVM bytecode
     pub fn symbolic_exec(&self) -> (VMTrace, u32) {
         let mut vm = self.clone();
 
-        // get a new logger
-        let logger = Logger::default();
         debug_max!("beginning contract-wide symbolic execution");
 
         // the VM is at the function entry point, begin tracing
         let mut branch_count = 0;
-        (vm.recursive_map(&mut branch_count, &mut HashMap::new(), &logger), branch_count)
+        (vm.recursive_map(&mut branch_count, &mut HashMap::new()), branch_count)
     }
 
     fn recursive_map(
         &mut self,
         branch_count: &mut u32,
-        handled_jumps: &mut HashMap<(u128, U256, usize, bool), Vec<Stack>>,
-        logger: &Logger,
+        handled_jumps: &mut HashMap<JumpFrame, Vec<Stack>>,
     ) -> VMTrace {
         let mut vm = self.clone();
 
@@ -96,16 +94,12 @@ impl VM {
                     state.last_instruction.instruction
                 );
 
-                // jump frame contains:
-                //  1. the instruction (PC) of the JUMPI
-                //  2. the jump destination
-                //  3. the stack size at the time of the JUMPI
-                //  4. whether the jump condition is zero
-                let jump_frame: (u128, U256, usize, bool) = (
+                // build hashable jump frame
+                let jump_frame = JumpFrame::new(
                     state.last_instruction.instruction,
                     state.last_instruction.inputs[0],
                     vm.stack.size(),
-                    state.last_instruction.inputs[1].is_zero(),
+                    !state.last_instruction.inputs[1].is_zero(),
                 );
 
                 // if the stack has over 16 items of the same source, it's probably a loop
@@ -120,6 +114,25 @@ impl VM {
                 }
 
                 // break out of loops
+                // (1) get all keys that match jump_frame.pc and jump_frame.jumpdest
+                let matching_keys = handled_jumps
+                    .keys()
+                    .filter(|key| key.pc == jump_frame.pc && key.jumpdest == jump_frame.jumpdest)
+                    .collect::<Vec<&JumpFrame>>();
+
+                // (a) get the max stack_depth of all matching keys
+                let max_stack_depth =
+                    matching_keys.iter().map(|key| key.stack_depth).max().unwrap_or(0);
+
+                // (b) if the current stack depth is less than the max stack depth, we don't need to
+                // continue.
+                if jump_frame.stack_depth < max_stack_depth {
+                    debug_max!("jump matches loop-detection heuristic: 'jump_stack_depth_less_than_max_stack_depth'");
+                    debug_max!("jump terminated.");
+                    return vm_trace
+                }
+
+                // (2) perform heuristic checks on historical stacks
                 match handled_jumps.get_mut(&jump_frame) {
                     Some(historical_stacks) => {
                         // for every stack that we have encountered for this jump, perform some
@@ -236,31 +249,106 @@ impl VM {
                     // push a new vm trace to the children
                     let mut trace_vm = vm.clone();
                     trace_vm.instruction = state.last_instruction.inputs[0].as_u128() + 1;
-                    vm_trace.children.push(trace_vm.recursive_map(
-                        branch_count,
-                        handled_jumps,
-                        logger,
-                    ));
+                    vm_trace.children.push(trace_vm.recursive_map(branch_count, handled_jumps));
 
                     // push the current path onto the stack
-                    vm_trace.children.push(vm.recursive_map(branch_count, handled_jumps, logger));
+                    vm_trace.children.push(vm.recursive_map(branch_count, handled_jumps));
                     break;
                 } else {
                     // push a new vm trace to the children
                     let mut trace_vm = vm.clone();
                     trace_vm.instruction = state.last_instruction.instruction + 1;
-                    vm_trace.children.push(trace_vm.recursive_map(
-                        branch_count,
-                        handled_jumps,
-                        logger,
-                    ));
+                    vm_trace.children.push(trace_vm.recursive_map(branch_count, handled_jumps));
 
                     // push the current path onto the stack
-                    vm_trace.children.push(vm.recursive_map(branch_count, handled_jumps, logger));
+                    vm_trace.children.push(vm.recursive_map(branch_count, handled_jumps));
                     break;
                 }
             }
 
+            // if we encounter a JUMP
+            if state.last_instruction.opcode == 0x56 {
+                // build hashable jump frame
+                let jump_frame = JumpFrame::new(
+                    state.last_instruction.instruction,
+                    state.last_instruction.inputs[0],
+                    vm.stack.size(),
+                    true,
+                );
+
+                // perform heuristic checks on historical stacks
+                match handled_jumps.get_mut(&jump_frame) {
+                    Some(historical_stacks) => {
+                        // for every stack that we have encountered for this jump, perform some
+                        // heuristic checks to determine if this might be a loop
+                        if historical_stacks.iter().any(|hist_stack| {
+                            // check if any historical stack is the same as the current stack
+                            if hist_stack == &vm.stack {
+                                debug_max!(
+                                    "jump matches loop-detection heuristic: 'jump_path_already_handled'"
+                                );
+                                return true
+                            }
+
+                            // calculate the difference of the current stack and the historical stack
+                            let stack_diff = stack_diff(&vm.stack, hist_stack);
+                            if stack_diff.is_empty() {
+                                // the stack_diff is empty (the stacks are the same), so we've
+                                // already handled this path
+                                debug_max!(
+                                    "jump matches loop-detection heuristic: 'stack_diff_is_empty'"
+                                );
+                                return true
+                            }
+
+                            debug_max!("stack diff: [{}]", stack_diff.iter().map(|frame| format!("{}", frame.value)).collect::<Vec<String>>().join(", "));
+
+                            false
+                        }) {
+                            debug_max!("jump terminated.");
+                            debug_max!(
+                                "adding historical stack {} to jump frame {:?}",
+                                &format!("{:#016x?}", vm.stack.hash()),
+                                jump_frame
+                            );
+
+                            // a loop heuristic matched: record this stack, then end this path
+                            historical_stacks.push(vm.stack.clone());
+                            return vm_trace
+                        }
+
+                        if jump_condition_historical_diffs_approximately_equal(
+                            &vm.stack,
+                            historical_stacks,
+                        ) {
+                            debug_max!("jump terminated.");
+                            debug_max!(
+                                "adding historical stack {} to jump frame {:?}",
+                                &format!("{:#016x?}", vm.stack.hash()),
+                                jump_frame
+                            );
+
+                            // diffs approximately equal: record this stack, end this path
+                            historical_stacks.push(vm.stack.clone());
+                            return vm_trace;
+                        } else {
+                            debug_max!(
+                                "adding historical stack {} to jump frame {:?}",
+                                &format!("{:#016x?}", vm.stack.hash()),
+                                jump_frame
+                            );
+                            // this key exists, but the stack is different, so the jump is new
+                            historical_stacks.push(vm.stack.clone());
+                        }
+                    }
+                    None => {
+                        // this key doesn't exist, so the jump is new
+                        debug_max!("added new jump frame: {:?}", jump_frame);
+                        handled_jumps.insert(jump_frame, vec![vm.stack.clone()]);
+                    }
+                }
+            }
+
             // when the vm exits, this path is complete
             if vm.exitcode != 255 || !vm.returndata.is_empty() {
                 break;
diff --git a/common/src/ether/evm/ext/exec/util.rs b/common/src/ether/evm/ext/exec/util.rs
index 8db1ecbc..2e5c9047 100644
--- a/common/src/ether/evm/ext/exec/util.rs
+++ b/common/src/ether/evm/ext/exec/util.rs
@@ -144,8 +144,11 @@ pub fn jump_condition_historical_diffs_approximately_equal(
         );
     }
 
-    // check if all stack diffs are exactly length 1
-    if !stack_diffs.iter().all(|diff| diff.len() == 1) {
+    // get stack length / 10, rounded up as threshold
+    let threshold = (stack.size() as f64 / 10f64).ceil() as usize;
+
+    // check if all stack diffs are similar
+    if !stack_diffs.iter().all(|diff| diff.len() <= threshold) {
         return false;
     }
 
diff --git a/common/src/utils/range_map.rs b/common/src/utils/range_map.rs
index 2994dd5b..a30d4356 100644
--- a/common/src/utils/range_map.rs
+++ b/common/src/utils/range_map.rs
@@ -103,11 +103,7 @@ impl RangeMap {
     }
 
     fn affected_ranges(&self, range: Range<usize>) -> Vec<Range<usize>> {
-        self.0
-            .keys()
-            .filter(|incumbent| Self::range_collides(&range, *incumbent))
-            .cloned()
-            .collect()
+        self.0.keys().filter(|incumbent| Self::range_collides(&range, incumbent)).cloned().collect()
     }
 
     fn range_collides(incoming: &Range<usize>, incumbent: &Range<usize>) -> bool {
diff --git a/core/tests/test_decompile.rs b/core/tests/test_decompile.rs
index 960dff45..7a9a680b 100644
--- a/core/tests/test_decompile.rs
+++ b/core/tests/test_decompile.rs
@@ -296,6 +296,10 @@ mod integration_tests {
             "0xd1d2Eb1B1e90B638588728b4130137D262C87cae",
             "0x95e05e2Abbd26943874ac000D87C3D9e115B543c",
             "0x00000000A991C429eE2Ec6df19d40fe0c80088B8",
+            "0x737673b5e0a3c68adf4c1a87bca5623cfc537ec3",
+            "0x940259178FbF021e625510919BC2FF0B944E5613",
+            "0xff612db0583be8d5498731e4e32bc12e08fa6292",
+            "0xd5FEa30Ed719693Ec8848Dc7501b582F5de6a5BB",
         ];
 
         // define flag checks