Skip to content

Commit

Permalink
feat: implement byte tracking in EVM memory (#242)
Browse files Browse the repository at this point in the history
* Add byte-to-opcode mapping

* Use bit-tracked memory stores during VM execution

* Add getter for bit tracking

* Add ByteTracker type

* Add documentation comments

* Add unit tests for ByteTracker type

* Refactor range collision handling

* Move core ByteTracker implementation to its own RangeMap module

* Nightly format run

* Address review comments

* Update common/src/resources/range_map.rs

Co-authored-by: Jonathan Becker <jonathan@jbecker.dev>

* Move range map implementation to appropriate module and lint

* Patch range map unit tests

---------

Co-authored-by: Jonathan Becker <jonathan@jbecker.dev>
  • Loading branch information
jmcph4 and Jon-Becker authored Dec 19, 2023
1 parent 71aaaeb commit 950c763
Show file tree
Hide file tree
Showing 4 changed files with 274 additions and 9 deletions.
34 changes: 31 additions & 3 deletions common/src/ether/evm/core/memory.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
use crate::utils::range_map::RangeMap;

use super::opcodes::WrappedOpcode;

pub type ByteTracker = RangeMap;

/// The [`Memory`] struct represents the memory of an EVM.
#[derive(Clone, Debug)]
pub struct Memory {
/// Vector storing memory data
pub memory: Vec<u8>,
/// Byte-tracking facility, allowing bytes to be associated with the opcodes that last modified
/// them
pub bytes: ByteTracker,
}

impl Default for Memory {
Expand All @@ -12,9 +21,9 @@ impl Default for Memory {
}

impl Memory {
/// Creates a new [`Memory`] with an empty memory vector.
/// Creates a new [`Memory`] with an empty memory vector and empty byte tracker
pub fn new() -> Memory {
Memory { memory: Vec::new() }
Memory { memory: Vec::new(), bytes: ByteTracker::new() }
}

/// Gets the current size of the memory in bytes.
Expand Down Expand Up @@ -89,6 +98,17 @@ impl Memory {
self.memory.splice(offset..offset + size, value);
}

/// Stores `value` in memory at `offset` exactly as `store` does, additionally recording
/// `opcode` in the byte tracker as the last writer of the `size` affected bytes.
pub fn store_with_opcode(
&mut self,
offset: usize,
size: usize,
value: &[u8],
opcode: WrappedOpcode,
) {
self.store(offset, size, value);
self.bytes.write(offset, size, opcode);
}

/// Read the given number of bytes from the memory at the given offset.
/// If the offset + size is greater than the current size of the memory, null bytes will be
/// appended to the value.
Expand Down Expand Up @@ -155,6 +175,14 @@ impl Memory {
new_memory_cost - self.memory_cost()
}
}

/// Given an offset into memory, returns the opcode that last modified it (if it has been
/// modified at all)
///
/// NOTE(review): the claim that this lets the entire CFG branch be traversed rests on
/// `WrappedOpcode` carrying its own operand chain — confirm against `WrappedOpcode`'s docs.
pub fn origin(&self, byte: usize) -> Option<WrappedOpcode> {
self.bytes.get_by_offset(byte)
}
}

#[cfg(test)]
Expand Down
12 changes: 6 additions & 6 deletions common/src/ether/evm/core/vm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -920,7 +920,7 @@ impl VM {
let gas_cost = 3 * minimum_word_size + self.memory.expansion_cost(offset, size);
self.consume_gas(gas_cost);

self.memory.store(dest_offset, size, &value);
self.memory.store_with_opcode(dest_offset, size, &value, operation);
}

// CODESIZE
Expand Down Expand Up @@ -997,7 +997,7 @@ impl VM {
let gas_cost = 3 * minimum_word_size + self.memory.expansion_cost(offset, size);
self.consume_gas(gas_cost);

self.memory.store(dest_offset, size, &value);
self.memory.store_with_opcode(dest_offset, size, &value, operation);
}

// GASPRICE
Expand Down Expand Up @@ -1074,7 +1074,7 @@ impl VM {
self.consume_gas(100);
}

self.memory.store(dest_offset, size, &value);
self.memory.store_with_opcode(dest_offset, size, &value, operation);
}

// RETURNDATASIZE
Expand Down Expand Up @@ -1129,7 +1129,7 @@ impl VM {
3 * minimum_word_size + self.memory.expansion_cost(dest_offset, size);
self.consume_gas(gas_cost);

self.memory.store(dest_offset, size, &value);
self.memory.store_with_opcode(dest_offset, size, &value, operation);
}

// EXTCODEHASH and BLOCKHASH
Expand Down Expand Up @@ -1228,7 +1228,7 @@ impl VM {
let gas_cost = self.memory.expansion_cost(offset, 32);
self.consume_gas(gas_cost);

self.memory.store(offset, 32, value.encode().as_slice());
self.memory.store_with_opcode(offset, 32, value.encode().as_slice(), operation);
}

// MSTORE8
Expand Down Expand Up @@ -1257,7 +1257,7 @@ impl VM {
let gas_cost = self.memory.expansion_cost(offset, 1);
self.consume_gas(gas_cost);

self.memory.store(offset, 1, &[value.encode()[31]]);
self.memory.store_with_opcode(offset, 1, &[value.encode()[31]], operation);
}

// SLOAD
Expand Down
1 change: 1 addition & 0 deletions common/src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ pub mod http;
pub mod integers;
pub mod io;
pub mod iter;
pub mod range_map;
pub mod strings;
pub mod sync;
pub mod testing;
Expand Down
236 changes: 236 additions & 0 deletions common/src/utils/range_map.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
use std::{collections::HashMap, ops::Range};

use crate::ether::evm::core::opcodes::WrappedOpcode;

/// Describes how an incoming memory write collides with an already-tracked range.
#[derive(Copy, Clone, Debug)]
enum CollisionKind {
/// The incoming range completely covers the incumbent, which must be removed.
Deletion,
/// The incoming range lies strictly inside the incumbent, splitting it in two.
Splitting,
/// The incoming range overlaps exactly one end of the incumbent, which must be trimmed.
Shortening,
}

/// Maps byte ranges to the [`WrappedOpcode`] that last wrote them.
///
/// NOTE(review): the `Range` keys are treated as *inclusive* on both ends by the writing
/// logic (`write` stores `offset..offset + size - 1`), unlike the usual half-open
/// `std::ops::Range` convention — keep that in mind when reading the collision code.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RangeMap(pub HashMap<Range<usize>, WrappedOpcode>);

impl RangeMap {
    /// Creates an empty `RangeMap`.
    pub fn new() -> Self {
        Self(HashMap::new())
    }

    /// Given an offset into memory, returns the opcode associated with the tracked range
    /// covering it, or `None` if no tracked range contains the offset.
    pub fn get_by_offset(&self, offset: usize) -> Option<WrappedOpcode> {
        // Bug fix: the original `.expect(...)`ed the lookup and panicked for any untracked
        // offset, despite the `Option` return type. Propagate the miss instead.
        self.find_range(offset).and_then(|range| self.0.get(range)).cloned()
    }

    /// Associates the provided opcode with the range of memory modified by writing a
    /// `size`-byte value to `offset`.
    ///
    /// The stored range is `[offset, offset + size - 1]` — both bounds are inclusive, even
    /// though it is kept in a (normally half-open) `std::ops::Range`. Any existing ranges
    /// that the new range collides with are dealt with accordingly, that is:
    ///
    /// - deleted, if the new range completely overwrites them,
    /// - split, if the new range overwrites a subset that partitions them,
    /// - shortened, if only one "end" of them is overwritten
    ///
    /// A zero-sized write is a no-op.
    pub fn write(&mut self, offset: usize, size: usize, opcode: WrappedOpcode) {
        // Bug fix: `offset + size - 1` underflows when `size == 0`; nothing is written in
        // that case anyway, so bail out early.
        if size == 0 {
            return;
        }

        let range = Range { start: offset, end: offset + size - 1 };

        for incumbent in self.affected_ranges(range.clone()) {
            match Self::classify_collision(&range, &incumbent) {
                CollisionKind::Deletion => {
                    self.0.remove(&incumbent);
                }
                CollisionKind::Splitting => {
                    let old_opcode = self
                        .0
                        .remove(&incumbent)
                        .expect("affected range must exist in the map");
                    let left = Range { start: incumbent.start, end: range.start - 1 };
                    let right = Range { start: range.end + 1, end: incumbent.end };
                    self.0.insert(left, old_opcode.clone());
                    self.0.insert(right, old_opcode);
                }
                CollisionKind::Shortening => {
                    let old_opcode = self
                        .0
                        .remove(&incumbent)
                        .expect("affected range must exist in the map");
                    // An incoming write starting at or after the incumbent's start clobbers
                    // the incumbent's right end; otherwise it clobbers the left end.
                    let remainder = if range.start >= incumbent.start {
                        Range { start: incumbent.start, end: range.start - 1 }
                    } else {
                        Range { start: range.end + 1, end: incumbent.end }
                    };
                    self.0.insert(remainder, old_opcode);
                }
            }
        }

        // Insert the new range once, after all collisions are resolved (the original
        // redundantly re-inserted it on every collision iteration).
        self.0.insert(range, opcode);
    }

    /// Classifies how `incoming` collides with `incumbent` (both inclusive ranges).
    fn classify_collision(incoming: &Range<usize>, incumbent: &Range<usize>) -> CollisionKind {
        if incoming.start <= incumbent.start && incoming.end >= incumbent.end {
            CollisionKind::Deletion
        } else if incoming.start > incumbent.start && incoming.end < incumbent.end {
            CollisionKind::Splitting
        } else {
            CollisionKind::Shortening
        }
    }

    /// Finds the tracked range containing `offset`, if any.
    fn find_range(&self, offset: usize) -> Option<&Range<usize>> {
        // Bug fix: ranges are stored with *inclusive* end bounds (see `write`), so the
        // half-open `Range::contains` would miss the final byte of every range.
        self.0.keys().find(|range| range.start <= offset && offset <= range.end)
    }

    /// Returns (clones of) all tracked ranges overlapping `range`.
    fn affected_ranges(&self, range: Range<usize>) -> Vec<Range<usize>> {
        self.0.keys().filter(|incumbent| Self::range_collides(&range, incumbent)).cloned().collect()
    }

    /// Inclusive-interval overlap test; equivalent to (and simpler than) the original
    /// four-clause check.
    fn range_collides(incoming: &Range<usize>, incumbent: &Range<usize>) -> bool {
        incoming.start <= incumbent.end && incoming.end >= incumbent.start
    }
}

/// `Default` mirrors [`RangeMap::new`] (and satisfies clippy's `new_without_default`).
impl Default for RangeMap {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use std::{collections::HashMap, ops::Range};

    use crate::{ether::evm::core::opcodes::WrappedOpcode, utils::range_map::RangeMap};

    /// Builds a [`RangeMap`] mapping each `(start, end)` pair to a clone of `op`.
    ///
    /// The mapped values are irrelevant to these tests, so a single arbitrary opcode is
    /// reused throughout for simplicity.
    fn tracker_from(pairs: &[(usize, usize)], op: &WrappedOpcode) -> RangeMap {
        RangeMap(HashMap::from_iter(
            pairs.iter().map(|&(start, end)| (Range { start, end }, op.clone())),
        ))
    }

    #[test]
    fn test_one_incumbent_and_needs_deletion() {
        let op = WrappedOpcode::default();
        let mut actual = tracker_from(&[(8, 16), (32, 64)], &op);

        actual.write(7, 11, op.clone());

        let expected = tracker_from(&[(7, 17), (32, 64)], &op);
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_one_incumbent_and_needs_splitting() {
        let op = WrappedOpcode::default();
        let mut actual = tracker_from(&[(7, 18), (32, 64)], &op);

        actual.write(8, 8, op.clone());

        let expected = tracker_from(&[(7, 7), (8, 15), (16, 18), (32, 64)], &op);
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_one_incumbent_and_needs_right_shortening() {
        let op = WrappedOpcode::default();
        let mut actual = tracker_from(&[(7, 18), (32, 64)], &op);

        actual.write(10, 14, op.clone());

        let expected = tracker_from(&[(7, 9), (10, 23), (32, 64)], &op);
        assert_eq!(actual, expected);
    }

    #[test]
    fn test_one_incumbent_and_needs_left_shortening() {
        let op = WrappedOpcode::default();
        let mut actual = tracker_from(&[(7, 18), (32, 64)], &op);

        actual.write(2, 8, op.clone());

        let expected = tracker_from(&[(2, 9), (10, 18), (32, 64)], &op);
        assert_eq!(actual, expected);
    }
}

0 comments on commit 950c763

Please sign in to comment.