From b4338c6fc6d0dc54c3367d24ecaa2d15275083e5 Mon Sep 17 00:00:00 2001 From: Willem Victor Date: Sat, 4 May 2024 10:26:14 +0200 Subject: [PATCH 1/8] Refactoring Virtual Machine --- Cargo.toml | 2 +- crates/vuur_vm/Cargo.toml | 1 + crates/vuur_vm/src/func_def.rs | 45 ++++++ crates/vuur_vm/src/instruction_set.rs | 120 ++++++++++++++ crates/vuur_vm/src/lib.rs | 17 +- crates/vuur_vm/src/module.rs | 16 ++ crates/vuur_vm/src/symbol_table.rs | 225 ++++++++++++++++++++++++++ crates/vuur_vm/src/value.rs | 2 + crates/vuur_vm/src/vm_v2.rs | 211 ++++++++++++++++++++++++ crates/vuur_vm/tests/test_vm_v2.rs | 7 + 10 files changed, 643 insertions(+), 3 deletions(-) create mode 100644 crates/vuur_vm/src/func_def.rs create mode 100644 crates/vuur_vm/src/instruction_set.rs create mode 100644 crates/vuur_vm/src/module.rs create mode 100644 crates/vuur_vm/src/symbol_table.rs create mode 100644 crates/vuur_vm/src/value.rs create mode 100644 crates/vuur_vm/src/vm_v2.rs create mode 100644 crates/vuur_vm/tests/test_vm_v2.rs diff --git a/Cargo.toml b/Cargo.toml index 7cacd00..f6e36e7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ members = [ "crates/vuur_lexer", "crates/vuur_parse", "crates/vuur_compile", - "crates/vuur_compiler", +# "crates/vuur_compiler", "crates/vuur_vm", "crates/vuur", ] diff --git a/crates/vuur_vm/Cargo.toml b/crates/vuur_vm/Cargo.toml index 0f91e4d..1dd41e7 100644 --- a/crates/vuur_vm/Cargo.toml +++ b/crates/vuur_vm/Cargo.toml @@ -6,6 +6,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +num = "0.4" vuur_compile = { path = "../vuur_compile" } vuur_parse = { path = "../vuur_parse" } diff --git a/crates/vuur_vm/src/func_def.rs b/crates/vuur_vm/src/func_def.rs new file mode 100644 index 0000000..f0360f1 --- /dev/null +++ b/crates/vuur_vm/src/func_def.rs @@ -0,0 +1,45 @@ +use crate::instruction_set::Op; +use std::rc::Rc; + +#[derive(Debug, Clone, Copy)] +#[repr(transparent)] +pub 
struct FuncId(pub(crate) u32); + +impl FuncId { + #[inline(always)] + pub(crate) fn new(id: u32) -> Self { + Self(id) + } + + #[inline(always)] + pub fn to_usize(self) -> usize { + self.0 as usize + } + + #[inline(always)] + pub fn to_u32(self) -> u32 { + self.0 + } +} + +#[derive(Debug)] +pub struct Closure { + pub func_id: FuncId, + pub func: Rc, + pub up_values: Vec<()>, +} + +#[derive(Debug)] +pub struct ScriptFunc { + pub id: FuncId, + pub constants: Vec, + pub code: Box<[Op]>, +} + +pub type NativeFuncPtr = fn() -> (); + +#[derive(Debug)] +pub struct NativeFunc { + pub id: FuncId, + pub ptr: NativeFuncPtr, +} diff --git a/crates/vuur_vm/src/instruction_set.rs b/crates/vuur_vm/src/instruction_set.rs new file mode 100644 index 0000000..c5282e6 --- /dev/null +++ b/crates/vuur_vm/src/instruction_set.rs @@ -0,0 +1,120 @@ +use std::fmt; +use std::fmt::Formatter; + +/// Instruction set. +#[derive(Debug, Clone, Copy)] +#[allow(non_camel_case_types)] +pub enum Op { + /// Does nothing. The program counter will be incremented. + NoOp, + + /// Remove the top values from the operand stack and discard it. + Pop, + + // ------------------------------------------------------------------------ + // Arithmetic + I32_Add, + I32_Sub, + I32_Mul, + I32_Div, + I32_Neg, + I32_Eq, + I32_Cmp, + + /// Push a constant int32 value onto the operand stack. + I32_Const { + constant_id: ConstantId, + }, + I32_Const_Inline { + arg: Arg24, + }, + + // ------------------------------------------------------------------------ + // Variables + Store_Local { + local_id: Arg24, + }, + Load_Local { + local_id: Arg24, + }, + + // ------------------------------------------------------------------------ + // Up-values + /// "Close" the up-value, copying its inner value into its heap slot. + Upvalue_Close, + + // ------------------------------------------------------------------------ + // Callables + /// Statically call a function identified by `func_id`. 
+ Call_Func { + func_id: Arg24, + }, + Return, + /// Create a closure instance. + Closure_Create, + + // ------------------------------------------------------------------------ + // Control Flow + /// Unconditionally jump. + Jump, + /// Conditionally jump if the top of the operand stack is value 0, type int32. + Jump_False, + /// Ends the current block. + End, + /// Unconditionally error. + Abort, +} + +pub type ConstantId = u16; + +/// Bytecode argument packed into 24 bits, encoded in little-endian. +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct Arg24([u8; 3]); + +impl Arg24 { + #[inline(always)] + pub fn from_i32(value: i32) -> Self { + // Shift left so sign will be preserved later when decoding. + let [_, a, b, c] = (value << 8).to_le_bytes(); + Self([a, b, c]) + } + + #[inline(always)] + pub fn to_i32(self) -> i32 { + let [a, b, c] = self.0; + // Shift right to extend to cover up the least-significant bit, + // and preserve the sign. + i32::from_le_bytes([0, a, b, c]) >> 8 + } + + #[inline(always)] + pub fn to_u32(self) -> u32 { + let [a, b, c] = self.0; + u32::from_le_bytes([a, b, c, 0]) + } +} + +impl fmt::Debug for Arg24 { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "{:06x}", self.to_u32()) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_instruction_size() { + assert!( + std::mem::size_of::() <= 4, + "bytecode instruction must be at most 32-bits (4 bytes)" + ) + } + + #[test] + fn test_arg24() { + assert_eq!(Arg24::from_i32(0b00000100_00000010_00000001), Arg24([1, 2, 4])); + assert_eq!(Arg24::from_i32(-1).to_i32(), -1, "negative values must be preserved"); + } +} diff --git a/crates/vuur_vm/src/lib.rs b/crates/vuur_vm/src/lib.rs index 4ae44b4..6aec8e9 100644 --- a/crates/vuur_vm/src/lib.rs +++ b/crates/vuur_vm/src/lib.rs @@ -6,7 +6,13 @@ use vuur_compile::bytecode::{decode_arg_a, decode_arg_k, decode_opcode, opcodes use vuur_compile::Chunk; pub mod error; +mod func_def; +mod instruction_set; +mod module; pub 
mod obj; +pub mod vm_v2; +mod value; +#[doc(hidden)] pub mod symbol_table; use self::error::{ErrorKind, Result, RuntimeError}; @@ -33,6 +39,9 @@ pub struct Fiber { pub(crate) error: Option, } +/// Operand stack slot, which encodes an untyped value. +struct Slot(usize); + #[derive(Debug)] struct FrameInfo { /// Offset in the stack where this call frame's @@ -135,6 +144,10 @@ impl Fiber { } } + pub fn run_v2(&mut self, chunk: &Chunk) { + todo!() + } + pub fn run(&mut self, chunk: &Chunk) { println!("running..."); 'eval: loop { @@ -161,7 +174,7 @@ impl Fiber { match op { ops::NOOP => { - println!(""); + println!("noop"); self.ip += 1 } ops::POP => { @@ -353,7 +366,7 @@ impl Fiber { self.calls.push(FrameInfo { base: stack_base, - // after this insrtuction + // after this instruction return_addr: self.ip + 1, }); } diff --git a/crates/vuur_vm/src/module.rs b/crates/vuur_vm/src/module.rs new file mode 100644 index 0000000..085fa40 --- /dev/null +++ b/crates/vuur_vm/src/module.rs @@ -0,0 +1,16 @@ +use crate::func_def::{NativeFunc, ScriptFunc}; +use std::rc::Rc; + +#[derive(Debug)] +pub struct Module { + pub name: String, + pub func_defs: Vec>, + /// Module level global variables. + pub vars: Vec<()>, +} + +#[derive(Debug)] +pub enum Func { + Script(ScriptFunc), + Native(NativeFunc), +} diff --git a/crates/vuur_vm/src/symbol_table.rs b/crates/vuur_vm/src/symbol_table.rs new file mode 100644 index 0000000..ece6df6 --- /dev/null +++ b/crates/vuur_vm/src/symbol_table.rs @@ -0,0 +1,225 @@ +//! Symbol table. +//! +//! ``` +//! # use vuur_vm::symbol_table::{SymbolTable, Symbol}; +//! # struct Func{} +//! +//! struct FuncId(u16); +//! +//! impl Symbol for FuncId { +//! const MAX: usize = u16::MAX as usize; +//! +//! fn from_usize(index: usize) -> Self { +//! Self(index as u16) +//! } +//! +//! fn to_usize(&self) -> usize { +//! self.0 as usize +//! } +//! +//! } +//! +//! let mut table = SymbolTable::::new(); +//! +//! let func_id = table.push(Func{}); +//! 
``` +use std::marker::PhantomData; + + +/// Symbol table. +pub struct SymbolTable { + symbols: Vec, + _key: PhantomData, +} + +pub trait Symbol { + /// The maximum symbol index allowed. + /// + /// This is to limit the number of symbols to + /// what the key can store, given its own restraints. + /// + /// For example if the symbol is backed by an `u16`, then + /// the maximum 16-bit unsigned integer is the + /// largest the table can grow. + /// + /// See module documents [`crate::symbol_table`] + const MAX: usize; + + /// Create a symbol from an index. + fn from_usize(index: usize) -> Self; + + /// Determine a table index. + fn to_usize(&self) -> usize; +} + +/// Convenience macro for implementing a symbol key, +/// if the key storage is a simple integer type that +/// can be cast to and from `usize`. +#[macro_export] +macro_rules! symbol_impl { + ( + $(#[$outer:meta])* + $vis:vis struct $name:ident($ty:tt) + ) => { + $(#[$outer])* + #[repr(transparent)] + $vis struct $name($ty); + + impl $name { + #[inline] + $vis fn new(index: usize) -> Self { + assert!(index <= Self::MAX, "index overflows maximum value of {}", stringify!($ty)); + Self::from_usize(index) + } + + #[inline] + $vis fn inner(&self) -> &$ty { + &self.0 + } + } + + impl $crate::symbol_table::Symbol for $name { + const MAX: usize = $ty::MAX as usize; + + #[inline(always)] + fn from_usize(index: usize) -> Self { + Self(index as $ty) + } + + #[inline(always)] + fn to_usize(&self) -> usize { + self.0 as usize + } + } + }; +} + +impl SymbolTable { + /// Create a new empty symbol table. + pub fn new() -> Self { + Self { + symbols: vec![], + _key: PhantomData, + } + } +} + +impl SymbolTable { + /// Push the given value to the end of the table. + /// + /// Returns the symbol identifying the new location. 
+ pub fn push(&mut self, value: V) -> K { + if self.symbols.len()+1 > K::MAX { + panic!("symbol table overflowed maximum key space: {}", K::MAX); + } + let symbol = K::from_usize(self.symbols.len()); + self.symbols.push(value); + symbol + } + + /// Insert the given value at the location identified + /// by the given symbol. + /// + /// If a value already exists at the given location, + /// it is returned. + /// + /// # Panic + /// + /// Panics if the symbol overflows the table space. + pub fn insert(&mut self, symbol: K, value: V) -> Option { + let index = symbol.to_usize(); + if index >= self.symbols.len() { + panic!("symbol is out of range of table"); + } + let existing = std::mem::replace(&mut self.symbols[index], value); + Some(existing) + } + + /// Retrieve the value identified by the given symbol. + /// + /// # Panic + /// + /// Panics if the symbol overflows the table space. + pub fn get(&self, symbol: K) -> &V { + let index = symbol.to_usize(); + if index >= self.symbols.len() { + panic!("symbol is out of range of table"); + } + &self.symbols[index] + } + + /// Mutably retrieve the value identified by the given symbol. + /// + /// # Panic + /// + /// Panics if the symbol overflows the table space. 
+ pub fn get_mut(&mut self, symbol: K) -> &mut V { + let index = symbol.to_usize(); + if index >= self.symbols.len() { + panic!("symbol is out of range of table"); + } + &mut self.symbols[index] + } +} + +impl Default for SymbolTable { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_symbol_macro() { + { + symbol_impl!( + #[derive(Debug, PartialEq, Eq)] + struct FooId(u8) + ); + let mut table = SymbolTable::::new(); + let symbol1 = table.push(()); + let symbol2 = table.push(()); + let symbol3 = table.push(()); + assert_eq!(symbol1, FooId::new(0)); + assert_eq!(symbol1.to_usize(), 0); + assert_eq!(symbol2.to_usize(), 1); + assert_eq!(symbol3.to_usize(), 2); + } + + { + symbol_impl!(struct FooId(u16)); + let mut table = SymbolTable::::new(); + let symbol1 = table.push(()); + let symbol2 = table.push(()); + let symbol3 = table.push(()); + assert_eq!(symbol1.to_usize(), 0); + assert_eq!(symbol2.to_usize(), 1); + assert_eq!(symbol3.to_usize(), 2); + } + + { + symbol_impl!(struct FooId(u32)); + let mut table = SymbolTable::::new(); + let symbol1 = table.push(()); + let symbol2 = table.push(()); + let symbol3 = table.push(()); + assert_eq!(symbol1.to_usize(), 0); + assert_eq!(symbol2.to_usize(), 1); + assert_eq!(symbol3.to_usize(), 2); + } + + { + symbol_impl!(struct FooId(i32)); + let mut table = SymbolTable::::new(); + let symbol1 = table.push(()); + let symbol2 = table.push(()); + let symbol3 = table.push(()); + assert_eq!(symbol1.to_usize(), 0); + assert_eq!(symbol2.to_usize(), 1); + assert_eq!(symbol3.to_usize(), 2); + } + } +} \ No newline at end of file diff --git a/crates/vuur_vm/src/value.rs b/crates/vuur_vm/src/value.rs new file mode 100644 index 0000000..139597f --- /dev/null +++ b/crates/vuur_vm/src/value.rs @@ -0,0 +1,2 @@ + + diff --git a/crates/vuur_vm/src/vm_v2.rs b/crates/vuur_vm/src/vm_v2.rs new file mode 100644 index 0000000..5be431f --- /dev/null +++ b/crates/vuur_vm/src/vm_v2.rs @@ -0,0 
+1,211 @@ +//! Virtual Machine V2. +//! +//! +//! Complete rewrite of the virtual machine. +use std::cell::RefCell; +use std::cmp::Ordering; +use std::collections::HashMap; +use std::rc::Rc; + +use crate::func_def::{Closure, ScriptFunc}; +use crate::instruction_set::Op; +use crate::module::Module; + +const ENTRY_POINT: &str = "Main"; + +#[derive(Debug)] +pub struct VM { + /// Current running fiber + pub(crate) fiber: Rc>, + store: Store, +} + +#[derive(Debug)] +struct Store { + modules: HashMap>, + /// Global table of function signatures. + funcs: Vec<()>, +} + +#[derive(Debug)] +struct CallFrame { + /// Instruction pointer + ip: usize, + /// Reference to the closure instance that will be executed. + closure: Rc>, +} + +#[derive(Debug)] +pub struct Fiber { + /// Operand stack + pub(crate) stack: Vec, + /// Top frame of the call stack. + /// + /// Kept outside the stack buffer to make access infallible. + pub(crate) frame: CallFrame, + /// Stack of call frames (activation records). + pub(crate) calls: Vec, +} + +impl Fiber { + pub fn new(closure: Rc>) -> Self { + Self { + stack: vec![], + frame: CallFrame { ip: 0, closure }, + calls: vec![], + } + } +} + +/// Slot is an untyped operand stack value. +#[derive(Debug, Clone, Copy)] +#[repr(transparent)] +pub(crate) struct Slot(u64); + +impl Slot { + #[inline(always)] + pub(crate) fn from_i32(val: i32) -> Self { + Self(val as u64) + } + + #[inline(always)] + pub(crate) fn to_i32(self) -> i32 { + self.0 as i32 + } + + #[inline(always)] + pub(crate) fn from_f32(val: f32) -> Self { + Self(val.to_bits() as u64) + } + + #[inline(always)] + pub(crate) fn to_f32(self) -> f32 { + f32::from_bits(self.0 as u32) + } +} + +impl VM { + pub fn new() -> Self { + todo!() + } + + /// Runs the entry point of the named module. 
+ pub fn run_entrypoint(&mut self, module_name: &str) -> Result<(), String> { + if self.store.modules.get(module_name).is_none() { + self.store.modules.insert( + module_name.to_string(), + Rc::new(Module { + name: module_name.to_string(), + func_defs: vec![], + vars: vec![], + }), + ); + } + + let module_rc = self.store.modules[module_name].clone(); + self.run_module(module_rc, ENTRY_POINT, &[]) + } + + /// Execute a top-level function inside the given module. + pub fn run_module(&mut self, module: Rc, func_name: &str, args: &[u8]) -> Result<(), String> { + todo!() + } + + pub fn resume_fiber(&mut self, fiber: &mut Fiber) -> Result<(), String> { + todo!() + } +} + +impl Fiber { + #[inline(always)] + fn pop_slots_2(&mut self) -> [Slot; 2] { + let l = self.stack.len(); + let slot_b = self.stack[l - 2]; + let slot_a = self.stack[l - 1]; + self.stack.truncate(self.stack.len() - 2); + [slot_a, slot_b] + } +} + +enum FiberAction { + /// Pause execution of the current fiber and yield control + /// back to host. + Yield, +} + +enum RunAction { + /// Successfully return a value. + Return(Slot), + /// Call a script function. + Call, + /// Fiber control action. + Fiber(FiberAction), +} + +/// Run the current fiber in the VM. +fn run_fiber(vm: &mut VM, fiber: &mut Fiber) -> Result { + todo!() +} + +#[inline(always)] +fn run_interpreter(fiber: &mut Fiber, frame: &mut CallFrame) -> Result { + let closure = frame.closure.clone(); + let func = closure.borrow_mut().func.clone(); + + 'eval: loop { + let op = func + .code + .get(frame.ip) + .cloned() + .ok_or_else(|| "bytecode buffer out of bounds")?; + frame.ip += 1; + + match op { + Op::NoOp => { /* Do nothing. 
*/ } + Op::Pop => { + // Discard + fiber.stack.pop(); + } + Op::I32_Add => { + let [a, b] = fiber.pop_slots_2(); + fiber.stack.push(Slot::from_i32(a.to_i32() + b.to_i32())); + } + Op::I32_Sub => { + let [a, b] = fiber.pop_slots_2(); + fiber.stack.push(Slot::from_i32(a.to_i32() - b.to_i32())); + } + Op::I32_Mul => { + let [a, b] = fiber.pop_slots_2(); + fiber.stack.push(Slot::from_i32(a.to_i32() * b.to_i32())); + } + Op::I32_Div => { + let [a, b] = fiber.pop_slots_2(); + fiber.stack.push(Slot::from_i32(a.to_i32() / b.to_i32())); + } + Op::I32_Neg => { + let a = fiber.stack.pop().ok_or_else(|| "operand stack is empty")?; + fiber.stack.push(Slot::from_i32(-a.to_i32())); + } + Op::I32_Eq => { + let [a, b] = fiber.pop_slots_2(); + fiber.stack.push(Slot::from_i32(if a.to_i32() == b.to_i32() { 1 } else { 0 })); + } + Op::I32_Cmp => { + let [a, b] = fiber.pop_slots_2(); + let ordering = match Ord::cmp(&a.to_i32(), &b.to_i32()) { + Ordering::Less => -1, + Ordering::Equal => 0, + Ordering::Greater => 1, + }; + fiber.stack.push(Slot::from_i32(ordering)); + } + Op::I32_Const_Inline { arg } => { + let a = arg.to_i32(); + fiber.stack.push(Slot::from_i32(a)); + } + _ => { + return Err("abort".to_string()); + } + } + } +} diff --git a/crates/vuur_vm/tests/test_vm_v2.rs b/crates/vuur_vm/tests/test_vm_v2.rs new file mode 100644 index 0000000..adc618b --- /dev/null +++ b/crates/vuur_vm/tests/test_vm_v2.rs @@ -0,0 +1,7 @@ +use vuur_vm::vm_v2::VM; + + +#[test] +fn test_vm_v2() { + // let vm = VM::new +} From 81f30116c4aae8cb0e0e04f6a827ce523ec47d90 Mon Sep 17 00:00:00 2001 From: Willem Victor Date: Sat, 4 May 2024 12:44:33 +0200 Subject: [PATCH 2/8] Handle wrapper type --- crates/vuur_vm/src/func_def.rs | 4 +++ crates/vuur_vm/src/handle.rs | 56 ++++++++++++++++++++++++++++++ crates/vuur_vm/src/lib.rs | 6 ++-- crates/vuur_vm/src/symbol_table.rs | 5 ++- crates/vuur_vm/src/value.rs | 1 - crates/vuur_vm/src/vm_v2.rs | 7 ++-- crates/vuur_vm/tests/test_vm_v2.rs | 1 - 7 files changed, 70 
insertions(+), 10 deletions(-) create mode 100644 crates/vuur_vm/src/handle.rs diff --git a/crates/vuur_vm/src/func_def.rs b/crates/vuur_vm/src/func_def.rs index f0360f1..db6bfcf 100644 --- a/crates/vuur_vm/src/func_def.rs +++ b/crates/vuur_vm/src/func_def.rs @@ -29,6 +29,10 @@ pub struct Closure { pub up_values: Vec<()>, } +/// Function definition in the guest script. +/// +/// It contains interpreter instructions which can be executed +/// in the virtual machine. #[derive(Debug)] pub struct ScriptFunc { pub id: FuncId, diff --git a/crates/vuur_vm/src/handle.rs b/crates/vuur_vm/src/handle.rs new file mode 100644 index 0000000..01cea11 --- /dev/null +++ b/crates/vuur_vm/src/handle.rs @@ -0,0 +1,56 @@ +use std::cell::RefCell; +pub use std::cell::{Ref, RefMut}; +use std::fmt; +use std::fmt::Formatter; +use std::rc::{Rc, Weak}; + +/// Shared reference counted handle +pub struct Handle(Rc>); + +impl Handle { + #[inline(always)] + pub fn new(value: T) -> Self { + Self(Rc::new(RefCell::new(value))) + } + + #[inline(always)] + pub fn borrow(&self) -> Ref<'_, T> { + self.0.borrow() + } + + #[inline(always)] + pub fn borrow_mut(&self) -> RefMut<'_, T> { + self.0.borrow_mut() + } + + #[inline(always)] + pub fn try_borrow(&self) -> Option> { + self.0.try_borrow().ok() + } + + #[inline(always)] + pub fn try_borrow_mut(&self) -> Option> { + self.0.try_borrow_mut().ok() + } +} + +impl Clone for Handle { + #[inline(always)] + fn clone(&self) -> Self { + Self(self.0.clone()) + } +} + +impl fmt::Debug for Handle +where + T: fmt::Debug, +{ + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + let mut debug = f.debug_tuple("Handle"); + + match self.0.try_borrow() { + Ok(value) => debug.field(&*value).finish(), + Err(_) => debug.field(&"_").finish(), + } + } +} diff --git a/crates/vuur_vm/src/lib.rs b/crates/vuur_vm/src/lib.rs index 6aec8e9..76f8ea7 100644 --- a/crates/vuur_vm/src/lib.rs +++ b/crates/vuur_vm/src/lib.rs @@ -7,12 +7,14 @@ use vuur_compile::Chunk; pub mod error; mod 
func_def; +mod handle; mod instruction_set; mod module; pub mod obj; -pub mod vm_v2; +#[doc(hidden)] +pub mod symbol_table; mod value; -#[doc(hidden)] pub mod symbol_table; +pub mod vm_v2; use self::error::{ErrorKind, Result, RuntimeError}; diff --git a/crates/vuur_vm/src/symbol_table.rs b/crates/vuur_vm/src/symbol_table.rs index ece6df6..381fcd0 100644 --- a/crates/vuur_vm/src/symbol_table.rs +++ b/crates/vuur_vm/src/symbol_table.rs @@ -25,7 +25,6 @@ //! ``` use std::marker::PhantomData; - /// Symbol table. pub struct SymbolTable { symbols: Vec, @@ -109,7 +108,7 @@ impl SymbolTable { /// /// Returns the symbol identifying the new location. pub fn push(&mut self, value: V) -> K { - if self.symbols.len()+1 > K::MAX { + if self.symbols.len() + 1 > K::MAX { panic!("symbol table overflowed maximum key space: {}", K::MAX); } let symbol = K::from_usize(self.symbols.len()); @@ -222,4 +221,4 @@ mod test { assert_eq!(symbol3.to_usize(), 2); } } -} \ No newline at end of file +} diff --git a/crates/vuur_vm/src/value.rs b/crates/vuur_vm/src/value.rs index 139597f..8b13789 100644 --- a/crates/vuur_vm/src/value.rs +++ b/crates/vuur_vm/src/value.rs @@ -1,2 +1 @@ - diff --git a/crates/vuur_vm/src/vm_v2.rs b/crates/vuur_vm/src/vm_v2.rs index 5be431f..4b7c065 100644 --- a/crates/vuur_vm/src/vm_v2.rs +++ b/crates/vuur_vm/src/vm_v2.rs @@ -8,6 +8,7 @@ use std::collections::HashMap; use std::rc::Rc; use crate::func_def::{Closure, ScriptFunc}; +use crate::handle::Handle; use crate::instruction_set::Op; use crate::module::Module; @@ -31,8 +32,8 @@ struct Store { struct CallFrame { /// Instruction pointer ip: usize, - /// Reference to the closure instance that will be executed. - closure: Rc>, + /// Reference to the closure instance that is being executed. 
+ closure: Handle, } #[derive(Debug)] @@ -48,7 +49,7 @@ pub struct Fiber { } impl Fiber { - pub fn new(closure: Rc>) -> Self { + pub fn new(closure: Handle) -> Self { Self { stack: vec![], frame: CallFrame { ip: 0, closure }, diff --git a/crates/vuur_vm/tests/test_vm_v2.rs b/crates/vuur_vm/tests/test_vm_v2.rs index adc618b..1a7c310 100644 --- a/crates/vuur_vm/tests/test_vm_v2.rs +++ b/crates/vuur_vm/tests/test_vm_v2.rs @@ -1,6 +1,5 @@ use vuur_vm::vm_v2::VM; - #[test] fn test_vm_v2() { // let vm = VM::new From 5f7fe216524563a61a4383a64b42703feff9bd62 Mon Sep 17 00:00:00 2001 From: Willem Victor Date: Sat, 4 May 2024 18:11:18 +0200 Subject: [PATCH 3/8] Working VM --- crates/vuur_vm/src/func_def.rs | 49 ------- crates/vuur_vm/src/handle.rs | 8 +- crates/vuur_vm/src/instruction_set.rs | 94 ++++++++++-- crates/vuur_vm/src/lib.rs | 11 +- crates/vuur_vm/src/module.rs | 16 --- crates/vuur_vm/src/symbol_table.rs | 36 +++++ crates/vuur_vm/src/tests.rs | 85 +++++++++++ crates/vuur_vm/src/value.rs | 198 ++++++++++++++++++++++++++ crates/vuur_vm/src/vm_v2.rs | 121 ++++++++++------ crates/vuur_vm/tests/test_vm_v2.rs | 6 - 10 files changed, 489 insertions(+), 135 deletions(-) delete mode 100644 crates/vuur_vm/src/func_def.rs delete mode 100644 crates/vuur_vm/src/module.rs create mode 100644 crates/vuur_vm/src/tests.rs delete mode 100644 crates/vuur_vm/tests/test_vm_v2.rs diff --git a/crates/vuur_vm/src/func_def.rs b/crates/vuur_vm/src/func_def.rs deleted file mode 100644 index db6bfcf..0000000 --- a/crates/vuur_vm/src/func_def.rs +++ /dev/null @@ -1,49 +0,0 @@ -use crate::instruction_set::Op; -use std::rc::Rc; - -#[derive(Debug, Clone, Copy)] -#[repr(transparent)] -pub struct FuncId(pub(crate) u32); - -impl FuncId { - #[inline(always)] - pub(crate) fn new(id: u32) -> Self { - Self(id) - } - - #[inline(always)] - pub fn to_usize(self) -> usize { - self.0 as usize - } - - #[inline(always)] - pub fn to_u32(self) -> u32 { - self.0 - } -} - -#[derive(Debug)] -pub struct Closure { - 
pub func_id: FuncId, - pub func: Rc, - pub up_values: Vec<()>, -} - -/// Function definition in the guest script. -/// -/// It contains interpreter instructions which can be executed -/// in the virtual machine. -#[derive(Debug)] -pub struct ScriptFunc { - pub id: FuncId, - pub constants: Vec, - pub code: Box<[Op]>, -} - -pub type NativeFuncPtr = fn() -> (); - -#[derive(Debug)] -pub struct NativeFunc { - pub id: FuncId, - pub ptr: NativeFuncPtr, -} diff --git a/crates/vuur_vm/src/handle.rs b/crates/vuur_vm/src/handle.rs index 01cea11..bf6e394 100644 --- a/crates/vuur_vm/src/handle.rs +++ b/crates/vuur_vm/src/handle.rs @@ -2,7 +2,8 @@ use std::cell::RefCell; pub use std::cell::{Ref, RefMut}; use std::fmt; use std::fmt::Formatter; -use std::rc::{Rc, Weak}; +use std::rc::Rc; +pub use std::rc::Weak; /// Shared reference counted handle pub struct Handle(Rc>); @@ -32,6 +33,11 @@ impl Handle { pub fn try_borrow_mut(&self) -> Option> { self.0.try_borrow_mut().ok() } + + #[inline(always)] + pub fn downgrade(&self) -> Weak> { + Rc::downgrade(&self.0) + } } impl Clone for Handle { diff --git a/crates/vuur_vm/src/instruction_set.rs b/crates/vuur_vm/src/instruction_set.rs index c5282e6..93d6785 100644 --- a/crates/vuur_vm/src/instruction_set.rs +++ b/crates/vuur_vm/src/instruction_set.rs @@ -1,6 +1,8 @@ use std::fmt; use std::fmt::Formatter; +use crate::value::{GlobalId, LocalId, UpValueId}; + /// Instruction set. #[derive(Debug, Clone, Copy)] #[allow(non_camel_case_types)] @@ -19,6 +21,10 @@ pub enum Op { I32_Div, I32_Neg, I32_Eq, + I32_Less, + I32_Greater, + I32_LessEq, + I32_GreaterEq, I32_Cmp, /// Push a constant int32 value onto the operand stack. 
@@ -31,26 +37,44 @@ pub enum Op { // ------------------------------------------------------------------------ // Variables + Store_Global { + global_id: GlobalId, + }, + Load_Global { + global_id: GlobalId, + }, Store_Local { - local_id: Arg24, + local_id: LocalId, }, Load_Local { - local_id: Arg24, + local_id: LocalId, + }, + Store_Upvalue { + up_value_id: UpValueId, + }, + Load_Upvalue { + up_value_id: UpValueId, }, - - // ------------------------------------------------------------------------ - // Up-values /// "Close" the up-value, copying its inner value into its heap slot. - Upvalue_Close, + Upvalue_Close { + up_value_id: UpValueId, + }, // ------------------------------------------------------------------------ // Callables - /// Statically call a function identified by `func_id`. - Call_Func { - func_id: Arg24, + /// Call a closure instance on the stack. + Call_Closure { + arity: u8, + }, + /// Call a method defined on a class. + Call_Method { + arity: u8, + func_id: u16, }, Return, /// Create a closure instance. + /// + /// Expects a function definition to be on the top of the stack. Closure_Create, // ------------------------------------------------------------------------ @@ -58,13 +82,55 @@ pub enum Op { /// Unconditionally jump. Jump, /// Conditionally jump if the top of the operand stack is value 0, type int32. - Jump_False, + /// + /// Pop 1. + Jump_False { + addr: Arg24, + }, /// Ends the current block. End, - /// Unconditionally error. + /// Unconditional error. Abort, } +impl Op { + /// The effect on the operand stack that the instruction has. + pub fn stack_effect(&self) -> isize { + match self { + Op::NoOp => 0, + Op::Pop => -1, + Op::I32_Add => -1, + Op::I32_Sub => -1, + Op::I32_Mul => -1, + Op::I32_Div => -1, + Op::I32_Neg => 0, + Op::I32_Eq => -1, + Op::I32_Less => -1, + Op::I32_Greater => -1, + Op::I32_LessEq => -1, + Op::I32_GreaterEq => -1, + Op::I32_Cmp => -1, + Op::I32_Const { .. } => 1, + Op::I32_Const_Inline { .. 
} => 1, + Op::Store_Global { .. } => 0, + Op::Load_Global { .. } => 1, + Op::Store_Local { .. } => 0, + Op::Load_Local { .. } => 1, + Op::Store_Upvalue { .. } => 0, + Op::Load_Upvalue { .. } => 1, + Op::Upvalue_Close { .. } => 0, + Op::Call_Closure { arity } => -(*arity as isize) + 1, + Op::Call_Method { arity, .. } => -(*arity as isize), // remember receiver + Op::Return => -1, + Op::Closure_Create => 1, + Op::Jump => 0, + Op::Jump_False { .. } => -1, + Op::End => 0, + Op::Abort => 0, + } + } +} + pub type ConstantId = u16; /// Bytecode argument packed into 24 bits, encoded in little-endian. @@ -87,6 +153,12 @@ impl Arg24 { i32::from_le_bytes([0, a, b, c]) >> 8 } + #[inline(always)] + pub fn from_u32(value: u32) -> Self { + let [a, b, c, _] = value.to_le_bytes(); + Self([a, b, c]) + } + #[inline(always)] pub fn to_u32(self) -> u32 { let [a, b, c] = self.0; diff --git a/crates/vuur_vm/src/lib.rs b/crates/vuur_vm/src/lib.rs index 76f8ea7..09ca31c 100644 --- a/crates/vuur_vm/src/lib.rs +++ b/crates/vuur_vm/src/lib.rs @@ -6,16 +6,17 @@ use vuur_compile::bytecode::{decode_arg_a, decode_arg_k, decode_opcode, opcodes use vuur_compile::Chunk; pub mod error; -mod func_def; -mod handle; -mod instruction_set; -mod module; +pub mod handle; +pub mod instruction_set; pub mod obj; #[doc(hidden)] pub mod symbol_table; -mod value; +pub mod value; pub mod vm_v2; +#[cfg(test)] +mod tests; + use self::error::{ErrorKind, Result, RuntimeError}; pub const STRIDE: usize = 4; diff --git a/crates/vuur_vm/src/module.rs b/crates/vuur_vm/src/module.rs deleted file mode 100644 index 085fa40..0000000 --- a/crates/vuur_vm/src/module.rs +++ /dev/null @@ -1,16 +0,0 @@ -use crate::func_def::{NativeFunc, ScriptFunc}; -use std::rc::Rc; - -#[derive(Debug)] -pub struct Module { - pub name: String, - pub func_defs: Vec>, - /// Module level global variables. 
- pub vars: Vec<()>, -} - -#[derive(Debug)] -pub enum Func { - Script(ScriptFunc), - Native(NativeFunc), -} diff --git a/crates/vuur_vm/src/symbol_table.rs b/crates/vuur_vm/src/symbol_table.rs index 381fcd0..8f78c15 100644 --- a/crates/vuur_vm/src/symbol_table.rs +++ b/crates/vuur_vm/src/symbol_table.rs @@ -23,6 +23,7 @@ //! //! let func_id = table.push(Func{}); //! ``` +use std::fmt::{self, Formatter}; use std::marker::PhantomData; /// Symbol table. @@ -159,6 +160,24 @@ impl SymbolTable { } &mut self.symbols[index] } + + pub fn find

(&self, mut predicate: P) -> Option<(K, &V)> + where + P: FnMut(&V) -> bool, + { + self.symbols + .iter() + .enumerate() + .position(|(_, el)| predicate(el)) + .map(|i| (K::from_usize(i), &self.symbols[i])) + } +} + +impl SymbolTable { + /// Lookup the symbol for the given value. + pub fn find_symbol(&self, value: &V) -> Option { + self.symbols.iter().position(|el| *el == *value).map(K::from_usize) + } } impl Default for SymbolTable { @@ -167,6 +186,23 @@ impl Default for SymbolTable { } } +impl fmt::Debug for SymbolTable +where + K: Symbol + fmt::Debug, + V: fmt::Debug, +{ + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + let mut debug = f.debug_map(); + + for (index, value) in self.symbols.iter().enumerate() { + let symbol = K::from_usize(index); + debug.entry(&symbol, &value); + } + + debug.finish() + } +} + #[cfg(test)] mod test { use super::*; diff --git a/crates/vuur_vm/src/tests.rs b/crates/vuur_vm/src/tests.rs new file mode 100644 index 0000000..e6c6c72 --- /dev/null +++ b/crates/vuur_vm/src/tests.rs @@ -0,0 +1,85 @@ +use crate::value::{GlobalId, LocalId, Program}; +use crate::{ + handle::Handle, + instruction_set::{Arg24, Op}, + value::{Closure, Module, ScriptFunc}, + vm_v2::{Store, VM}, +}; +use std::rc::Rc; + +/// Create a recursive fibonacci script function. 
+fn fibonacci(store: &mut Store, module: Handle) { + // func fib(n: Int) { + // if n <= 1 { + // return n + // } else { + // return fib(n - 1) + fib(n - 2) + // } + // } + let fib = GlobalId::new(0); + let n = LocalId::new(0); + let code = vec![ + Op::Load_Local { local_id: n }, + Op::I32_Const_Inline { + arg: Arg24::from_i32(1), + }, + Op::I32_LessEq, + Op::Jump_False { + addr: Arg24::from_u32(0), + }, + Op::Load_Local { local_id: n }, + Op::Return, + // Setup call to fib(n) + Op::Load_Global { global_id: fib }, + // n - 1 + Op::Load_Local { local_id: n }, + Op::I32_Const_Inline { + arg: Arg24::from_i32(1), + }, + Op::I32_Sub, + Op::Call_Closure { arity: 1 }, + // Setup call to fib(n) + Op::Load_Global { global_id: fib }, + // n - 2 + Op::Load_Local { local_id: n }, + Op::I32_Const_Inline { + arg: Arg24::from_i32(2), + }, + Op::I32_Sub, + Op::Call_Closure { arity: 1 }, + // fib(n - 1) + fib(n - 2) + Op::I32_Add, + ]; +} + +#[test] +fn test_vm_v2() { + let module = Handle::new(Module::new("__main__")); + + let code = vec![ + Op::I32_Const_Inline { + arg: Arg24::from_i32(1), + }, + Op::I32_Const_Inline { + arg: Arg24::from_i32(2), + }, + Op::I32_Add, + Op::Return, + ]; + + // Module top-level code. 
+ let func = Rc::new(ScriptFunc { + constants: vec![], + code: code.into_boxed_slice(), + module: module.downgrade(), + }); + + let closure = Handle::new(Closure::new(func)); + let program = Program::new(module, closure); + + // --------------------------------------------------------------------------------------------- + let mut vm = VM::new(); + let slot = vm.run_program(&program); + println!("{slot:?}"); + assert_eq!(slot.unwrap().raw(), 3); +} diff --git a/crates/vuur_vm/src/value.rs b/crates/vuur_vm/src/value.rs index 8b13789..96f83fd 100644 --- a/crates/vuur_vm/src/value.rs +++ b/crates/vuur_vm/src/value.rs @@ -1 +1,199 @@ +use crate::handle::Handle; +use std::cell::RefCell; +use std::fmt::{self, Formatter}; +use std::rc::{Rc, Weak}; +use crate::instruction_set::Op; +use crate::symbol_impl; +use crate::symbol_table::{Symbol, SymbolTable}; + +symbol_impl!( + /// Global variable Id. + #[derive(Debug, Clone, Copy)] pub struct GlobalId(u16) +); + +symbol_impl!( + /// Local variable Id. + #[derive(Debug, Clone, Copy)] pub struct LocalId(u16) +); + +symbol_impl!( + /// Local variable Id. + #[derive(Debug, Clone, Copy)] pub struct UpValueId(u16) +); + +/// An executable Vuur program. +pub struct Program { + /// An executable closure object holding the top-level code of the main module. + pub(crate) closure: Handle, + + /// Handle to the module that acted as the function's environment. + /// + /// This keeps helps keep the module alive since the closure only + /// has a weak reference to its lexical module. + pub(crate) module: Handle, +} + +impl Program { + pub fn new(module: Handle, closure: Handle) -> Self { + Self { closure, module } + } +} + +/// Slot is an untyped operand stack value. +/// +/// It holds the raw bits of a value. The encoding is +/// specific to the current platform. 
+#[derive(Clone, Copy)] +#[repr(transparent)] +pub(crate) struct Slot(u64); + +impl Slot { + pub(crate) const ZERO: Self = Slot(0); + + #[inline(always)] + pub(crate) fn raw(&self) -> u64 { + self.0 + } + + #[inline(always)] + pub(crate) fn from_i32(val: i32) -> Self { + Self(val as u64) + } + + #[inline(always)] + pub(crate) fn to_i32(self) -> i32 { + self.0 as i32 + } + + #[inline(always)] + pub(crate) fn from_f32(val: f32) -> Self { + Self(val.to_bits() as u64) + } + + #[inline(always)] + pub(crate) fn to_f32(self) -> f32 { + f32::from_bits(self.0 as u32) + } +} + +impl fmt::Debug for Slot { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + let Self(value) = *self; + write!(f, "Slot(0x{value:04x})") + } +} + +#[derive(Debug)] +pub struct Module { + /// Name of the module. + pub name: String, + /// Module level global variables. + pub vars: SymbolTable, +} + +impl Module { + pub fn new(name: impl ToString) -> Self { + Self { + name: name.to_string(), + vars: SymbolTable::new(), + } + } +} + +#[derive(Debug)] +pub enum Func { + Script(Rc), + Native(NativeFunc), +} + +#[derive(Debug)] +pub struct Closure { + pub func: Rc, + + /// Up-values are variables that are referenced in this closure's scope, + /// but are not local to this scope, or global to the module. + /// + /// They are boxed into handles because they can be shared between + /// multiple closures, as well as the call frames that have to close + /// them on return. + pub up_values: SymbolTable>, +} + +impl Closure { + pub fn new(func: Rc) -> Self { + Self { + func, + up_values: SymbolTable::new(), + } + } +} + +/// An Up-value is a variable that is referenced within a scope, but is not +/// local to that scope. +#[derive(Debug, Clone)] +pub enum UpValue { + /// A local variable is an **open** up-value when it is still within scope + /// and on the operand stack. + /// + /// In this case the up-value holds an absolute *stack offset* pointing to the + /// local variable. 
+ /// + /// This implies that the stack offset will be invalid when the call frame + /// is popped from the stack. The up-value must be closed before (the value + /// copied from the stack to the heap) before the frame returns. + Open(usize), + + /// A local variable is a **closed** up-value when the closure escapes its + /// parent scope. The lifetime of those locals extend beyond their scope, + /// so must be replaced with heap allocated values. + /// + /// In this case the up-value holds a *handle* to a heap value. + Closed(Slot), +} + +/// Function defined in the guest script. +/// +/// It contains interpreter instructions which can be executed +/// in the virtual machine. +/// +/// After compilation a script function is immutable, +/// so it can be stored without `RefCell`. +#[derive(Debug)] +pub struct ScriptFunc { + pub constants: Vec, + pub code: Box<[Op]>, + + /// The function keeps a reference to the module it lexically belongs to. + /// + /// This allows instructions in the function to interact with module level + /// global variables. + /// + /// This unfortunately creates circular ownership between modules and functions. + /// A weak reference is needed to avoid leaking memory, which means when a + /// function definition leaves the module (like when the host keeps a closure) + /// the module must be kept alive. + pub module: Weak>, +} + +pub type NativeFuncPtr = fn() -> (); + +#[derive(Debug)] +pub struct NativeFunc { + pub ptr: NativeFuncPtr, +} + +#[cfg(test)] +mod test { + use super::*; + use crate::handle::Handle; + use std::rc::Rc; + + /// Ensure that a slot can hold a pointer on the current architecture. 
+ #[test] + fn test_slot_size() { + assert!(std::mem::size_of::<*const [u8; 1024]>() <= std::mem::size_of::()); + assert!(std::mem::size_of::>() <= std::mem::size_of::()); + assert!(std::mem::size_of::>() <= std::mem::size_of::()); + } +} diff --git a/crates/vuur_vm/src/vm_v2.rs b/crates/vuur_vm/src/vm_v2.rs index 4b7c065..7e009a1 100644 --- a/crates/vuur_vm/src/vm_v2.rs +++ b/crates/vuur_vm/src/vm_v2.rs @@ -7,27 +7,34 @@ use std::cmp::Ordering; use std::collections::HashMap; use std::rc::Rc; -use crate::func_def::{Closure, ScriptFunc}; use crate::handle::Handle; use crate::instruction_set::Op; -use crate::module::Module; +use crate::value::Module; +use crate::value::Slot; +use crate::value::{Closure, Program}; const ENTRY_POINT: &str = "Main"; #[derive(Debug)] pub struct VM { /// Current running fiber - pub(crate) fiber: Rc>, + pub(crate) fiber: Option>, store: Store, } #[derive(Debug)] -struct Store { +pub struct Store { modules: HashMap>, /// Global table of function signatures. funcs: Vec<()>, } +impl Store { + pub fn insert_func(&mut self) { + todo!("Insert function signature") + } +} + #[derive(Debug)] struct CallFrame { /// Instruction pointer @@ -40,10 +47,6 @@ struct CallFrame { pub struct Fiber { /// Operand stack pub(crate) stack: Vec, - /// Top frame of the call stack. - /// - /// Kept outside the stack buffer to make access infallible. - pub(crate) frame: CallFrame, /// Stack of call frames (activation records). pub(crate) calls: Vec, } @@ -52,55 +55,40 @@ impl Fiber { pub fn new(closure: Handle) -> Self { Self { stack: vec![], - frame: CallFrame { ip: 0, closure }, - calls: vec![], + calls: vec![CallFrame { ip: 0, closure }], } } } -/// Slot is an untyped operand stack value. 
-#[derive(Debug, Clone, Copy)] -#[repr(transparent)] -pub(crate) struct Slot(u64); - -impl Slot { - #[inline(always)] - pub(crate) fn from_i32(val: i32) -> Self { - Self(val as u64) +impl VM { + pub fn new() -> Self { + Self { + fiber: None, + store: Store { + modules: HashMap::new(), + funcs: vec![], + }, + } } - #[inline(always)] - pub(crate) fn to_i32(self) -> i32 { - self.0 as i32 - } + pub(crate) fn run_program(&mut self, program: &Program) -> Result { + let module = program.module.clone(); + let closure = program.closure.clone(); - #[inline(always)] - pub(crate) fn from_f32(val: f32) -> Self { - Self(val.to_bits() as u64) - } + // Setup a fiber + let fiber = Handle::new(Fiber::new(closure)); - #[inline(always)] - pub(crate) fn to_f32(self) -> f32 { - f32::from_bits(self.0 as u32) - } -} + let result = run_interpreter(self, fiber)?; -impl VM { - pub fn new() -> Self { - todo!() + Ok(result) } /// Runs the entry point of the named module. pub fn run_entrypoint(&mut self, module_name: &str) -> Result<(), String> { if self.store.modules.get(module_name).is_none() { - self.store.modules.insert( - module_name.to_string(), - Rc::new(Module { - name: module_name.to_string(), - func_defs: vec![], - vars: vec![], - }), - ); + self.store + .modules + .insert(module_name.to_string(), Rc::new(Module::new(module_name))); } let module_rc = self.store.modules[module_name].clone(); @@ -129,6 +117,8 @@ impl Fiber { } enum FiberAction { + /// Return a value. + Return(Slot), /// Pause execution of the current fiber and yield control /// back to host. Yield, @@ -144,16 +134,47 @@ enum RunAction { } /// Run the current fiber in the VM. +// TODO: Instead of Slot, return a decent value that's usable in the Rust host. +fn run_interpreter(vm: &mut VM, fiber: Handle) -> Result { + vm.fiber = Some(fiber.clone()); + + loop { + let fiber = &mut *fiber.borrow_mut(); + + match run_fiber(vm, fiber)? 
{ + FiberAction::Return(slot) => { + return Ok(slot); + } + FiberAction::Yield => { + todo!() + } + } + } +} + fn run_fiber(vm: &mut VM, fiber: &mut Fiber) -> Result { - todo!() + let mut frame = fiber.calls.pop().ok_or_else(|| "fiber has no frames on its callstack")?; + + loop { + match run_op_loop(vm, fiber, &mut frame)? { + RunAction::Return(slot) => { + // Current frame returned but there are no callers left on the stack. + if fiber.calls.is_empty() { + return Ok(FiberAction::Return(slot)); + } + } + RunAction::Call => {} + RunAction::Fiber(_) => {} + } + } } #[inline(always)] -fn run_interpreter(fiber: &mut Fiber, frame: &mut CallFrame) -> Result { +fn run_op_loop(vm: &mut VM, fiber: &mut Fiber, frame: &mut CallFrame) -> Result { let closure = frame.closure.clone(); let func = closure.borrow_mut().func.clone(); - 'eval: loop { + loop { let op = func .code .get(frame.ip) @@ -204,9 +225,15 @@ fn run_interpreter(fiber: &mut Fiber, frame: &mut CallFrame) -> Result { + Op::Return => { + return Ok(RunAction::Return(fiber.stack.pop().unwrap_or(Slot::ZERO))); + } + Op::Abort => { return Err("abort".to_string()); } + _ => { + return Err(format!("instruction not implemented yet: {op:?}")); + } } } } diff --git a/crates/vuur_vm/tests/test_vm_v2.rs b/crates/vuur_vm/tests/test_vm_v2.rs deleted file mode 100644 index 1a7c310..0000000 --- a/crates/vuur_vm/tests/test_vm_v2.rs +++ /dev/null @@ -1,6 +0,0 @@ -use vuur_vm::vm_v2::VM; - -#[test] -fn test_vm_v2() { - // let vm = VM::new -} From 63083737e7f8a1ab2f6de37d7ab830cc8af8e7b2 Mon Sep 17 00:00:00 2001 From: Willem Victor Date: Sun, 5 May 2024 15:11:44 +0200 Subject: [PATCH 4/8] Working fibonacci recursive calls --- crates/vuur_vm/Cargo.toml | 4 + crates/vuur_vm/src/handle.rs | 8 +- crates/vuur_vm/src/instruction_set.rs | 16 +-- crates/vuur_vm/src/tests.rs | 59 +++++++-- crates/vuur_vm/src/value.rs | 111 +++++++++++++++- crates/vuur_vm/src/vm_v2.rs | 182 ++++++++++++++++++++------ 6 files changed, 307 insertions(+), 73 
deletions(-) diff --git a/crates/vuur_vm/Cargo.toml b/crates/vuur_vm/Cargo.toml index 1dd41e7..21f7bd1 100644 --- a/crates/vuur_vm/Cargo.toml +++ b/crates/vuur_vm/Cargo.toml @@ -12,3 +12,7 @@ vuur_parse = { path = "../vuur_parse" } # Dynamic Objects bytemuck = "1.13" + +[features] +# Prints opcode instructions as they are interpreted. +trace_ops = [] diff --git a/crates/vuur_vm/src/handle.rs b/crates/vuur_vm/src/handle.rs index bf6e394..986114c 100644 --- a/crates/vuur_vm/src/handle.rs +++ b/crates/vuur_vm/src/handle.rs @@ -6,7 +6,7 @@ use std::rc::Rc; pub use std::rc::Weak; /// Shared reference counted handle -pub struct Handle(Rc>); +pub struct Handle(Rc>); impl Handle { #[inline(always)] @@ -40,7 +40,7 @@ impl Handle { } } -impl Clone for Handle { +impl Clone for Handle { #[inline(always)] fn clone(&self) -> Self { Self(self.0.clone()) @@ -49,13 +49,13 @@ impl Clone for Handle { impl fmt::Debug for Handle where - T: fmt::Debug, + T: ?Sized + fmt::Debug, { fn fmt(&self, f: &mut Formatter) -> fmt::Result { let mut debug = f.debug_tuple("Handle"); match self.0.try_borrow() { - Ok(value) => debug.field(&*value).finish(), + Ok(value) => debug.field(&&*value).finish(), Err(_) => debug.field(&"_").finish(), } } diff --git a/crates/vuur_vm/src/instruction_set.rs b/crates/vuur_vm/src/instruction_set.rs index 93d6785..9cf9fd1 100644 --- a/crates/vuur_vm/src/instruction_set.rs +++ b/crates/vuur_vm/src/instruction_set.rs @@ -1,7 +1,7 @@ use std::fmt; use std::fmt::Formatter; -use crate::value::{GlobalId, LocalId, UpValueId}; +use crate::value::{ConstantId, GlobalId, LocalId, UpValueId}; /// Instruction set. #[derive(Debug, Clone, Copy)] @@ -25,7 +25,6 @@ pub enum Op { I32_Greater, I32_LessEq, I32_GreaterEq, - I32_Cmp, /// Push a constant int32 value onto the operand stack. I32_Const { @@ -72,10 +71,10 @@ pub enum Op { func_id: u16, }, Return, - /// Create a closure instance. - /// - /// Expects a function definition to be on the top of the stack. 
- Closure_Create, + + /// Create a closure instance from the function definition stored + /// in the constant table of the current call frame. + Closure(ConstantId), // ------------------------------------------------------------------------ // Control Flow @@ -109,7 +108,6 @@ impl Op { Op::I32_Greater => -1, Op::I32_LessEq => -1, Op::I32_GreaterEq => -1, - Op::I32_Cmp => -1, Op::I32_Const { .. } => 1, Op::I32_Const_Inline { .. } => 1, Op::Store_Global { .. } => 0, @@ -122,7 +120,7 @@ impl Op { Op::Call_Closure { arity } => -(*arity as isize) + 1, Op::Call_Method { arity, .. } => -(*arity as isize), // remember receiver Op::Return => -1, - Op::Closure_Create => 1, + Op::Closure(_) => 1, Op::Jump => 0, Op::Jump_False { .. } => -1, Op::End => 0, @@ -131,8 +129,6 @@ impl Op { } } -pub type ConstantId = u16; - /// Bytecode argument packed into 24 bits, encoded in little-endian. #[derive(Clone, Copy, PartialEq, Eq)] pub struct Arg24([u8; 3]); diff --git a/crates/vuur_vm/src/tests.rs b/crates/vuur_vm/src/tests.rs index e6c6c72..fc2470b 100644 --- a/crates/vuur_vm/src/tests.rs +++ b/crates/vuur_vm/src/tests.rs @@ -1,15 +1,15 @@ -use crate::value::{GlobalId, LocalId, Program}; +use crate::value::{Slot, Value}; use crate::{ handle::Handle, instruction_set::{Arg24, Op}, - value::{Closure, Module, ScriptFunc}, + value::{Closure, ConstantId, GlobalId, LocalId, Module, Program, ScriptFunc}, vm_v2::{Store, VM}, }; use std::rc::Rc; /// Create a recursive fibonacci script function. 
-fn fibonacci(store: &mut Store, module: Handle) { - // func fib(n: Int) { +fn fibonacci(module: Handle) -> Rc { + // func fib(n: Int) -> Int { // if n <= 1 { // return n // } else { @@ -25,7 +25,7 @@ fn fibonacci(store: &mut Store, module: Handle) { }, Op::I32_LessEq, Op::Jump_False { - addr: Arg24::from_u32(0), + addr: Arg24::from_u32(6), }, Op::Load_Local { local_id: n }, Op::Return, @@ -49,27 +49,58 @@ fn fibonacci(store: &mut Store, module: Handle) { Op::Call_Closure { arity: 1 }, // fib(n - 1) + fib(n - 2) Op::I32_Add, + Op::Return, ]; + + Rc::new(ScriptFunc { + constants: vec![], + code: code.into_boxed_slice(), + module: module.downgrade(), + }) } #[test] fn test_vm_v2() { + let fib_arg_1 = 10; + let module = Handle::new(Module::new("__main__")); + // Global variable slots would be determined by top-level `var` and `func` statements. + for _ in 0..1 { + module.borrow_mut().vars.push(Value::Nil); + } + + let fib_func = fibonacci(module.clone()); + let code = vec![ + // func fib(n: Int) -> Int: + Op::Closure(ConstantId::new(0)), // create closure + Op::Store_Global { + global_id: GlobalId::new(0), + }, // Store closure in variable + // fib(5) + Op::Load_Global { + global_id: GlobalId::new(0), + }, // Load closure from variable Op::I32_Const_Inline { - arg: Arg24::from_i32(1), - }, - Op::I32_Const_Inline { - arg: Arg24::from_i32(2), + arg: Arg24::from_i32(fib_arg_1), }, - Op::I32_Add, + Op::Call_Closure { arity: 1 }, + // Op::I32_Const_Inline { + // arg: Arg24::from_i32(1), + // }, + // Op::I32_Const_Inline { + // arg: Arg24::from_i32(2), + // }, + // Op::I32_Add, Op::Return, ]; // Module top-level code. 
let func = Rc::new(ScriptFunc { - constants: vec![], + constants: vec![ + Value::Func(fib_func), // ConstantId(0) + ], code: code.into_boxed_slice(), module: module.downgrade(), }); @@ -79,7 +110,7 @@ fn test_vm_v2() { // --------------------------------------------------------------------------------------------- let mut vm = VM::new(); - let slot = vm.run_program(&program); - println!("{slot:?}"); - assert_eq!(slot.unwrap().raw(), 3); + let value = vm.run_program(&program); + println!("{value:?}"); + assert_eq!(value.unwrap().into_i32().unwrap(), 55); } diff --git a/crates/vuur_vm/src/value.rs b/crates/vuur_vm/src/value.rs index 96f83fd..9a85419 100644 --- a/crates/vuur_vm/src/value.rs +++ b/crates/vuur_vm/src/value.rs @@ -18,10 +18,90 @@ symbol_impl!( ); symbol_impl!( - /// Local variable Id. + /// Up-value variable Id. #[derive(Debug, Clone, Copy)] pub struct UpValueId(u16) ); +symbol_impl!( + /// Constant Id. + #[derive(Debug, Clone, Copy)] pub struct ConstantId(u16) +); + +/// Dynamically typed value. +/// +/// This is to simplify the internals of the VM for the short term. +/// In the future the VM will be statically typed. +/// +/// See [`Slot`] +#[derive(Clone)] +pub enum Value { + Nil, + Bool(bool), + Int(i32), + Float(f32), + Str(Handle), + + // ------------------------------------------------------------------------ + // Reference type objects. + Func(Rc), + Closure(Handle), + Native(Handle), +} + +impl fmt::Debug for Value { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + use Value::*; + + // There are plenty of opportunities for circular + // references, so we don't recurse into complex objects. 
+ match self { + Nil => write!(f, "Nil"), + Bool(v) => f.debug_tuple("Bool").field(&v).finish(), + Int(v) => f.debug_tuple("Int").field(&v).finish(), + Float(v) => f.debug_tuple("Float").field(&v).finish(), + Str(v) => f.debug_tuple("Str").field(&v).finish(), + Func(_) => write!(f, "Func(...)"), + Closure(_) => write!(f, "Closure(...)"), + Native(_) => write!(f, "Native(...)"), + } + } +} + +impl Value { + #[inline(always)] + pub fn into_func(self) -> Result, String> { + match self { + Value::Func(func) => Ok(func), + _ => Err(self.type_error()), + } + } + + #[inline(always)] + pub fn into_closure(self) -> Result, String> { + match self { + Value::Closure(closure) => Ok(closure), + _ => Err(self.type_error()), + } + } + + #[inline(always)] + pub fn into_i32(self) -> Result { + match self { + Value::Int(int) => Ok(int), + _ => Err(self.type_error()), + } + } + + #[inline(always)] + pub fn from_i32(value: i32) -> Self { + Self::Int(value) + } + + fn type_error(&self) -> String { + format!("unexpected value type: {self:?}") + } +} + /// An executable Vuur program. pub struct Program { /// An executable closure object holding the top-level code of the main module. @@ -44,8 +124,18 @@ impl Program { /// /// It holds the raw bits of a value. The encoding is /// specific to the current platform. +/// +/// FIXME: Storing reference type object pointers in a slot. +/// +/// To keep the VM simple, the standard library `Rc` is used +/// for reference types. It doesn't expose its internal pointer, +/// making it hard to build unsafe internals around it. +/// +/// When we have a proper garbage collector with our own +/// handle types we can revisit `Slot`. 
#[derive(Clone, Copy)] #[repr(transparent)] +#[allow(dead_code)] pub(crate) struct Slot(u64); impl Slot { @@ -75,6 +165,16 @@ impl Slot { pub(crate) fn to_f32(self) -> f32 { f32::from_bits(self.0 as u32) } + + #[inline(always)] + pub(crate) fn from_ptr(ptr: *const T) -> Self { + Self(ptr as usize as u64) + } + + #[inline(always)] + pub(crate) unsafe fn to_ptr(self) -> *mut T { + self.0 as usize as *mut T + } } impl fmt::Debug for Slot { @@ -89,7 +189,7 @@ pub struct Module { /// Name of the module. pub name: String, /// Module level global variables. - pub vars: SymbolTable, + pub vars: SymbolTable, } impl Module { @@ -161,7 +261,10 @@ pub enum UpValue { /// so it can be stored without `RefCell`. #[derive(Debug)] pub struct ScriptFunc { - pub constants: Vec, + /// Values defined in the function body that do not change. + pub constants: Vec, + + /// Interpreter bytecode instructions to be executed. pub code: Box<[Op]>, /// The function keeps a reference to the module it lexically belongs to. @@ -178,6 +281,7 @@ pub struct ScriptFunc { pub type NativeFuncPtr = fn() -> (); +/// Host function defined in Rust. #[derive(Debug)] pub struct NativeFunc { pub ptr: NativeFuncPtr, @@ -192,6 +296,7 @@ mod test { /// Ensure that a slot can hold a pointer on the current architecture. #[test] fn test_slot_size() { + println!("{}", std::mem::size_of::()); assert!(std::mem::size_of::<*const [u8; 1024]>() <= std::mem::size_of::()); assert!(std::mem::size_of::>() <= std::mem::size_of::()); assert!(std::mem::size_of::>() <= std::mem::size_of::()); diff --git a/crates/vuur_vm/src/vm_v2.rs b/crates/vuur_vm/src/vm_v2.rs index 7e009a1..d354a98 100644 --- a/crates/vuur_vm/src/vm_v2.rs +++ b/crates/vuur_vm/src/vm_v2.rs @@ -2,16 +2,13 @@ //! //! //! Complete rewrite of the virtual machine. 
-use std::cell::RefCell; -use std::cmp::Ordering; use std::collections::HashMap; use std::rc::Rc; use crate::handle::Handle; use crate::instruction_set::Op; -use crate::value::Module; -use crate::value::Slot; -use crate::value::{Closure, Program}; +use crate::symbol_table::Symbol; +use crate::value::{Closure, Module, Program, Slot, Value}; const ENTRY_POINT: &str = "Main"; @@ -25,8 +22,11 @@ pub struct VM { #[derive(Debug)] pub struct Store { modules: HashMap>, - /// Global table of function signatures. - funcs: Vec<()>, + /// Global table of method signatures. + /// + /// The symbol from this table can be used to index into a class' + /// methods. This is the method-overloading mechanism. + methods: Vec<()>, } impl Store { @@ -39,6 +39,8 @@ impl Store { struct CallFrame { /// Instruction pointer ip: usize, + /// Offset into operand stack where this frame's local variables start. + stack_offset: usize, /// Reference to the closure instance that is being executed. closure: Handle, } @@ -46,16 +48,27 @@ struct CallFrame { #[derive(Debug)] pub struct Fiber { /// Operand stack - pub(crate) stack: Vec, + pub(crate) stack: Vec, /// Stack of call frames (activation records). pub(crate) calls: Vec, } impl Fiber { + /// Create a fiber with a top-level function as an entrypoint. pub fn new(closure: Handle) -> Self { Self { - stack: vec![], - calls: vec![CallFrame { ip: 0, closure }], + stack: vec![ + // To keep consistent with the calling convention, + // the called closure must be on the stack when the + // module's top level code returns. + Value::Closure(closure.clone()), + ], + calls: vec![CallFrame { + ip: 0, + // Arguments start after the closure value. 
+ stack_offset: 1, + closure, + }], } } } @@ -66,12 +79,12 @@ impl VM { fiber: None, store: Store { modules: HashMap::new(), - funcs: vec![], + methods: vec![], }, } } - pub(crate) fn run_program(&mut self, program: &Program) -> Result { + pub(crate) fn run_program(&mut self, program: &Program) -> Result { let module = program.module.clone(); let closure = program.closure.clone(); @@ -107,18 +120,18 @@ impl VM { impl Fiber { #[inline(always)] - fn pop_slots_2(&mut self) -> [Slot; 2] { + fn pop_slots_2(&mut self) -> [Value; 2] { let l = self.stack.len(); - let slot_b = self.stack[l - 2]; - let slot_a = self.stack[l - 1]; + let value_a = self.stack[l - 2].clone(); + let value_b = self.stack[l - 1].clone(); self.stack.truncate(self.stack.len() - 2); - [slot_a, slot_b] + [value_a, value_b] } } enum FiberAction { /// Return a value. - Return(Slot), + Return(Value), /// Pause execution of the current fiber and yield control /// back to host. Yield, @@ -126,16 +139,19 @@ enum FiberAction { enum RunAction { /// Successfully return a value. - Return(Slot), - /// Call a script function. - Call, + Return(Value), + /// Call a closure. + Call { + closure: Handle, + stack_offset: usize, + }, /// Fiber control action. Fiber(FiberAction), } /// Run the current fiber in the VM. // TODO: Instead of Slot, return a decent value that's usable in the Rust host. -fn run_interpreter(vm: &mut VM, fiber: Handle) -> Result { +fn run_interpreter(vm: &mut VM, fiber: Handle) -> Result { vm.fiber = Some(fiber.clone()); loop { @@ -157,29 +173,57 @@ fn run_fiber(vm: &mut VM, fiber: &mut Fiber) -> Result { loop { match run_op_loop(vm, fiber, &mut frame)? { - RunAction::Return(slot) => { - // Current frame returned but there are no callers left on the stack. - if fiber.calls.is_empty() { - return Ok(FiberAction::Return(slot)); + RunAction::Return(value) => { + // Drop callee stack and closure value. 
+ fiber.stack.truncate(frame.stack_offset - 1); + + match fiber.calls.pop() { + // Current frame returned but there are no callers left on the stack. + None => { + return Ok(FiberAction::Return(value)); + } + Some(parent_frame) => { + frame = parent_frame; + fiber.stack.push(value); + } } } - RunAction::Call => {} + RunAction::Call { closure, stack_offset } => { + // Setup new call frame. + let mut new_frame = CallFrame { + ip: 0, + stack_offset, + closure, + }; + std::mem::swap(&mut frame, &mut new_frame); + // Put parent frame back onto call stack. + fiber.calls.push(new_frame); + } RunAction::Fiber(_) => {} } } } #[inline(always)] -fn run_op_loop(vm: &mut VM, fiber: &mut Fiber, frame: &mut CallFrame) -> Result { +fn run_op_loop(_vm: &mut VM, fiber: &mut Fiber, frame: &mut CallFrame) -> Result { let closure = frame.closure.clone(); let func = closure.borrow_mut().func.clone(); + // for op in func.code.iter() { + // println!(" {op:?}"); + // } + loop { let op = func .code .get(frame.ip) .cloned() .ok_or_else(|| "bytecode buffer out of bounds")?; + + if cfg!(feature = "trace_ops") { + println!("{:04} {op:?}", frame.ip); + } + frame.ip += 1; match op { @@ -190,43 +234,97 @@ fn run_op_loop(vm: &mut VM, fiber: &mut Fiber, frame: &mut CallFrame) -> Result< } Op::I32_Add => { let [a, b] = fiber.pop_slots_2(); - fiber.stack.push(Slot::from_i32(a.to_i32() + b.to_i32())); + fiber.stack.push(Value::from_i32(a.into_i32()? + b.into_i32()?)); } Op::I32_Sub => { let [a, b] = fiber.pop_slots_2(); - fiber.stack.push(Slot::from_i32(a.to_i32() - b.to_i32())); + fiber.stack.push(Value::from_i32(a.into_i32()? - b.into_i32()?)); } Op::I32_Mul => { let [a, b] = fiber.pop_slots_2(); - fiber.stack.push(Slot::from_i32(a.to_i32() * b.to_i32())); + fiber.stack.push(Value::from_i32(a.into_i32()? * b.into_i32()?)); } Op::I32_Div => { let [a, b] = fiber.pop_slots_2(); - fiber.stack.push(Slot::from_i32(a.to_i32() / b.to_i32())); + fiber.stack.push(Value::from_i32(a.into_i32()? 
/ b.into_i32()?)); } Op::I32_Neg => { let a = fiber.stack.pop().ok_or_else(|| "operand stack is empty")?; - fiber.stack.push(Slot::from_i32(-a.to_i32())); + fiber.stack.push(Value::from_i32(-a.into_i32()?)); } Op::I32_Eq => { let [a, b] = fiber.pop_slots_2(); - fiber.stack.push(Slot::from_i32(if a.to_i32() == b.to_i32() { 1 } else { 0 })); + fiber + .stack + .push(Value::from_i32(if a.into_i32()? == b.into_i32()? { 1 } else { 0 })); } - Op::I32_Cmp => { + Op::I32_Less => { let [a, b] = fiber.pop_slots_2(); - let ordering = match Ord::cmp(&a.to_i32(), &b.to_i32()) { - Ordering::Less => -1, - Ordering::Equal => 0, - Ordering::Greater => 1, - }; - fiber.stack.push(Slot::from_i32(ordering)); + fiber.stack.push(Value::Bool(a.into_i32()? < b.into_i32()?)); + } + Op::I32_LessEq => { + let [a, b] = fiber.pop_slots_2(); + fiber.stack.push(Value::Bool(a.into_i32()? <= b.into_i32()?)); } Op::I32_Const_Inline { arg } => { let a = arg.to_i32(); - fiber.stack.push(Slot::from_i32(a)); + fiber.stack.push(Value::from_i32(a)); + } + Op::Store_Global { global_id } => { + let module = func + .module + .upgrade() + .ok_or_else(|| "function lost reference to its lexical module")?; + let value = fiber.stack.pop().unwrap_or(Value::Nil); + module.borrow_mut().vars.insert(global_id, value); + } + Op::Load_Global { global_id } => { + let module = func + .module + .upgrade() + .ok_or_else(|| "function lost reference to its lexical module")?; + let value = module.borrow_mut().vars.get(global_id).clone(); + fiber.stack.push(value); + } + Op::Store_Local { local_id } => { + let index = frame.stack_offset + local_id.to_usize(); + if index >= fiber.stack.len() { + return Err("operand stack overflow".to_string()); + } + fiber.stack[index] = fiber.stack.pop().ok_or_else(|| "operand stack underflow")?; + } + Op::Load_Local { local_id } => { + let value = fiber + .stack + .get(frame.stack_offset + local_id.to_usize()) + .cloned() + .ok_or_else(|| "operand stack overflow")?; + fiber.stack.push(value); 
+ } + Op::Closure(constant_id) => { + let func_value = func.constants.get(constant_id.to_usize()).cloned().unwrap_or(Value::Nil); + let func_def = func_value.into_func()?; + let closure = Handle::new(Closure::new(func_def)); + fiber.stack.push(Value::Closure(closure)); + } + Op::Call_Closure { arity } => { + let lo = fiber.stack.len() - arity as usize; + let closure_offset = lo - 1; + let closure_value = fiber.stack.get(closure_offset).cloned().ok_or_else(|| "stack underflow")?; + let closure = closure_value.into_closure()?; + return Ok(RunAction::Call { + closure, + stack_offset: lo, + }); } Op::Return => { - return Ok(RunAction::Return(fiber.stack.pop().unwrap_or(Slot::ZERO))); + return Ok(RunAction::Return(fiber.stack.pop().unwrap_or(Value::Nil))); + } + Op::Jump_False { addr } => { + let value = fiber.stack.pop().unwrap_or(Value::Nil); + if matches!(value, Value::Bool(false)) { + frame.ip = addr.to_u32() as usize; + } } Op::Abort => { return Err("abort".to_string()); From f564262f6a245c9d45fcf6c9ba6a45c87750eddd Mon Sep 17 00:00:00 2001 From: Willem Victor Date: Sun, 5 May 2024 15:39:08 +0200 Subject: [PATCH 5/8] Changed instructions to tuples --- crates/vuur_vm/src/instruction_set.rs | 54 +++++++++------------------ crates/vuur_vm/src/tests.rs | 46 ++++++++++------------- crates/vuur_vm/src/vm_v2.rs | 11 +++--- 3 files changed, 43 insertions(+), 68 deletions(-) diff --git a/crates/vuur_vm/src/instruction_set.rs b/crates/vuur_vm/src/instruction_set.rs index 9cf9fd1..ed6bcd0 100644 --- a/crates/vuur_vm/src/instruction_set.rs +++ b/crates/vuur_vm/src/instruction_set.rs @@ -27,37 +27,19 @@ pub enum Op { I32_GreaterEq, /// Push a constant int32 value onto the operand stack. 
- I32_Const { - constant_id: ConstantId, - }, - I32_Const_Inline { - arg: Arg24, - }, + I32_Const(ConstantId), + I32_Const_Inline(Arg24), // ------------------------------------------------------------------------ // Variables - Store_Global { - global_id: GlobalId, - }, - Load_Global { - global_id: GlobalId, - }, - Store_Local { - local_id: LocalId, - }, - Load_Local { - local_id: LocalId, - }, - Store_Upvalue { - up_value_id: UpValueId, - }, - Load_Upvalue { - up_value_id: UpValueId, - }, + Store_Global(GlobalId), + Load_Global(GlobalId), + Store_Local(LocalId), + Load_Local(LocalId), + Store_Upvalue(UpValueId), + Load_Upvalue(UpValueId), /// "Close" the up-value, copying its inner value into its heap slot. - Upvalue_Close { - up_value_id: UpValueId, - }, + Upvalue_Close(UpValueId), // ------------------------------------------------------------------------ // Callables @@ -108,15 +90,15 @@ impl Op { Op::I32_Greater => -1, Op::I32_LessEq => -1, Op::I32_GreaterEq => -1, - Op::I32_Const { .. } => 1, - Op::I32_Const_Inline { .. } => 1, - Op::Store_Global { .. } => 0, - Op::Load_Global { .. } => 1, - Op::Store_Local { .. } => 0, - Op::Load_Local { .. } => 1, - Op::Store_Upvalue { .. } => 0, - Op::Load_Upvalue { .. } => 1, - Op::Upvalue_Close { .. } => 0, + Op::I32_Const(_) => 1, + Op::I32_Const_Inline(_) => 1, + Op::Store_Global(_) => 0, + Op::Load_Global(_) => 1, + Op::Store_Local(_) => 0, + Op::Load_Local(_) => 1, + Op::Store_Upvalue(_) => 0, + Op::Load_Upvalue(_) => 1, + Op::Upvalue_Close(_) => 0, Op::Call_Closure { arity } => -(*arity as isize) + 1, Op::Call_Method { arity, .. 
} => -(*arity as isize), // remember receiver Op::Return => -1, diff --git a/crates/vuur_vm/src/tests.rs b/crates/vuur_vm/src/tests.rs index fc2470b..8b915a8 100644 --- a/crates/vuur_vm/src/tests.rs +++ b/crates/vuur_vm/src/tests.rs @@ -1,11 +1,13 @@ -use crate::value::{Slot, Value}; +use crate::value::{Value}; use crate::{ handle::Handle, instruction_set::{Arg24, Op}, value::{Closure, ConstantId, GlobalId, LocalId, Module, Program, ScriptFunc}, - vm_v2::{Store, VM}, + vm_v2::{VM}, }; + use std::rc::Rc; +use std::time::{Instant, Duration}; /// Create a recursive fibonacci script function. fn fibonacci(module: Handle) -> Rc { @@ -19,32 +21,26 @@ fn fibonacci(module: Handle) -> Rc { let fib = GlobalId::new(0); let n = LocalId::new(0); let code = vec![ - Op::Load_Local { local_id: n }, - Op::I32_Const_Inline { - arg: Arg24::from_i32(1), - }, + Op::Load_Local(n), + Op::I32_Const_Inline(Arg24::from_i32(1)), Op::I32_LessEq, Op::Jump_False { addr: Arg24::from_u32(6), }, - Op::Load_Local { local_id: n }, + Op::Load_Local(n), Op::Return, // Setup call to fib(n) - Op::Load_Global { global_id: fib }, + Op::Load_Global(fib), // n - 1 - Op::Load_Local { local_id: n }, - Op::I32_Const_Inline { - arg: Arg24::from_i32(1), - }, + Op::Load_Local(n), + Op::I32_Const_Inline(Arg24::from_i32(1)), Op::I32_Sub, Op::Call_Closure { arity: 1 }, // Setup call to fib(n) - Op::Load_Global { global_id: fib }, + Op::Load_Global(fib), // n - 2 - Op::Load_Local { local_id: n }, - Op::I32_Const_Inline { - arg: Arg24::from_i32(2), - }, + Op::Load_Local(n), + Op::I32_Const_Inline(Arg24::from_i32(2)), Op::I32_Sub, Op::Call_Closure { arity: 1 }, // fib(n - 1) + fib(n - 2) @@ -74,17 +70,11 @@ fn test_vm_v2() { let code = vec![ // func fib(n: Int) -> Int: - Op::Closure(ConstantId::new(0)), // create closure - Op::Store_Global { - global_id: GlobalId::new(0), - }, // Store closure in variable + Op::Closure(ConstantId::new(0)), // create closure + Op::Store_Global(GlobalId::new(0)), // Store closure in 
variable // fib(5) - Op::Load_Global { - global_id: GlobalId::new(0), - }, // Load closure from variable - Op::I32_Const_Inline { - arg: Arg24::from_i32(fib_arg_1), - }, + Op::Load_Global(GlobalId::new(0)), // Load closure from variable + Op::I32_Const_Inline(Arg24::from_i32(fib_arg_1)), Op::Call_Closure { arity: 1 }, // Op::I32_Const_Inline { // arg: Arg24::from_i32(1), @@ -110,7 +100,9 @@ fn test_vm_v2() { // --------------------------------------------------------------------------------------------- let mut vm = VM::new(); + let start = Instant::now(); let value = vm.run_program(&program); + println!("time: {}µs", (Instant::now() - start).as_micros()); println!("{value:?}"); assert_eq!(value.unwrap().into_i32().unwrap(), 55); } diff --git a/crates/vuur_vm/src/vm_v2.rs b/crates/vuur_vm/src/vm_v2.rs index d354a98..6209efd 100644 --- a/crates/vuur_vm/src/vm_v2.rs +++ b/crates/vuur_vm/src/vm_v2.rs @@ -84,6 +84,7 @@ impl VM { } } + // #[inline(never)] pub(crate) fn run_program(&mut self, program: &Program) -> Result { let module = program.module.clone(); let closure = program.closure.clone(); @@ -266,11 +267,11 @@ fn run_op_loop(_vm: &mut VM, fiber: &mut Fiber, frame: &mut CallFrame) -> Result let [a, b] = fiber.pop_slots_2(); fiber.stack.push(Value::Bool(a.into_i32()? 
<= b.into_i32()?)); } - Op::I32_Const_Inline { arg } => { + Op::I32_Const_Inline(arg) => { let a = arg.to_i32(); fiber.stack.push(Value::from_i32(a)); } - Op::Store_Global { global_id } => { + Op::Store_Global(global_id) => { let module = func .module .upgrade() @@ -278,7 +279,7 @@ fn run_op_loop(_vm: &mut VM, fiber: &mut Fiber, frame: &mut CallFrame) -> Result let value = fiber.stack.pop().unwrap_or(Value::Nil); module.borrow_mut().vars.insert(global_id, value); } - Op::Load_Global { global_id } => { + Op::Load_Global(global_id) => { let module = func .module .upgrade() @@ -286,14 +287,14 @@ fn run_op_loop(_vm: &mut VM, fiber: &mut Fiber, frame: &mut CallFrame) -> Result let value = module.borrow_mut().vars.get(global_id).clone(); fiber.stack.push(value); } - Op::Store_Local { local_id } => { + Op::Store_Local(local_id) => { let index = frame.stack_offset + local_id.to_usize(); if index >= fiber.stack.len() { return Err("operand stack overflow".to_string()); } fiber.stack[index] = fiber.stack.pop().ok_or_else(|| "operand stack underflow")?; } - Op::Load_Local { local_id } => { + Op::Load_Local(local_id) => { let value = fiber .stack .get(frame.stack_offset + local_id.to_usize()) From c27e60e87e12d851505fd3db7e71db5755cc3e94 Mon Sep 17 00:00:00 2001 From: Willem Victor Date: Sun, 5 May 2024 15:47:28 +0200 Subject: [PATCH 6/8] Format file --- crates/vuur_vm/src/tests.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/vuur_vm/src/tests.rs b/crates/vuur_vm/src/tests.rs index 8b915a8..89e368a 100644 --- a/crates/vuur_vm/src/tests.rs +++ b/crates/vuur_vm/src/tests.rs @@ -1,13 +1,13 @@ -use crate::value::{Value}; +use crate::value::Value; use crate::{ handle::Handle, instruction_set::{Arg24, Op}, value::{Closure, ConstantId, GlobalId, LocalId, Module, Program, ScriptFunc}, - vm_v2::{VM}, + vm_v2::VM, }; use std::rc::Rc; -use std::time::{Instant, Duration}; +use std::time::{Duration, Instant}; /// Create a recursive fibonacci script 
function. fn fibonacci(module: Handle) -> Rc { From 0aae1ac4c9005588b408e8d47ee0c8d713cffabe Mon Sep 17 00:00:00 2001 From: Willem Victor Date: Sun, 5 May 2024 18:07:52 +0200 Subject: [PATCH 7/8] Benchmarking for new VM --- crates/vuur_vm/Cargo.toml | 7 ++ crates/vuur_vm/benches/vm.rs | 101 ++++++++++++++++++++++++++ crates/vuur_vm/src/instruction_set.rs | 2 +- crates/vuur_vm/src/lib.rs | 1 + crates/vuur_vm/src/store.rs | 72 ++++++++++++++++++ crates/vuur_vm/src/symbol_table.rs | 34 +++++++++ crates/vuur_vm/src/tests.rs | 3 +- crates/vuur_vm/src/value.rs | 17 ++++- crates/vuur_vm/src/vm_v2.rs | 28 ++----- 9 files changed, 241 insertions(+), 24 deletions(-) create mode 100644 crates/vuur_vm/benches/vm.rs create mode 100644 crates/vuur_vm/src/store.rs diff --git a/crates/vuur_vm/Cargo.toml b/crates/vuur_vm/Cargo.toml index 21f7bd1..40f8ec2 100644 --- a/crates/vuur_vm/Cargo.toml +++ b/crates/vuur_vm/Cargo.toml @@ -5,6 +5,13 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[[bench]] +name = "vm" +harness = false + +[dev-dependencies] +criterion = "0.5" + [dependencies] num = "0.4" vuur_compile = { path = "../vuur_compile" } diff --git a/crates/vuur_vm/benches/vm.rs b/crates/vuur_vm/benches/vm.rs new file mode 100644 index 0000000..3066af2 --- /dev/null +++ b/crates/vuur_vm/benches/vm.rs @@ -0,0 +1,101 @@ +use std::rc::Rc; + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; + +use vuur_vm::handle::Handle; +use vuur_vm::instruction_set::{Arg24, Op}; +use vuur_vm::value::{Closure, ConstantId, GlobalId, LocalId, Module, Program, ScriptFunc, Value}; +use vuur_vm::vm_v2::VM; + +/// Create a recursive fibonacci script function. 
+fn fibonacci(module: Handle) -> Rc { + // func fib(n: Int) -> Int { + // if n <= 1 { + // return n + // } else { + // return fib(n - 1) + fib(n - 2) + // } + // } + let fib = GlobalId::new(0); + let n = LocalId::new(0); + let code = vec![ + Op::Load_Local(n), + Op::I32_Const_Inline(Arg24::from_i32(1)), + Op::I32_LessEq, + Op::Jump_False { + addr: Arg24::from_u32(6), + }, + Op::Load_Local(n), + Op::Return, + // Setup call to fib(n) + Op::Load_Global(fib), + // n - 1 + Op::Load_Local(n), + Op::I32_Const_Inline(Arg24::from_i32(1)), + Op::I32_Sub, + Op::Call_Closure { arity: 1 }, + // Setup call to fib(n) + Op::Load_Global(fib), + // n - 2 + Op::Load_Local(n), + Op::I32_Const_Inline(Arg24::from_i32(2)), + Op::I32_Sub, + Op::Call_Closure { arity: 1 }, + // fib(n - 1) + fib(n - 2) + Op::I32_Add, + Op::Return, + ]; + + Rc::new(ScriptFunc { + constants: vec![], + code: code.into_boxed_slice(), + module: module.downgrade(), + }) +} + +pub fn criterion_benchmark(c: &mut Criterion) { + c.bench_function("fib 20", |b| { + let fib_arg_1 = 10; + + let module = Handle::new(Module::new("__main__")); + + // Global variable slots would be determined by top-level `var` and `func` statements. + for _ in 0..1 { + module.borrow_mut().vars.push(Value::Nil); + } + + let fib_func = fibonacci(module.clone()); + + let code = vec![ + // func fib(n: Int) -> Int: + Op::Closure(ConstantId::new(0)), // create closure + Op::Store_Global(GlobalId::new(0)), // Store closure in variable + // fib(5) + Op::Load_Global(GlobalId::new(0)), // Load closure from variable + Op::I32_Const_Inline(Arg24::from_i32(fib_arg_1)), + Op::Call_Closure { arity: 1 }, + Op::Return, + Op::End, + ]; + + // Module top-level code. 
+ let func = Rc::new(ScriptFunc { + constants: vec![ + Value::Func(fib_func), // ConstantId(0) + ], + code: code.into_boxed_slice(), + module: module.downgrade(), + }); + + let closure = Handle::new(Closure::new(func)); + let program = Program::new(module, closure); + + // --------------------------------------------------------------------------------------------- + let mut vm = VM::new(); + + b.iter(|| vm.run_program(black_box(&program))) + }); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/crates/vuur_vm/src/instruction_set.rs b/crates/vuur_vm/src/instruction_set.rs index ed6bcd0..d0c17b9 100644 --- a/crates/vuur_vm/src/instruction_set.rs +++ b/crates/vuur_vm/src/instruction_set.rs @@ -68,7 +68,7 @@ pub enum Op { Jump_False { addr: Arg24, }, - /// Ends the current block. + /// Ends the current module. End, /// Unconditional error. Abort, diff --git a/crates/vuur_vm/src/lib.rs b/crates/vuur_vm/src/lib.rs index 09ca31c..3d8231f 100644 --- a/crates/vuur_vm/src/lib.rs +++ b/crates/vuur_vm/src/lib.rs @@ -14,6 +14,7 @@ pub mod symbol_table; pub mod value; pub mod vm_v2; +mod store; #[cfg(test)] mod tests; diff --git a/crates/vuur_vm/src/store.rs b/crates/vuur_vm/src/store.rs new file mode 100644 index 0000000..93c9901 --- /dev/null +++ b/crates/vuur_vm/src/store.rs @@ -0,0 +1,72 @@ +//! Global store. +//! +//! The [`Store`] keeps global state that span across modules, +//! during runtime and the compiler. +use std::collections::HashMap; +use std::fmt::{self, Formatter}; +use std::ops; +use std::rc::Rc; + +use crate::symbol_table::SymbolTable; +use crate::value::{MethodId, Module}; + +#[derive(Debug)] +pub struct Store { + /// Registry of cached compiled modules. + /// + /// Key is the canonical name of the module. + pub(crate) modules: HashMap>, + + /// Global table of method signatures. + /// + /// The symbol from this table can be used to index into a class' methods. + /// This is the method-overloading mechanism. 
+ pub(crate) methods: SymbolTable, +} + +impl Store { + pub fn insert_func(&mut self) { + todo!("Insert function signature") + } +} + +/// Method signature, which can be used to match calls to methods. +#[derive(Debug)] +pub struct MethodSig { + pub flags: MethodFlags, + pub args: Vec<()>, + pub return_: (), +} + +#[derive(Clone, Copy)] +pub struct MethodFlags(u32); + +impl MethodFlags { + pub const STATIC: MethodFlags = MethodFlags(0b0001); + pub const NATIVE: MethodFlags = MethodFlags(0b0010); + + #[inline(always)] + pub fn is_static(self) -> bool { + (self.0 & Self::STATIC.0) != 0 + } + + #[inline(always)] + pub fn is_native(self) -> bool { + (self.0 & Self::NATIVE.0) != 0 + } +} + +impl ops::BitAnd for MethodFlags { + type Output = Self; + + #[inline(always)] + fn bitand(self, rhs: Self) -> Self::Output { + Self(self.0 & rhs.0) + } +} + +impl fmt::Debug for MethodFlags { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "MethodFlags({:04b})", self.0) + } +} diff --git a/crates/vuur_vm/src/symbol_table.rs b/crates/vuur_vm/src/symbol_table.rs index 8f78c15..1062f45 100644 --- a/crates/vuur_vm/src/symbol_table.rs +++ b/crates/vuur_vm/src/symbol_table.rs @@ -102,6 +102,16 @@ impl SymbolTable { _key: PhantomData, } } + + #[inline] + pub fn len(&self) -> usize { + self.symbols.len() + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.symbols.is_empty() + } } impl SymbolTable { @@ -173,6 +183,18 @@ impl SymbolTable { } } +impl SymbolTable { + /// Grow the table to the specified size. + /// + /// Does nothing if the table's current size is equal or + /// greater than the requested size. + pub fn grow(&mut self, new_size: usize) { + for _ in 0..=(self.symbols.len().saturating_sub(new_size)) { + self.symbols.push(V::default()); + } + } +} + impl SymbolTable { /// Lookup the symbol for the given value. 
pub fn find_symbol(&self, value: &V) -> Option { @@ -257,4 +279,16 @@ mod test { assert_eq!(symbol3.to_usize(), 2); } } + + #[test] + fn test_grow() { + symbol_impl!(struct Id(u32)); + + let mut table = SymbolTable::>::new(); + assert_eq!(table.len(), 0); + + table.grow(1); + assert_eq!(table.len(), 1); + assert!(table.get(Id(0)).is_none()); + } } diff --git a/crates/vuur_vm/src/tests.rs b/crates/vuur_vm/src/tests.rs index 89e368a..3740865 100644 --- a/crates/vuur_vm/src/tests.rs +++ b/crates/vuur_vm/src/tests.rs @@ -7,7 +7,7 @@ use crate::{ }; use std::rc::Rc; -use std::time::{Duration, Instant}; +use std::time::Instant; /// Create a recursive fibonacci script function. fn fibonacci(module: Handle) -> Rc { @@ -84,6 +84,7 @@ fn test_vm_v2() { // }, // Op::I32_Add, Op::Return, + Op::End, ]; // Module top-level code. diff --git a/crates/vuur_vm/src/value.rs b/crates/vuur_vm/src/value.rs index 9a85419..99ffae8 100644 --- a/crates/vuur_vm/src/value.rs +++ b/crates/vuur_vm/src/value.rs @@ -1,8 +1,8 @@ -use crate::handle::Handle; use std::cell::RefCell; use std::fmt::{self, Formatter}; use std::rc::{Rc, Weak}; +use crate::handle::Handle; use crate::instruction_set::Op; use crate::symbol_impl; use crate::symbol_table::{Symbol, SymbolTable}; @@ -27,6 +27,11 @@ symbol_impl!( #[derive(Debug, Clone, Copy)] pub struct ConstantId(u16) ); +symbol_impl!( + /// Class method Id. + #[derive(Debug, Clone, Copy)] pub struct MethodId(u32) +); + /// Dynamically typed value. /// /// This is to simplify the internals of the VM for the short term. @@ -202,11 +207,19 @@ impl Module { } #[derive(Debug)] -pub enum Func { +pub enum Method { Script(Rc), Native(NativeFunc), } +pub struct Class { + /// Table of methods belonging to this class. + /// + /// This corresponds with the global method signature + /// table in [`Store`]. 
+ methods: SymbolTable>, +} + #[derive(Debug)] pub struct Closure { pub func: Rc, diff --git a/crates/vuur_vm/src/vm_v2.rs b/crates/vuur_vm/src/vm_v2.rs index 6209efd..390d465 100644 --- a/crates/vuur_vm/src/vm_v2.rs +++ b/crates/vuur_vm/src/vm_v2.rs @@ -7,8 +7,9 @@ use std::rc::Rc; use crate::handle::Handle; use crate::instruction_set::Op; -use crate::symbol_table::Symbol; -use crate::value::{Closure, Module, Program, Slot, Value}; +use crate::store::Store; +use crate::symbol_table::{Symbol, SymbolTable}; +use crate::value::{Closure, MethodId, Module, Program, Slot, Value}; const ENTRY_POINT: &str = "Main"; @@ -19,22 +20,6 @@ pub struct VM { store: Store, } -#[derive(Debug)] -pub struct Store { - modules: HashMap>, - /// Global table of method signatures. - /// - /// The symbol from this table can be used to index into a class' - /// methods. This is the method-overloading mechanism. - methods: Vec<()>, -} - -impl Store { - pub fn insert_func(&mut self) { - todo!("Insert function signature") - } -} - #[derive(Debug)] struct CallFrame { /// Instruction pointer @@ -79,13 +64,13 @@ impl VM { fiber: None, store: Store { modules: HashMap::new(), - methods: vec![], + methods: SymbolTable::new(), }, } } // #[inline(never)] - pub(crate) fn run_program(&mut self, program: &Program) -> Result { + pub fn run_program(&mut self, program: &Program) -> Result { let module = program.module.clone(); let closure = program.closure.clone(); @@ -327,6 +312,9 @@ fn run_op_loop(_vm: &mut VM, fiber: &mut Fiber, frame: &mut CallFrame) -> Result frame.ip = addr.to_u32() as usize; } } + Op::End => { + unreachable!("module end") + } Op::Abort => { return Err("abort".to_string()); } From c33e587a39c15f4ce27fd8d8b4ccc2b0b55e9f08 Mon Sep 17 00:00:00 2001 From: Willem Victor Date: Mon, 6 May 2024 21:37:23 +0200 Subject: [PATCH 8/8] Native function calls --- crates/vuur_vm/src/core.rs | 35 +++++++++++ crates/vuur_vm/src/instruction_set.rs | 8 ++- crates/vuur_vm/src/lib.rs | 1 + 
crates/vuur_vm/src/store.rs | 14 ++++- crates/vuur_vm/src/symbol_table.rs | 25 ++++++++ crates/vuur_vm/src/tests.rs | 55 +++++++++++++++-- crates/vuur_vm/src/value.rs | 63 ++++++++++++++++++- crates/vuur_vm/src/vm_v2.rs | 88 +++++++++++++++++++++++---- 8 files changed, 268 insertions(+), 21 deletions(-) create mode 100644 crates/vuur_vm/src/core.rs diff --git a/crates/vuur_vm/src/core.rs b/crates/vuur_vm/src/core.rs new file mode 100644 index 0000000..803f3d8 --- /dev/null +++ b/crates/vuur_vm/src/core.rs @@ -0,0 +1,35 @@ +//! Core module. +use crate::handle::Handle; +use crate::store::Store; +use crate::value::{Env, Module, NativeFunc, NativeFuncPtr, Value, Value::Int}; + +/// Initialize the built-in types and function of the core module. +pub fn init_core(store: &mut Store, module: &mut Module) { + bind_method(store, module, "static max(_,_)", int32_max); + bind_method(store, module, "static print(_)", system_print); +} + +fn bind_method(store: &mut Store, module: &mut Module, sig: &str, ptr: NativeFuncPtr) { + // TODO: Get or insert function signature into store + // Arity from signature + let arity = sig.chars().filter(|c| *c == '_').count() as u8; + // Save native func as global variable + module.vars.push(Value::Native(Handle::new(NativeFunc { ptr, arity }))); + store.methods.push(sig.to_string()); +} + +/// Returns the maximum integer. 
+fn int32_max(_env: Env, args: &[Value]) -> Result { + match args { + &[Int(a), Int(b)] => Ok(Int(a.max(b))), + _ => Err("unexpected types".to_string()), + } +} + +// ---------------------------------------------------------------------------- +// System + +fn system_print(_env: Env, args: &[Value]) -> Result { + println!("{}", args[0].repr()); + Ok(Value::Nil) +} diff --git a/crates/vuur_vm/src/instruction_set.rs b/crates/vuur_vm/src/instruction_set.rs index d0c17b9..111e26d 100644 --- a/crates/vuur_vm/src/instruction_set.rs +++ b/crates/vuur_vm/src/instruction_set.rs @@ -27,7 +27,7 @@ pub enum Op { I32_GreaterEq, /// Push a constant int32 value onto the operand stack. - I32_Const(ConstantId), + Const(ConstantId), I32_Const_Inline(Arg24), // ------------------------------------------------------------------------ @@ -47,6 +47,9 @@ pub enum Op { Call_Closure { arity: u8, }, + Call_Native { + arity: u8, + }, /// Call a method defined on a class. Call_Method { arity: u8, @@ -90,7 +93,7 @@ impl Op { Op::I32_Greater => -1, Op::I32_LessEq => -1, Op::I32_GreaterEq => -1, - Op::I32_Const(_) => 1, + Op::Const(_) => 1, Op::I32_Const_Inline(_) => 1, Op::Store_Global(_) => 0, Op::Load_Global(_) => 1, @@ -100,6 +103,7 @@ impl Op { Op::Load_Upvalue(_) => 1, Op::Upvalue_Close(_) => 0, Op::Call_Closure { arity } => -(*arity as isize) + 1, + Op::Call_Native { arity } => -(*arity as isize) + 1, Op::Call_Method { arity, .. 
} => -(*arity as isize), // remember receiver Op::Return => -1, Op::Closure(_) => 1, diff --git a/crates/vuur_vm/src/lib.rs b/crates/vuur_vm/src/lib.rs index 3d8231f..6effb67 100644 --- a/crates/vuur_vm/src/lib.rs +++ b/crates/vuur_vm/src/lib.rs @@ -14,6 +14,7 @@ pub mod symbol_table; pub mod value; pub mod vm_v2; +mod core; mod store; #[cfg(test)] mod tests; diff --git a/crates/vuur_vm/src/store.rs b/crates/vuur_vm/src/store.rs index 93c9901..1753709 100644 --- a/crates/vuur_vm/src/store.rs +++ b/crates/vuur_vm/src/store.rs @@ -21,10 +21,18 @@ pub struct Store { /// /// The symbol from this table can be used to index into a class' methods. /// This is the method-overloading mechanism. - pub(crate) methods: SymbolTable, + // TODO: MethodSig instead of String + pub(crate) methods: SymbolTable, } impl Store { + pub fn new() -> Self { + Self { + modules: HashMap::new(), + methods: SymbolTable::new(), + } + } + pub fn insert_func(&mut self) { todo!("Insert function signature") } @@ -38,6 +46,10 @@ pub struct MethodSig { pub return_: (), } +// `fib(Int32) -> Int32` +// `replace(Str,Str) -> Str` +// `static validate(Int32) -> Bool` +// `native static sqrt(Float) -> Float` #[derive(Clone, Copy)] pub struct MethodFlags(u32); diff --git a/crates/vuur_vm/src/symbol_table.rs b/crates/vuur_vm/src/symbol_table.rs index 1062f45..cc05d29 100644 --- a/crates/vuur_vm/src/symbol_table.rs +++ b/crates/vuur_vm/src/symbol_table.rs @@ -181,6 +181,10 @@ impl SymbolTable { .position(|(_, el)| predicate(el)) .map(|i| (K::from_usize(i), &self.symbols[i])) } + + pub fn iter(&self) -> Iter { + Iter { table: self, index: 0 } + } } impl SymbolTable { @@ -225,6 +229,27 @@ where } } +pub struct Iter<'a, K, V> { + table: &'a SymbolTable, + index: usize, +} + +impl<'a, K: Symbol, V> Iterator for Iter<'a, K, V> { + type Item = (K, &'a V); + + #[inline] + fn next(&mut self) -> Option { + if self.index < self.table.len() { + let index = self.index; + self.index += 1; + let symbol = 
K::from_usize(index); + Some((symbol, &self.table.symbols[index])) + } else { + None + } + } +} + #[cfg(test)] mod test { use super::*; diff --git a/crates/vuur_vm/src/tests.rs b/crates/vuur_vm/src/tests.rs index 3740865..3c5fae7 100644 --- a/crates/vuur_vm/src/tests.rs +++ b/crates/vuur_vm/src/tests.rs @@ -6,6 +6,8 @@ use crate::{ vm_v2::VM, }; +use crate::core::init_core; +use crate::store::Store; use std::rc::Rc; use std::time::Instant; @@ -56,10 +58,12 @@ fn fibonacci(module: Handle) -> Rc { } #[test] -fn test_vm_v2() { - let fib_arg_1 = 10; +fn test_fibonacci() { + let fib_arg_1 = 20; + let mut store = Store::new(); let module = Handle::new(Module::new("__main__")); + init_core(&mut store, &mut *module.borrow_mut()); // Global variable slots would be determined by top-level `var` and `func` statements. for _ in 0..1 { @@ -100,10 +104,51 @@ fn test_vm_v2() { let program = Program::new(module, closure); // --------------------------------------------------------------------------------------------- - let mut vm = VM::new(); + let mut vm = VM::from_store(store); let start = Instant::now(); let value = vm.run_program(&program); - println!("time: {}µs", (Instant::now() - start).as_micros()); + println!("time: {}ms", (Instant::now() - start).as_millis()); println!("{value:?}"); - assert_eq!(value.unwrap().into_i32().unwrap(), 55); + assert_eq!(value.unwrap().into_i32().unwrap(), 6765); +} + +#[test] +fn test_static_call() { + let mut store = Store::new(); + let module = Handle::new(Module::new("__main__")); + init_core(&mut store, &mut *module.borrow_mut()); + + let code = vec![ + // print(nil) + Op::Load_Global(GlobalId::new(1)), // system_print + Op::Const(ConstantId::new(0)), + Op::Call_Native { arity: 1 }, // system_print + // print(max(7, 11)) + Op::Load_Global(GlobalId::new(1)), // system_print + Op::Load_Global(GlobalId::new(0)), // int32_max + Op::I32_Const_Inline(Arg24::from_i32(7)), + Op::I32_Const_Inline(Arg24::from_i32(11)), + Op::Call_Native { arity: 2 
}, // int32_max + Op::Call_Native { arity: 1 }, // system_print + Op::Pop, + // return nil + Op::Const(ConstantId::new(0)), + Op::Return, + Op::End, + ]; + + // Module top-level code. + let func = Rc::new(ScriptFunc { + constants: vec![Value::Nil], + code: code.into_boxed_slice(), + module: module.downgrade(), + }); + + let closure = Handle::new(Closure::new(func)); + let program = Program::new(module, closure); + + // --------------------------------------------------------------------------------------------- + let mut vm = VM::from_store(store); + let value = vm.run_program(&program); + assert!(value.unwrap().is_nil()); } diff --git a/crates/vuur_vm/src/value.rs b/crates/vuur_vm/src/value.rs index 99ffae8..5ba2183 100644 --- a/crates/vuur_vm/src/value.rs +++ b/crates/vuur_vm/src/value.rs @@ -73,6 +73,11 @@ impl fmt::Debug for Value { } impl Value { + #[inline(always)] + pub fn is_nil(&self) -> bool { + matches!(self, Value::Nil) + } + #[inline(always)] pub fn into_func(self) -> Result, String> { match self { @@ -89,6 +94,14 @@ impl Value { } } + #[inline(always)] + pub fn into_native(self) -> Result, String> { + match self { + Value::Native(native) => Ok(native), + _ => Err(self.type_error()), + } + } + #[inline(always)] pub fn into_i32(self) -> Result { match self { @@ -105,6 +118,27 @@ impl Value { fn type_error(&self) -> String { format!("unexpected value type: {self:?}") } + + pub fn repr(&self) -> ValueRepr { + ValueRepr(self) + } +} + +pub struct ValueRepr<'a>(&'a Value); + +impl<'a> fmt::Display for ValueRepr<'a> { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + match *self.0 { + Value::Nil => write!(f, "nil"), + Value::Bool(v) => write!(f, "{v}"), + Value::Int(v) => write!(f, "{v}"), + Value::Float(v) => write!(f, "{v}"), + Value::Str(ref v) => write!(f, "\"{}\"", v.borrow()), + Value::Func(_) => write!(f, "function"), + Value::Closure(_) => write!(f, "closure"), + Value::Native(_) => write!(f, "native function"), + } + } } /// An executable Vuur 
program. @@ -204,6 +238,13 @@ impl Module { vars: SymbolTable::new(), } } + + pub fn dump_vars(&self) { + println!("{} variables:", self.name); + for (symbol, var) in self.vars.iter() { + println!(" {symbol:?} : {var:?}"); + } + } } #[derive(Debug)] @@ -292,12 +333,19 @@ pub struct ScriptFunc { pub module: Weak>, } -pub type NativeFuncPtr = fn() -> (); +/// Environment passed to native functions to grant +/// the host access to some virtual machine functions. +pub struct Env {} + +// TODO: Decent error type for VM API. +pub type NativeFuncPtr = fn(env: Env, args: &[Value]) -> Result; /// Host function defined in Rust. #[derive(Debug)] pub struct NativeFunc { pub ptr: NativeFuncPtr, + pub arity: u8, + // TODO: Debug info } #[cfg(test)] @@ -314,4 +362,17 @@ mod test { assert!(std::mem::size_of::>() <= std::mem::size_of::()); assert!(std::mem::size_of::>() <= std::mem::size_of::()); } + + #[test] + fn test_unsized_boxed() { + struct Str { + size: usize, + data: T, + } + + let s: Box> = Box::new(Str { + size: 4, + data: [1, 2, 3, 4], + }); + } } diff --git a/crates/vuur_vm/src/vm_v2.rs b/crates/vuur_vm/src/vm_v2.rs index 390d465..e7308b2 100644 --- a/crates/vuur_vm/src/vm_v2.rs +++ b/crates/vuur_vm/src/vm_v2.rs @@ -9,7 +9,7 @@ use crate::handle::Handle; use crate::instruction_set::Op; use crate::store::Store; use crate::symbol_table::{Symbol, SymbolTable}; -use crate::value::{Closure, MethodId, Module, Program, Slot, Value}; +use crate::value::{Closure, Env, MethodId, Module, NativeFuncPtr, Program, Slot, Value}; const ENTRY_POINT: &str = "Main"; @@ -60,21 +60,22 @@ impl Fiber { impl VM { pub fn new() -> Self { - Self { - fiber: None, - store: Store { - modules: HashMap::new(), - methods: SymbolTable::new(), - }, - } + Self::from_store(Store { + modules: HashMap::new(), + methods: SymbolTable::new(), + }) + } + + pub fn from_store(store: Store) -> Self { + Self { fiber: None, store } } // #[inline(never)] pub fn run_program(&mut self, program: &Program) -> Result { - 
let module = program.module.clone(); + let _module = program.module.clone(); let closure = program.closure.clone(); - // Setup a fiber + // Set up a fiber let fiber = Handle::new(Fiber::new(closure)); let result = run_interpreter(self, fiber)?; @@ -95,11 +96,11 @@ impl VM { } /// Execute a top-level function inside the given module. - pub fn run_module(&mut self, module: Rc, func_name: &str, args: &[u8]) -> Result<(), String> { + pub fn run_module(&mut self, _module: Rc, _func_name: &str, _args: &[u8]) -> Result<(), String> { todo!() } - pub fn resume_fiber(&mut self, fiber: &mut Fiber) -> Result<(), String> { + pub fn resume_fiber(&mut self, _fiber: &mut Fiber) -> Result<(), String> { todo!() } } @@ -129,6 +130,19 @@ enum RunAction { /// Call a closure. Call { closure: Handle, + // The absolute position in the stack where the + // next frame's arguments start. + // + // Excludes the function object placed on the stack. + stack_offset: usize, + }, + /// Call a native Rust function. + CallNative { + ptr: NativeFuncPtr, + // The absolute position in the stack where the + // next frame's arguments start. + // + // Excludes the function object placed on the stack. stack_offset: usize, }, /// Fiber control action. @@ -154,6 +168,7 @@ fn run_interpreter(vm: &mut VM, fiber: Handle) -> Result { } } +#[inline(always)] fn run_fiber(vm: &mut VM, fiber: &mut Fiber) -> Result { let mut frame = fiber.calls.pop().ok_or_else(|| "fiber has no frames on its callstack")?; @@ -185,11 +200,38 @@ fn run_fiber(vm: &mut VM, fiber: &mut Fiber) -> Result { // Put parent frame back onto call stack. fiber.calls.push(new_frame); } + RunAction::CallNative { ptr, stack_offset } => { + // The VM is not re-entrant, meaning native functions + // calling back into the same VM is not supported. + // + // It would be wasteful to push native frames onto a + // heterogeneous call stack and loop again.
Instead, + // we take a shortcut and simply call native functions + // from here, allowing the loop to resume the parent + // frame on the next iteration + let env = Env {}; + + let args = &fiber.stack[stack_offset..]; + match ptr(env, args) { + Ok(value) => { + // Drop callee stack and closure value. + fiber.stack.truncate(stack_offset - 1); + fiber.stack.push(value); // return result + } + Err(err) => { + return Err(err); + } + } + } + // Special fiber control action. RunAction::Fiber(_) => {} } } } +/// Bytecode instruction interpreter loop. +/// +/// It may seem odd to inline such a large function, but it improves benchmarks ~10%! #[inline(always)] fn run_op_loop(_vm: &mut VM, fiber: &mut Fiber, frame: &mut CallFrame) -> Result { let closure = frame.closure.clone(); @@ -207,6 +249,9 @@ fn run_op_loop(_vm: &mut VM, fiber: &mut Fiber, frame: &mut CallFrame) -> Result .ok_or_else(|| "bytecode buffer out of bounds")?; if cfg!(feature = "trace_ops") { + for (offset, value) in fiber.stack.iter().enumerate() { + println!(" {offset} : {value:?}"); + } println!("{:04} {op:?}", frame.ip); } @@ -252,6 +297,14 @@ fn run_op_loop(_vm: &mut VM, fiber: &mut Fiber, frame: &mut CallFrame) -> Result let [a, b] = fiber.pop_slots_2(); fiber.stack.push(Value::Bool(a.into_i32()? 
<= b.into_i32()?)); } + Op::Const(constant_id) => { + let constant = func + .constants + .get(constant_id.to_usize()) + .cloned() + .ok_or_else(|| "constant not in function definition")?; + fiber.stack.push(constant); + } Op::I32_Const_Inline(arg) => { let a = arg.to_i32(); fiber.stack.push(Value::from_i32(a)); @@ -303,6 +356,17 @@ fn run_op_loop(_vm: &mut VM, fiber: &mut Fiber, frame: &mut CallFrame) -> Result stack_offset: lo, }); } + Op::Call_Native { arity } => { + let lo = fiber.stack.len() - arity as usize; + let func_offset = lo - 1; + let func_value = fiber.stack.get(func_offset).cloned().ok_or_else(|| "stack underflow")?; + let native_func = func_value.into_native()?; + let native_ptr = native_func.borrow().ptr.clone(); + return Ok(RunAction::CallNative { + ptr: native_ptr, + stack_offset: lo, + }); + } Op::Return => { return Ok(RunAction::Return(fiber.stack.pop().unwrap_or(Value::Nil))); }