From 63083737e7f8a1ab2f6de37d7ab830cc8af8e7b2 Mon Sep 17 00:00:00 2001 From: Willem Victor Date: Sun, 5 May 2024 15:11:44 +0200 Subject: [PATCH] Working fibonacci recursive calls --- crates/vuur_vm/Cargo.toml | 4 + crates/vuur_vm/src/handle.rs | 8 +- crates/vuur_vm/src/instruction_set.rs | 16 +-- crates/vuur_vm/src/tests.rs | 59 +++++++-- crates/vuur_vm/src/value.rs | 111 +++++++++++++++- crates/vuur_vm/src/vm_v2.rs | 182 ++++++++++++++++++++------ 6 files changed, 307 insertions(+), 73 deletions(-) diff --git a/crates/vuur_vm/Cargo.toml b/crates/vuur_vm/Cargo.toml index 1dd41e7..21f7bd1 100644 --- a/crates/vuur_vm/Cargo.toml +++ b/crates/vuur_vm/Cargo.toml @@ -12,3 +12,7 @@ vuur_parse = { path = "../vuur_parse" } # Dynamic Objects bytemuck = "1.13" + +[features] +# Prints opcode instructions as they are interpreted. +trace_ops = [] diff --git a/crates/vuur_vm/src/handle.rs b/crates/vuur_vm/src/handle.rs index bf6e394..986114c 100644 --- a/crates/vuur_vm/src/handle.rs +++ b/crates/vuur_vm/src/handle.rs @@ -6,7 +6,7 @@ use std::rc::Rc; pub use std::rc::Weak; /// Shared reference counted handle -pub struct Handle(Rc>); +pub struct Handle(Rc>); impl Handle { #[inline(always)] @@ -40,7 +40,7 @@ impl Handle { } } -impl Clone for Handle { +impl Clone for Handle { #[inline(always)] fn clone(&self) -> Self { Self(self.0.clone()) @@ -49,13 +49,13 @@ impl Clone for Handle { impl fmt::Debug for Handle where - T: fmt::Debug, + T: ?Sized + fmt::Debug, { fn fmt(&self, f: &mut Formatter) -> fmt::Result { let mut debug = f.debug_tuple("Handle"); match self.0.try_borrow() { - Ok(value) => debug.field(&*value).finish(), + Ok(value) => debug.field(&&*value).finish(), Err(_) => debug.field(&"_").finish(), } } diff --git a/crates/vuur_vm/src/instruction_set.rs b/crates/vuur_vm/src/instruction_set.rs index 93d6785..9cf9fd1 100644 --- a/crates/vuur_vm/src/instruction_set.rs +++ b/crates/vuur_vm/src/instruction_set.rs @@ -1,7 +1,7 @@ use std::fmt; use std::fmt::Formatter; -use crate::value::{GlobalId, LocalId, UpValueId}; +use crate::value::{ConstantId, GlobalId, LocalId, UpValueId}; /// Instruction set. #[derive(Debug, Clone, Copy)] @@ -25,7 +25,6 @@ pub enum Op { I32_Greater, I32_LessEq, I32_GreaterEq, - I32_Cmp, /// Push a constant int32 value onto the operand stack. I32_Const { @@ -72,10 +71,10 @@ pub enum Op { func_id: u16, }, Return, - /// Create a closure instance. - /// - /// Expects a function definition to be on the top of the stack. - Closure_Create, + + /// Create a closure instance from the function definition stored + /// in the constant table of the current call frame. + Closure(ConstantId), // ------------------------------------------------------------------------ // Control Flow @@ -109,7 +108,6 @@ impl Op { Op::I32_Greater => -1, Op::I32_LessEq => -1, Op::I32_GreaterEq => -1, - Op::I32_Cmp => -1, Op::I32_Const { .. } => 1, Op::I32_Const_Inline { .. } => 1, Op::Store_Global { .. } => 0, @@ -122,7 +120,7 @@ impl Op { Op::Call_Closure { arity } => -(*arity as isize) + 1, Op::Call_Method { arity, .. } => -(*arity as isize), // remember receiver Op::Return => -1, - Op::Closure_Create => 1, + Op::Closure(_) => 1, Op::Jump => 0, Op::Jump_False { .. } => -1, Op::End => 0, @@ -131,8 +129,6 @@ impl Op { } } -pub type ConstantId = u16; - /// Bytecode argument packed into 24 bits, encoded in little-endian. #[derive(Clone, Copy, PartialEq, Eq)] pub struct Arg24([u8; 3]); diff --git a/crates/vuur_vm/src/tests.rs b/crates/vuur_vm/src/tests.rs index e6c6c72..fc2470b 100644 --- a/crates/vuur_vm/src/tests.rs +++ b/crates/vuur_vm/src/tests.rs @@ -1,15 +1,15 @@ -use crate::value::{GlobalId, LocalId, Program}; +use crate::value::{Slot, Value}; use crate::{ handle::Handle, instruction_set::{Arg24, Op}, - value::{Closure, Module, ScriptFunc}, + value::{Closure, ConstantId, GlobalId, LocalId, Module, Program, ScriptFunc}, vm_v2::{Store, VM}, }; use std::rc::Rc; /// Create a recursive fibonacci script function. -fn fibonacci(store: &mut Store, module: Handle) { - // func fib(n: Int) { +fn fibonacci(module: Handle) -> Rc { + // func fib(n: Int) -> Int { // if n <= 1 { // return n // } else { @@ -25,7 +25,7 @@ fn fibonacci(store: &mut Store, module: Handle) { }, Op::I32_LessEq, Op::Jump_False { - addr: Arg24::from_u32(0), + addr: Arg24::from_u32(6), }, Op::Load_Local { local_id: n }, Op::Return, @@ -49,27 +49,58 @@ fn fibonacci(store: &mut Store, module: Handle) { Op::Call_Closure { arity: 1 }, // fib(n - 1) + fib(n - 2) Op::I32_Add, + Op::Return, ]; + + Rc::new(ScriptFunc { + constants: vec![], + code: code.into_boxed_slice(), + module: module.downgrade(), + }) } #[test] fn test_vm_v2() { + let fib_arg_1 = 10; + let module = Handle::new(Module::new("__main__")); + // Global variable slots would be determined by top-level `var` and `func` statements. + for _ in 0..1 { + module.borrow_mut().vars.push(Value::Nil); + } + + let fib_func = fibonacci(module.clone()); + let code = vec![ + // func fib(n: Int) -> Int: + Op::Closure(ConstantId::new(0)), // create closure + Op::Store_Global { + global_id: GlobalId::new(0), + }, // Store closure in variable + // fib(5) + Op::Load_Global { + global_id: GlobalId::new(0), + }, // Load closure from variable Op::I32_Const_Inline { - arg: Arg24::from_i32(1), - }, - Op::I32_Const_Inline { - arg: Arg24::from_i32(2), + arg: Arg24::from_i32(fib_arg_1), }, - Op::I32_Add, + Op::Call_Closure { arity: 1 }, + // Op::I32_Const_Inline { + // arg: Arg24::from_i32(1), + // }, + // Op::I32_Const_Inline { + // arg: Arg24::from_i32(2), + // }, + // Op::I32_Add, Op::Return, ]; // Module top-level code. let func = Rc::new(ScriptFunc { - constants: vec![], + constants: vec![ + Value::Func(fib_func), // ConstantId(0) + ], code: code.into_boxed_slice(), module: module.downgrade(), }); @@ -79,7 +110,7 @@ fn test_vm_v2() { // --------------------------------------------------------------------------------------------- let mut vm = VM::new(); - let slot = vm.run_program(&program); - println!("{slot:?}"); - assert_eq!(slot.unwrap().raw(), 3); + let value = vm.run_program(&program); + println!("{value:?}"); + assert_eq!(value.unwrap().into_i32().unwrap(), 55); } diff --git a/crates/vuur_vm/src/value.rs b/crates/vuur_vm/src/value.rs index 96f83fd..9a85419 100644 --- a/crates/vuur_vm/src/value.rs +++ b/crates/vuur_vm/src/value.rs @@ -18,10 +18,90 @@ symbol_impl!( ); symbol_impl!( - /// Local variable Id. + /// Up-value variable Id. #[derive(Debug, Clone, Copy)] pub struct UpValueId(u16) ); +symbol_impl!( + /// Constant Id. + #[derive(Debug, Clone, Copy)] pub struct ConstantId(u16) +); + +/// Dynamically typed value. +/// +/// This is to simplify the internals of the VM for the short term. +/// In the future the VM will be statically typed. +/// +/// See [`Slot`] +#[derive(Clone)] +pub enum Value { + Nil, + Bool(bool), + Int(i32), + Float(f32), + Str(Handle), + + // ------------------------------------------------------------------------ + // Reference type objects. + Func(Rc), + Closure(Handle), + Native(Handle), +} + +impl fmt::Debug for Value { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + use Value::*; + + // There are plenty of opportunities for circular + // references, so we don't recurse into complex objects. + match self { + Nil => write!(f, "Nil"), + Bool(v) => f.debug_tuple("Bool").field(&v).finish(), + Int(v) => f.debug_tuple("Int").field(&v).finish(), + Float(v) => f.debug_tuple("Float").field(&v).finish(), + Str(v) => f.debug_tuple("Str").field(&v).finish(), + Func(_) => write!(f, "Func(...)"), + Closure(_) => write!(f, "Closure(...)"), + Native(_) => write!(f, "Native(...)"), + } + } +} + +impl Value { + #[inline(always)] + pub fn into_func(self) -> Result, String> { + match self { + Value::Func(func) => Ok(func), + _ => Err(self.type_error()), + } + } + + #[inline(always)] + pub fn into_closure(self) -> Result, String> { + match self { + Value::Closure(closure) => Ok(closure), + _ => Err(self.type_error()), + } + } + + #[inline(always)] + pub fn into_i32(self) -> Result { + match self { + Value::Int(int) => Ok(int), + _ => Err(self.type_error()), + } + } + + #[inline(always)] + pub fn from_i32(value: i32) -> Self { + Self::Int(value) + } + + fn type_error(&self) -> String { + format!("unexpected value type: {self:?}") + } +} + /// An executable Vuur program. pub struct Program { /// An executable closure object holding the top-level code of the main module. @@ -44,8 +124,18 @@ impl Program { /// /// It holds the raw bits of a value. The encoding is /// specific to the current platform. +/// +/// FIXME: Storing reference type object pointers in a slot. +/// +/// To keep the VM simple, the standard library `Rc` is used +/// for reference types. It doesn't expose its internal pointer, +/// making it hard to build unsafe internals around it. +/// +/// When we have a proper garbage collector with our own +/// handle types we can revisit `Slot`. #[derive(Clone, Copy)] #[repr(transparent)] +#[allow(dead_code)] pub(crate) struct Slot(u64); impl Slot { @@ -75,6 +165,16 @@ impl Slot { pub(crate) fn to_f32(self) -> f32 { f32::from_bits(self.0 as u32) } + + #[inline(always)] + pub(crate) fn from_ptr(ptr: *const T) -> Self { + Self(ptr as usize as u64) + } + + #[inline(always)] + pub(crate) unsafe fn to_ptr(self) -> *mut T { + self.0 as usize as *mut T + } } impl fmt::Debug for Slot { @@ -89,7 +189,7 @@ pub struct Module { /// Name of the module. pub name: String, /// Module level global variables. - pub vars: SymbolTable, + pub vars: SymbolTable, } impl Module { @@ -161,7 +261,10 @@ pub enum UpValue { /// so it can be stored without `RefCell`. #[derive(Debug)] pub struct ScriptFunc { - pub constants: Vec, + /// Values defined in the function body that do not change. + pub constants: Vec, + + /// Interpreter bytecode instructions to be executed. pub code: Box<[Op]>, /// The function keeps a reference to the module it lexically belongs to. @@ -178,6 +281,7 @@ pub struct ScriptFunc { pub type NativeFuncPtr = fn() -> (); +/// Host function defined in Rust. #[derive(Debug)] pub struct NativeFunc { pub ptr: NativeFuncPtr, @@ -192,6 +296,7 @@ mod test { /// Ensure that a slot can hold a pointer on the current architecture. #[test] fn test_slot_size() { + println!("{}", std::mem::size_of::()); assert!(std::mem::size_of::<*const [u8; 1024]>() <= std::mem::size_of::()); assert!(std::mem::size_of::>() <= std::mem::size_of::()); assert!(std::mem::size_of::>() <= std::mem::size_of::()); diff --git a/crates/vuur_vm/src/vm_v2.rs b/crates/vuur_vm/src/vm_v2.rs index 7e009a1..d354a98 100644 --- a/crates/vuur_vm/src/vm_v2.rs +++ b/crates/vuur_vm/src/vm_v2.rs @@ -2,16 +2,13 @@ //! //! //! Complete rewrite of the virtual machine. -use std::cell::RefCell; -use std::cmp::Ordering; use std::collections::HashMap; use std::rc::Rc; use crate::handle::Handle; use crate::instruction_set::Op; -use crate::value::Module; -use crate::value::Slot; -use crate::value::{Closure, Program}; +use crate::symbol_table::Symbol; +use crate::value::{Closure, Module, Program, Slot, Value}; const ENTRY_POINT: &str = "Main"; @@ -25,8 +22,11 @@ pub struct VM { #[derive(Debug)] pub struct Store { modules: HashMap>, - /// Global table of function signatures. - funcs: Vec<()>, + /// Global table of method signatures. + /// + /// The symbol from this table can be used to index into a class' + /// methods. This is the method-overloading mechanism. + methods: Vec<()>, } impl Store { @@ -39,6 +39,8 @@ impl Store { struct CallFrame { /// Instruction pointer ip: usize, + /// Offset into operand stack where this frame's local variables start. + stack_offset: usize, /// Reference to the closure instance that is being executed. closure: Handle, } @@ -46,16 +48,27 @@ struct CallFrame { #[derive(Debug)] pub struct Fiber { /// Operand stack - pub(crate) stack: Vec, + pub(crate) stack: Vec, /// Stack of call frames (activation records). pub(crate) calls: Vec, } impl Fiber { + /// Create a fiber with a top-level function as an entrypoint. pub fn new(closure: Handle) -> Self { Self { - stack: vec![], - calls: vec![CallFrame { ip: 0, closure }], + stack: vec![ + // To keep consistent with the calling convention, + // the called closure must be on the stack when the + // module's top level code returns. + Value::Closure(closure.clone()), + ], + calls: vec![CallFrame { + ip: 0, + // Arguments start after the closure value. + stack_offset: 1, + closure, + }], } } } @@ -66,12 +79,12 @@ impl VM { fiber: None, store: Store { modules: HashMap::new(), - funcs: vec![], + methods: vec![], }, } } - pub(crate) fn run_program(&mut self, program: &Program) -> Result { + pub(crate) fn run_program(&mut self, program: &Program) -> Result { let module = program.module.clone(); let closure = program.closure.clone(); @@ -107,18 +120,18 @@ impl VM { impl Fiber { #[inline(always)] - fn pop_slots_2(&mut self) -> [Slot; 2] { + fn pop_slots_2(&mut self) -> [Value; 2] { let l = self.stack.len(); - let slot_b = self.stack[l - 2]; - let slot_a = self.stack[l - 1]; + let value_a = self.stack[l - 2].clone(); + let value_b = self.stack[l - 1].clone(); self.stack.truncate(self.stack.len() - 2); - [slot_a, slot_b] + [value_a, value_b] } } enum FiberAction { /// Return a value. - Return(Slot), + Return(Value), /// Pause execution of the current fiber and yield control /// back to host. Yield, @@ -126,16 +139,19 @@ enum FiberAction { enum RunAction { /// Successfully return a value. - Return(Slot), - /// Call a script function. - Call, + Return(Value), + /// Call a closure. + Call { + closure: Handle, + stack_offset: usize, + }, /// Fiber control action. Fiber(FiberAction), } /// Run the current fiber in the VM. // TODO: Instead of Slot, return a decent value that's usable in the Rust host. -fn run_interpreter(vm: &mut VM, fiber: Handle) -> Result { +fn run_interpreter(vm: &mut VM, fiber: Handle) -> Result { vm.fiber = Some(fiber.clone()); loop { @@ -157,29 +173,57 @@ fn run_fiber(vm: &mut VM, fiber: &mut Fiber) -> Result { loop { match run_op_loop(vm, fiber, &mut frame)? { - RunAction::Return(slot) => { - // Current frame returned but there are no callers left on the stack. - if fiber.calls.is_empty() { - return Ok(FiberAction::Return(slot)); + RunAction::Return(value) => { + // Drop callee stack and closure value. + fiber.stack.truncate(frame.stack_offset - 1); + + match fiber.calls.pop() { + // Current frame returned but there are no callers left on the stack. + None => { + return Ok(FiberAction::Return(value)); + } + Some(parent_frame) => { + frame = parent_frame; + fiber.stack.push(value); + } } } - RunAction::Call => {} + RunAction::Call { closure, stack_offset } => { + // Setup new call frame. + let mut new_frame = CallFrame { + ip: 0, + stack_offset, + closure, + }; + std::mem::swap(&mut frame, &mut new_frame); + // Put parent frame back onto call stack. + fiber.calls.push(new_frame); + } RunAction::Fiber(_) => {} } } } #[inline(always)] -fn run_op_loop(vm: &mut VM, fiber: &mut Fiber, frame: &mut CallFrame) -> Result { +fn run_op_loop(_vm: &mut VM, fiber: &mut Fiber, frame: &mut CallFrame) -> Result { let closure = frame.closure.clone(); let func = closure.borrow_mut().func.clone(); + // for op in func.code.iter() { + // println!(" {op:?}"); + // } + loop { let op = func .code .get(frame.ip) .cloned() .ok_or_else(|| "bytecode buffer out of bounds")?; + + if cfg!(feature = "trace_ops") { + println!("{:04} {op:?}", frame.ip); + } + frame.ip += 1; match op { @@ -190,43 +234,97 @@ fn run_op_loop(vm: &mut VM, fiber: &mut Fiber, frame: &mut CallFrame) -> Result< } Op::I32_Add => { let [a, b] = fiber.pop_slots_2(); - fiber.stack.push(Slot::from_i32(a.to_i32() + b.to_i32())); + fiber.stack.push(Value::from_i32(a.into_i32()? + b.into_i32()?)); } Op::I32_Sub => { let [a, b] = fiber.pop_slots_2(); - fiber.stack.push(Slot::from_i32(a.to_i32() - b.to_i32())); + fiber.stack.push(Value::from_i32(a.into_i32()? - b.into_i32()?)); } Op::I32_Mul => { let [a, b] = fiber.pop_slots_2(); - fiber.stack.push(Slot::from_i32(a.to_i32() * b.to_i32())); + fiber.stack.push(Value::from_i32(a.into_i32()? * b.into_i32()?)); } Op::I32_Div => { let [a, b] = fiber.pop_slots_2(); - fiber.stack.push(Slot::from_i32(a.to_i32() / b.to_i32())); + fiber.stack.push(Value::from_i32(a.into_i32()? / b.into_i32()?)); } Op::I32_Neg => { let a = fiber.stack.pop().ok_or_else(|| "operand stack is empty")?; - fiber.stack.push(Slot::from_i32(-a.to_i32())); + fiber.stack.push(Value::from_i32(-a.into_i32()?)); } Op::I32_Eq => { let [a, b] = fiber.pop_slots_2(); - fiber.stack.push(Slot::from_i32(if a.to_i32() == b.to_i32() { 1 } else { 0 })); + fiber + .stack + .push(Value::from_i32(if a.into_i32()? == b.into_i32()? { 1 } else { 0 })); } - Op::I32_Cmp => { + Op::I32_Less => { let [a, b] = fiber.pop_slots_2(); - let ordering = match Ord::cmp(&a.to_i32(), &b.to_i32()) { - Ordering::Less => -1, - Ordering::Equal => 0, - Ordering::Greater => 1, - }; - fiber.stack.push(Slot::from_i32(ordering)); + fiber.stack.push(Value::Bool(a.into_i32()? < b.into_i32()?)); + } + Op::I32_LessEq => { + let [a, b] = fiber.pop_slots_2(); + fiber.stack.push(Value::Bool(a.into_i32()? <= b.into_i32()?)); } Op::I32_Const_Inline { arg } => { let a = arg.to_i32(); - fiber.stack.push(Slot::from_i32(a)); + fiber.stack.push(Value::from_i32(a)); + } + Op::Store_Global { global_id } => { + let module = func + .module + .upgrade() + .ok_or_else(|| "function lost reference to its lexical module")?; + let value = fiber.stack.pop().unwrap_or(Value::Nil); + module.borrow_mut().vars.insert(global_id, value); + } + Op::Load_Global { global_id } => { + let module = func + .module + .upgrade() + .ok_or_else(|| "function lost reference to its lexical module")?; + let value = module.borrow_mut().vars.get(global_id).clone(); + fiber.stack.push(value); + } + Op::Store_Local { local_id } => { + let index = frame.stack_offset + local_id.to_usize(); + if index >= fiber.stack.len() { + return Err("operand stack overflow".to_string()); + } + fiber.stack[index] = fiber.stack.pop().ok_or_else(|| "operand stack underflow")?; + } + Op::Load_Local { local_id } => { + let value = fiber + .stack + .get(frame.stack_offset + local_id.to_usize()) + .cloned() + .ok_or_else(|| "operand stack overflow")?; + fiber.stack.push(value); + } + Op::Closure(constant_id) => { + let func_value = func.constants.get(constant_id.to_usize()).cloned().unwrap_or(Value::Nil); + let func_def = func_value.into_func()?; + let closure = Handle::new(Closure::new(func_def)); + fiber.stack.push(Value::Closure(closure)); + } + Op::Call_Closure { arity } => { + let lo = fiber.stack.len() - arity as usize; + let closure_offset = lo - 1; + let closure_value = fiber.stack.get(closure_offset).cloned().ok_or_else(|| "stack underflow")?; + let closure = closure_value.into_closure()?; + return Ok(RunAction::Call { + closure, + stack_offset: lo, + }); } Op::Return => { - return Ok(RunAction::Return(fiber.stack.pop().unwrap_or(Slot::ZERO))); + return Ok(RunAction::Return(fiber.stack.pop().unwrap_or(Value::Nil))); + } + Op::Jump_False { addr } => { + let value = fiber.stack.pop().unwrap_or(Value::Nil); + if matches!(value, Value::Bool(false)) { + frame.ip = addr.to_u32() as usize; + } } Op::Abort => { return Err("abort".to_string());