From d742d71c4c642ce6293606be8039672366627d2c Mon Sep 17 00:00:00 2001 From: glyh Date: Wed, 23 Oct 2024 03:15:22 +0800 Subject: [PATCH] all tuple allocation is now on stack --- src/closureps_eval/interpreter.mbt | 1 - src/js/clops2js.mbt | 1 - src/riscv/codegen.mbt | 50 ++++------- src/riscv/emit.mbt | 131 ++++------------------------- src/riscv/extern_stub.mbt | 95 +++++++++++++++++++++ src/riscv/meta.mbt | 27 ++++++ src/riscv/reg.mbt | 3 +- src/riscv/rv_asm.mbt | 17 +++- 8 files changed, 173 insertions(+), 152 deletions(-) create mode 100644 src/riscv/extern_stub.mbt create mode 100644 src/riscv/meta.mbt diff --git a/src/closureps_eval/interpreter.mbt b/src/closureps_eval/interpreter.mbt index a672c53..c0a5f61 100644 --- a/src/closureps_eval/interpreter.mbt +++ b/src/closureps_eval/interpreter.mbt @@ -1,4 +1,3 @@ -// TODO: create base env that includes closure of non closures pub enum Value { Unit Int(Int) diff --git a/src/js/clops2js.mbt b/src/js/clops2js.mbt index a61e89c..8549c80 100644 --- a/src/js/clops2js.mbt +++ b/src/js/clops2js.mbt @@ -1,4 +1,3 @@ -// TODO: create base env that includes closure of non closures struct JsEmitter { clops : @closureps.ClosurePS indent : Int diff --git a/src/riscv/codegen.mbt b/src/riscv/codegen.mbt index befdda3..b6ef1f3 100644 --- a/src/riscv/codegen.mbt +++ b/src/riscv/codegen.mbt @@ -1,5 +1,3 @@ -// TODO: do all allocation at entrance of a block so we don't push and pull regs all the time - enum RegTy { I32 PTR64 @@ -58,28 +56,33 @@ struct CodegenBlock { let global_entrance = "minimbt_main" +fn generate_name(name_var : Var) -> String { + let is_entrance = name_var.id == 0 + if is_entrance { + global_entrance + } else { + name_var.to_string() + } +} + fn CodegenBlock::new( cfg : @ssacfg.SsaCfg, allocation : @hashmap.T[Var, RegRef], name_var : Var ) -> CodegenBlock { - let is_extrance = name_var.id == 0 - let name = if is_extrance { global_entrance } else { name_var.to_string() } + let is_entrance = name_var.id == 0 + let name = generate_name(name_var) // 1. generate prologue - let cur_fn : AssemblyFunction = { name, export: is_extrance, body: [] } - // stores global exit point - if cfg.func_no_free_vars.contains(name_var) { - // Generate a global closure for any function that doesn't take free vars - cur_fn.body.append( - [Label(cfg.label_to_closure[name_var].unwrap().to_string()), DWord(name)], - ) - } + let cur_fn : AssemblyFunction = { name, export: is_entrance, body: [] } cur_fn.body.append( [ Label(name), Comment("args: " + cfg.fn_args[name_var].unwrap().to_string()), ], ) + if is_entrance { + cur_fn.body.append([Comment("set up stack"), La(Sp, "stack_space_end")]) + } // 2. collect dirtied regs let dirtied = @hashset.new() @@ -423,7 +426,6 @@ fn CodegenBlock::resolve_loop( fn get_tuple_offset(t : T, idx : Int) -> Int { // we may pass in a function // then it's converted to tuple during closure conversion - //println(t) match t { Tuple(tys) => { let mut result = 0 @@ -556,21 +558,12 @@ fn CodegenBlock::call_c_conv_aligned( ) } -// TODO: -// 1. I just figure out that all allocation can be done on stack -// and thus MakeArray & MakeTuple can be done more efficiently -// 2. However, I must set up my own stack, so there's no risk of -// stack overflow -// 3. allocation for array can also be push to front so we don't repeatedly -// save & load regs - fn CodegenBlock::codegen(self : CodegenBlock) -> Unit { let block = self.cfg.blocks[self.block_label].unwrap() // as long as the entrance of the body is the first block, our codegen order doesn't matter too much. // 1. Do all mallocs for tuples together let malloc_offsets : @hashmap.T[Var, Int] = @hashmap.new() let mut malloc_offset_cur = 0 - //println(self.block_label) for inst in block.insts { guard let MakeTuple(bind, vals) = inst else { _ => continue @@ -578,20 +571,14 @@ fn CodegenBlock::codegen(self : CodegenBlock) -> Unit { } let mut tuple_byte_size = 0 for val in vals { - //println( - // " \{bind} acc with \{val} \{get_reg_ty(val)} \{get_reg_ty(val).byte_size()}", - //) tuple_byte_size += get_reg_ty(val).byte_size() } malloc_offsets[bind] = malloc_offset_cur malloc_offset_cur += tuple_byte_size } if malloc_offset_cur > 0 { - self.call_c_conv_aligned( - "minimbt_malloc", - [Int(malloc_offset_cur)], - I(malloc_pointer), - ) + // Just allocate on stack, because we don't use them as call stack + self.insert_asm(Addi(Sp, Sp, -malloc_offset_cur)) } // 2. generate asms for each inst @@ -631,7 +618,7 @@ fn CodegenBlock::codegen(self : CodegenBlock) -> Unit { bind, fn(reg) { reg_tup = reg - [Addi(reg, malloc_pointer, malloc_offsets[bind].unwrap())] + [Addi(reg, Sp, malloc_offsets[bind].unwrap())] }, ) // 2. store values into tuple @@ -642,7 +629,6 @@ fn CodegenBlock::codegen(self : CodegenBlock) -> Unit { } } KthTuple(bind, tup, offset) => { - //println("trying to access \{offset} @ \{tup}") let tup_reg = self.pull_val_i(tup) match get_reg_ty(Var(bind)) { F64 => diff --git a/src/riscv/emit.mbt b/src/riscv/emit.mbt index 8db8ce5..d70662f 100644 --- a/src/riscv/emit.mbt +++ b/src/riscv/emit.mbt @@ -1,118 +1,3 @@ -let heap_ptr_label : Ref[String] = { val: "" } - -let stub_label : Ref[String] = { val: "" } - -pub fn emit(cfg : @ssacfg.SsaCfg) -> Array[AssemblyFunction] { - //println(cfg.func_no_free_vars) - let cfg = before_alloc(cfg) - let output = [] - heap_ptr_label.val = cfg.new_named("heap").to_string() - - // generate stub for CPS / C-Calling convention interop - stub_label.val = cfg.new_named("c_stub").to_string() - let stub_resolved_i = cfg.new_named("c_stub_resolved_i").to_string() - let body : Array[RvAsm] = [] - let externals = collect_externals(cfg) - // generate stubs for function returning ints - for external in externals.returns_i { - guard let @typing.Type::Fun(args, _) = external.ty.val else { - _ => @util.die("external non function \{external}") - } - let fn_name = external.to_string() - body.append([Label(fn_name), La(T6, Label("minimbt_" + fn_name))]) - let int_arg_cnt = args.iter().filter(fn(arg) { arg != Double }).count() - let kont_reg = match int_arg_cnt { - 0 => A0 - 1 => A1 - 2 => A2 - 3 => A3 - 4 => A4 - 5 => A5 - _ => @util.die("too many args for external call") - } - // we never return so it's safe to modify the stored regs without backing up - body.append( - [ - Comment("store the register holding continuation"), - Mv(S1, kont_reg), - J(stub_resolved_i), - ], - ) - } - // now in swap reg stores our target function's address - // S1 stores the continuation - // we just need to first call the target function, then move closure pointer - // to a2(where we store continuaion) and then call the continuation - body.append( - [ - Label(stub_resolved_i), - Comment("call the external function"), - Jalr(T6), - Comment("A0 holds result, put continuation as 2nd arg"), - Mv(A1, S1), - Comment("fetch continuation address"), - Ld(T1, { base: S1, offset: 0 }), - Comment("call continuation"), - Jr(T1), - ], - ) - // generates stub for function returning floats - let stub_resolved_f = cfg.new_named("c_stub_resolved_f").to_string() - for external in externals.returns_f { - guard let @typing.Type::Fun(args, _) = external.ty.val else { - _ => @util.die("external non function \{external}") - } - let fn_name = external.to_string() - body.append([Label(fn_name), La(T6, Label("minimbt_" + fn_name))]) - let int_arg_cnt = args.iter().filter(fn(arg) { arg != Double }).count() - let kont_reg = match int_arg_cnt { - 0 => A0 - 1 => A1 - 2 => A2 - 3 => A3 - 4 => A4 - 5 => A5 - _ => @util.die("too many args for external call") - } - // we never return so it's safe to modify the stored regs without backing up - body.append( - [ - Comment("store the register holding continuation"), - Mv(S1, kont_reg), - J(stub_resolved_f), - ], - ) - } - // now in swap reg stores our target function's address - // S1 stores the continuation - // we just need to first call the target function, then move closure pointer - // to a2(where we store continuaion) and then call the continuation - body.append( - [ - Label(stub_resolved_f), - Comment("call the external function"), - Jalr(T6), - Comment("Fa0 holds result, put continuation as 2nd arg"), - Mv(A0, S1), - Comment("fetch continuation address"), - Ld(T1, { base: S1, offset: 0 }), - Comment("call continuation"), - Jr(T1), - ], - ) - let stub_fn : AssemblyFunction = { name: stub_label.val, export: false, body } - output.push(stub_fn) - // genereating asm code for all functions - for item in cfg.fn_args { - let (fn_label, _) = item - let allocation = reg_allocate_on_fn(cfg, fn_label) - let codegen_blk = CodegenBlock::new(cfg, allocation, fn_label) - codegen_blk.codegen() - output.push(codegen_blk.cur_fn.val) - } - output -} - pub struct AssemblyFunction { name : String export : Bool @@ -132,3 +17,19 @@ pub fn AssemblyFunction::output( logger.write_string("\n") } } + +pub fn emit(cfg : @ssacfg.SsaCfg) -> Array[AssemblyFunction] { + let cfg = before_alloc(cfg) + let output = [] + output.push(generate_meta(cfg)) + output.push(generate_stub(cfg)) + // genereating asm code for all functions + for item in cfg.fn_args { + let (fn_label, _) = item + let allocation = reg_allocate_on_fn(cfg, fn_label) + let codegen_blk = CodegenBlock::new(cfg, allocation, fn_label) + codegen_blk.codegen() + output.push(codegen_blk.cur_fn.val) + } + output +} diff --git a/src/riscv/extern_stub.mbt b/src/riscv/extern_stub.mbt new file mode 100644 index 0000000..c9c9be3 --- /dev/null +++ b/src/riscv/extern_stub.mbt @@ -0,0 +1,95 @@ +fn generate_stub(cfg : @ssacfg.SsaCfg) -> AssemblyFunction { + // generate stub for CPS / C-Calling convention interop + let stub_label = cfg.new_named("c_stub").to_string() + let stub_resolved_i = cfg.new_named("c_stub_resolved_i").to_string() + let body : Array[RvAsm] = [] + let externals = collect_externals(cfg) + // generate stubs for function returning ints + for external in externals.returns_i { + guard let @typing.Type::Fun(args, _) = external.ty.val else { + _ => @util.die("external non function \{external}") + } + let fn_name = external.to_string() + body.append([Label(fn_name), La(T6, Label("minimbt_" + fn_name))]) + let int_arg_cnt = args.iter().filter(fn(arg) { arg != Double }).count() + let kont_reg = match int_arg_cnt { + 0 => A0 + 1 => A1 + 2 => A2 + 3 => A3 + 4 => A4 + 5 => A5 + _ => @util.die("too many args for external call") + } + // we never return so it's safe to modify the stored regs without backing up + body.append( + [ + Comment("store the register holding continuation"), + Mv(S1, kont_reg), + J(stub_resolved_i), + ], + ) + } + // now in swap reg stores our target function's address + // S1 stores the continuation + // we just need to first call the target function, then move closure pointer + // to a2(where we store continuaion) and then call the continuation + body.append( + [ + Label(stub_resolved_i), + Comment("call the external function"), + Jalr(T6), + Comment("A0 holds result, put continuation as 2nd arg"), + Mv(A1, S1), + Comment("fetch continuation address"), + Ld(T1, { base: S1, offset: 0 }), + Comment("call continuation"), + Jr(T1), + ], + ) + // generates stub for function returning floats + let stub_resolved_f = cfg.new_named("c_stub_resolved_f").to_string() + for external in externals.returns_f { + guard let @typing.Type::Fun(args, _) = external.ty.val else { + _ => @util.die("external non function \{external}") + } + let fn_name = external.to_string() + body.append([Label(fn_name), La(T6, Label("minimbt_" + fn_name))]) + let int_arg_cnt = args.iter().filter(fn(arg) { arg != Double }).count() + let kont_reg = match int_arg_cnt { + 0 => A0 + 1 => A1 + 2 => A2 + 3 => A3 + 4 => A4 + 5 => A5 + _ => @util.die("too many args for external call") + } + // we never return so it's safe to modify the stored regs without backing up + body.append( + [ + Comment("store the register holding continuation"), + Mv(S1, kont_reg), + J(stub_resolved_f), + ], + ) + } + // now in swap reg stores our target function's address + // S1 stores the continuation + // we just need to first call the target function, then move closure pointer + // to a2(where we store continuaion) and then call the continuation + body.append( + [ + Label(stub_resolved_f), + Comment("call the external function"), + Jalr(T6), + Comment("Fa0 holds result, put continuation as 2nd arg"), + Mv(A0, S1), + Comment("fetch continuation address"), + Ld(T1, { base: S1, offset: 0 }), + Comment("call continuation"), + Jr(T1), + ], + ) + { name: stub_label, export: false, body } +} diff --git a/src/riscv/meta.mbt b/src/riscv/meta.mbt new file mode 100644 index 0000000..04ad2dd --- /dev/null +++ b/src/riscv/meta.mbt @@ -0,0 +1,27 @@ +fn generate_meta(cfg : @ssacfg.SsaCfg) -> AssemblyFunction { + let body : Array[RvAsm] = [] + + // Generate all constant closures + body.push(Section(".data")) + for fn_label in cfg.func_no_free_vars { + body.append( + [ + Label(cfg.label_to_closure[fn_label].unwrap().to_string()), + DWord(generate_name(fn_label)), + ], + ) + } + body.push(Section(".bss")) + body.append( + [ + Comment("Align at 8-byte for x64"), + Align(3), + Label("stack_space"), + Comment("Skips 128 Mib For stack"), + Skip(128 * 1024 * 1024), + Label("stack_space_end"), + ], + ) + body.push(Section(".text")) + { name: "meta", export: false, body } +} diff --git a/src/riscv/reg.mbt b/src/riscv/reg.mbt index b4a522d..69411be 100644 --- a/src/riscv/reg.mbt +++ b/src/riscv/reg.mbt @@ -41,8 +41,6 @@ pub enum Reg { T6 } derive(Eq, Compare, Hash) -let malloc_pointer : Reg = S1 - // we have so much more regs to utilize, lol let alloc_regs : Array[Reg] = [ // prefer saved reg first @@ -56,6 +54,7 @@ let alloc_regs : Array[Reg] = [ S4, S3, S2, + S1, // then use tmp reg T0, T1, diff --git a/src/riscv/rv_asm.mbt b/src/riscv/rv_asm.mbt index c5333ad..d3bc605 100644 --- a/src/riscv/rv_asm.mbt +++ b/src/riscv/rv_asm.mbt @@ -83,8 +83,11 @@ pub enum RvAsm { Ret Seqz(Reg, Reg) DWord(String) + Align(Int) + Skip(Int) // Comments Label(String) + Section(String) Comment(String) } @@ -167,7 +170,7 @@ fn write2[TReg1 : Show, TReg2 : Show]( impl Show for RvAsm with output(self, logger) { match self { - Label(_) => () + Label(_) | Section(_) => () _ => logger.write_string(" ") } match self { @@ -274,10 +277,22 @@ impl Show for RvAsm with output(self, logger) { logger.write_string(".dword ") logger.write_string(data) } + Align(offset) => { + logger.write_string(".align ") + logger.write_string(offset.to_string()) + } + Skip(offset) => { + logger.write_string(".skip ") + logger.write_string(offset.to_string()) + } Label(label) => { logger.write_string(label) logger.write_string(":") } + Section(section) => { + logger.write_string(".section ") + logger.write_string(section) + } Comment(comment) => { logger.write_string("# ") logger.write_string(comment)