From be3f3b119edd3736eeac40f7e416da63a51ecafa Mon Sep 17 00:00:00 2001 From: Tim Hutt Date: Tue, 11 Jun 2024 22:11:47 +0100 Subject: [PATCH] Increase TLB size to 64 This has a very significant effect on the emulator performance with virtual memory enabled (e.g. when booting Linux). Having a more realistic TLB can also help expose bugs, like the recent bug in #496. For example with a larger TLB you can add an assertion in `translate()`. It also allows you to detect missing `sfence.vma`s in software by asserting that the TLB translation is the same as the uncached translation. This is not implemented in this change but we can do it in future (or you can just hack it in when needed). The detection will never be 100% but the bigger the TLB the more missing fences you will detect. 64 was chosen based on benchmarks I did last year. Performance when booting Linux keeps going up until you get to 64. --- model/riscv_vmem.sail | 2 +- model/riscv_vmem_tlb.sail | 83 ++++++++++++++++++++++++++------------- 2 files changed, 57 insertions(+), 28 deletions(-) diff --git a/model/riscv_vmem.sail b/model/riscv_vmem.sail index 2bb96fe7a..3229bb62f 100644 --- a/model/riscv_vmem.sail +++ b/model/riscv_vmem.sail @@ -292,7 +292,7 @@ function translate_TLB_hit(sv_params : SV_Params, mxr : bool, do_sum : bool, ext_ptw : ext_ptw, - tlb_index : nat, + tlb_index : tlb_index_range, ent : TLB_Entry) -> TR_Result(bits(64), PTW_Error) = { let pte = ent.pte; diff --git a/model/riscv_vmem_tlb.sail b/model/riscv_vmem_tlb.sail index 828fa723c..d066f1356 100644 --- a/model/riscv_vmem_tlb.sail +++ b/model/riscv_vmem_tlb.sail @@ -27,17 +27,37 @@ struct TLB_Entry = { age : bits(64) // for replacement policy? } -// Single-entry TLB (could enlarge this in future for better simulation speed) +// 64 entries is based on benchmarks of Linux boots and is where you stop +// seeing performance improvements. 
+type num_tlb_entries : Int = 64 +type tlb_index_range = range(0, num_tlb_entries - 1) + // PRIVATE -register tlb : option(TLB_Entry) = None() +register tlb : vector(num_tlb_entries, option(TLB_Entry)) = [ + None(), None(), None(), None(), None(), None(), None(), None(), + None(), None(), None(), None(), None(), None(), None(), None(), + None(), None(), None(), None(), None(), None(), None(), None(), + None(), None(), None(), None(), None(), None(), None(), None(), + None(), None(), None(), None(), None(), None(), None(), None(), + None(), None(), None(), None(), None(), None(), None(), None(), + None(), None(), None(), None(), None(), None(), None(), None(), + None(), None(), None(), None(), None(), None(), None(), None(), +] + +// Indexed by the lowest bits of the VPN. +function tlb_hash(vaddr : bits(64)) -> tlb_index_range = + unsigned(vaddr[17 .. 12]) // PUBLIC: invoked in init_vmem() [riscv_vmem.sail] -function init_TLB() -> unit = - tlb = None() +function init_TLB() -> unit = { + foreach (i from 0 to (length(tlb) - 1)) { + tlb[i] = None(); + } +} // PUBLIC: invoked in translate_TLB_hit() [riscv_vmem.sail] -function write_TLB(idx : nat, ent : TLB_Entry) -> unit = - tlb = Some(ent) +function write_TLB(index : tlb_index_range, ent : TLB_Entry) -> unit = + tlb[index] = Some(ent) // PRIVATE function match_TLB_Entry(ent : TLB_Entry, @@ -60,11 +80,13 @@ function flush_TLB_Entry(e : TLB_Entry, } // PUBLIC: invoked in translate() [riscv_vmem.sail] -function lookup_TLB (asid : asidbits, vaddr : bits(64)) -> option((nat, TLB_Entry)) = - match tlb { - None() => None(), - Some(e) => if match_TLB_Entry(e, asid, vaddr) then Some((0, e)) else None() +function lookup_TLB (asid : asidbits, vaddr : bits(64)) -> option((tlb_index_range, TLB_Entry)) = { + let index = tlb_hash(vaddr); + match tlb[index] { + None() => None(), + Some(entry) => if match_TLB_Entry(entry, asid, vaddr) then Some((index, entry)) else None(), } +} // PRIVATE function add_to_TLB(asid : asidbits, @@ -75,21 
+97,28 @@ function add_to_TLB(asid : asidbits, level : nat, global : bool, levelBitSize : nat, - PAGESIZE_BITS : nat) -> unit = { - let shift = PAGESIZE_BITS + (level * levelBitSize); + pagesize_bits : nat) -> unit = { + let shift = pagesize_bits + (level * levelBitSize); assert(shift <= 64); let vAddrMask : bits(64) = zero_extend(ones(shift)); let vMatchMask : bits(64) = ~ (vAddrMask); - let entry : TLB_Entry = struct{asid = asid, - global = global, - pte = pte, - pteAddr = pteAddr, - vAddrMask = vAddrMask, - vMatchMask = vMatchMask, - vAddr = vAddr & vMatchMask, - pAddr = shiftl(shiftr(pAddr, shift), shift), - age = mcycle}; - tlb = Some(entry) + + let entry : TLB_Entry = struct{asid = asid, + global = global, + pte = pte, + pteAddr = pteAddr, + vAddrMask = vAddrMask, + vMatchMask = vMatchMask, + vAddr = vAddr & vMatchMask, + pAddr = pAddr & vMatchMask, + age = mcycle}; + + // Add the TLB entry. Note that this may be a super-page, but we still want + // to add it to the index corresponding to the page because that is how + // lookup_TLB looks it up. For superpages we will just end up with the same + // TLB entry in multiple slots. + let index = tlb_hash(vAddr); + tlb[index] = Some(entry); } // Top-level TLB flush function @@ -106,10 +135,10 @@ function flush_TLB(asid_xlen : option(xlenbits), None() => None(), Some(a) => Some(zero_extend(a)) }; - match tlb { - None() => (), - Some(e) => if flush_TLB_Entry(e, asid, addr_64b) - then tlb = None() - else () + foreach (i from 0 to (length(tlb) - 1)) { + match tlb[i] { + Some(e) => if flush_TLB_Entry(e, asid, addr_64b) then { tlb[i] = None(); }, + None() => (), + } } }