From 3746ec1502dd2575f8ee5c9e23b2e133c59816a5 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Thu, 27 Jun 2024 12:36:38 +0200 Subject: [PATCH] software prefetcher WIP --- src/main/scala/vexiiriscv/Param.scala | 3 +- .../vexiiriscv/execute/IntAluPlugin.scala | 4 +- .../vexiiriscv/execute/lsu/LsuPlugin.scala | 40 +++++++++++-------- src/main/scala/vexiiriscv/riscv/RegFile.scala | 5 +++ src/main/scala/vexiiriscv/riscv/Rvi.scala | 28 ++++++++----- .../vexiiriscv/test/WhiteboxerPlugin.scala | 2 +- .../scala/vexiiriscv/tester/TestBench.scala | 1 + 7 files changed, 52 insertions(+), 31 deletions(-) diff --git a/src/main/scala/vexiiriscv/Param.scala b/src/main/scala/vexiiriscv/Param.scala index 55e0ce6c..21592d91 100644 --- a/src/main/scala/vexiiriscv/Param.scala +++ b/src/main/scala/vexiiriscv/Param.scala @@ -144,8 +144,9 @@ class ParamSimple(){ fetchL1Sets = 64 fetchL1Ways = 4 fetchL1ReducedBank = true - fetchMemDataWidthMin = 256 + fetchMemDataWidthMin = 64 lsuL1Enable = true + lsuMemDataWidthMin = 64 lsuL1Sets = 64 lsuL1Ways = 4 lsuL1RefillCount = 2 diff --git a/src/main/scala/vexiiriscv/execute/IntAluPlugin.scala b/src/main/scala/vexiiriscv/execute/IntAluPlugin.scala index edfff9e7..0bebb2cf 100644 --- a/src/main/scala/vexiiriscv/execute/IntAluPlugin.scala +++ b/src/main/scala/vexiiriscv/execute/IntAluPlugin.scala @@ -38,10 +38,10 @@ class IntAluPlugin(var layer: LaneLayer, val abce = AluBitwiseCtrlEnum val wb = newWriteback(ifp, formatAt) - val ORI = (host.get[CmoService] match { + val ORI = Rvi.ORI(host.get[CmoService] match { case Some(s) => s.withSoftwarePrefetch case None => false - }).mux(Rvi.ORI_WO_X0, Rvi.ORI_FULL) + }) add(Rvi.ADD ).srcs(Op.ADD , SRC1.RF, SRC2.RF).decode(ALU_CTRL -> ace.ADD_SUB ) add(Rvi.SUB ).srcs(Op.SUB , SRC1.RF, SRC2.RF).decode(ALU_CTRL -> ace.ADD_SUB ) diff --git a/src/main/scala/vexiiriscv/execute/lsu/LsuPlugin.scala b/src/main/scala/vexiiriscv/execute/lsu/LsuPlugin.scala index 69612ec5..45f02590 100644 --- a/src/main/scala/vexiiriscv/execute/lsu/LsuPlugin.scala +++ b/src/main/scala/vexiiriscv/execute/lsu/LsuPlugin.scala @@ -147,13 +147,25 @@ class LsuPlugin(var layer : LaneLayer, } val FENCE = Payload(Bool()) - frontend.uopList.foreach(layer(_).addDecoding(FENCE -> False)) - layer.add(Rvi.FENCE).setCompletion(ctrlAt).addDecoding(SEL -> True, LOAD -> False, STORE -> False, ATOMIC -> False, FLOAT -> False, FENCE -> True) + val PREFETCH = Payload(Bool()) + + frontend.uopList.foreach(layer(_).addDecoding(FENCE -> False, PREFETCH -> False)) + layer.add(Rvi.FENCE).setCompletion(ctrlAt).addDecoding(SEL -> True, LOAD -> False, STORE -> False, ATOMIC -> False, FLOAT -> False, FENCE -> True, PREFETCH -> False) elp.setDecodingDefault(FENCE, False) for(uop <- frontend.writingMem if layer(uop).completion.isEmpty) layer(uop).setCompletion(ctrlAt) + val spf = softwarePrefetch generate new Area{ + val pr = layer.add(Rvi.PREFETCH_R) + val pw = layer.add(Rvi.PREFETCH_W) + for(op <- List(pr,pw)) { + op.setCompletion(ctrlAt) + op.addDecoding(SEL -> True, LOAD -> True, STORE -> Bool(op == pw), ATOMIC -> False, FLOAT -> False, FENCE -> False, PREFETCH -> True) + frontend.srcPlugin.specify(op, List(SrcKeys.Op.ADD, SrcKeys.SRC1.RF, SrcKeys.SRC2.S)) + } + } + retainer.release() val injectCtrl = elp.ctrl(0) @@ -315,6 +327,7 @@ class LsuPlugin(var layer : LaneLayer, val ports = ArrayBuffer[Stream[LsuL1Cmd]]() val ls = new Area { + val prefetchOp = Decode.UOP(24 downto 20) val port = ports.addRet(Stream(LsuL1Cmd())) port.valid := isValid && SEL port.address := srcp.ADD_SUB.asUInt.resized //TODO Overflow ? @@ -435,7 +448,7 @@ class LsuPlugin(var layer : LaneLayer, pmaIo.cmd.op(0) := l1.STORE val withAddress = !FENCE - val IO = insert(pmaL1.rsp.fault && !pmaIo.rsp.fault && withAddress) + val IO = insert(pmaL1.rsp.fault && !pmaIo.rsp.fault && withAddress && ! PREFETCH) val writeData = CombInit[Bits](elp(IntRegFile, riscv.RS2)) if(Riscv.withFpu) when(FLOAT){ @@ -565,7 +578,7 @@ class LsuPlugin(var layer : LaneLayer, val tag = p2t(LsuL1.PHYSICAL_ADDRESS) val hits = B(storeBuffer.slots.map(s => s.valid && s.tag === tag)) val hit = hits.orR - val compatibleOp = FROM_LSU && STORE && !ATOMIC && !IO + val compatibleOp = FROM_LSU && STORE && !ATOMIC && !IO && !PREFETCH val notFull = !storeBuffer.ops.full && (storeBuffer.slotsFree || hit) val allowed = notFull && compatibleOp val slotOh = hits | storeBuffer.slotsFreeFirst.andMask(!hit) @@ -657,7 +670,7 @@ class LsuPlugin(var layer : LaneLayer, storeBuffer.push.op.storeId := Decode.STORE_ID } - when(!withAddress){ + when(!withAddress || PREFETCH){ lsuTrap := False } @@ -687,17 +700,10 @@ class LsuPlugin(var layer : LaneLayer, } } -// val fromLsuAbord = FROM_LSU && (!isValid || isCancel || mmuFailure) -// val generalAbord = l1.HAZARD || l1.FAULT || !l1.FLUSH && pmaL1.rsp.fault || preCtrl.MISS_ALIGNED || withStoreBuffer.mux(wb.loadHazard, False) -// l1.ABORD := generalAbord || fromLsuAbord -//// l1.ABORD := FROM_LSU && (!isValid || isCancel || pmaL1.rsp.fault || l1.FAULT || mmuPageFault || tpk.ACCESS_FAULT || tpk.REDO || preCtrl.MISS_ALIGNED || pmaFault || withStoreBuffer.mux(wb.loadHazard, False)) -// -// l1.SKIP_WRITE := l1.ATOMIC && !l1.LOAD && scMiss || withStoreBuffer.mux(!FROM_WB && wb.hit || wb.selfHazard, False) || FROM_LSU && onTrigger.HIT - - val mmuFailure = mmuPageFault || tpk.ACCESS_FAULT || tpk.REDO - l1.ABORD := FROM_LSU && (!isValid || isCancel || pmaL1.rsp.fault || l1.FAULT || mmuFailure || preCtrl.MISS_ALIGNED || pmaL1.rsp.fault || withStoreBuffer.mux(wb.loadHazard || fenceTrap.valid, False)) - l1.SKIP_WRITE := l1.ATOMIC && !l1.LOAD && scMiss || withStoreBuffer.mux(!FROM_WB && wb.hit || wb.selfHazard, False) || FROM_LSU && onTrigger.HIT + l1.ABORD := FROM_LSU && !PREFETCH && (!isValid || isCancel || pmaL1.rsp.fault || l1.FAULT || mmuFailure || preCtrl.MISS_ALIGNED || withStoreBuffer.mux(wb.loadHazard || fenceTrap.valid, False)) + ??? PREFETCH / pmaL1 / miss aligned / mmu failure + l1.SKIP_WRITE := l1.ATOMIC && !l1.LOAD && scMiss || withStoreBuffer.mux(!FROM_WB && wb.hit || wb.selfHazard, False) || FROM_LSU && onTrigger.HIT || PREFETCH if (withStoreBuffer) l1.ABORD setWhen (FROM_WB && wb.selfHazard) @@ -744,7 +750,7 @@ class LsuPlugin(var layer : LaneLayer, val hartRegulation = new L1Waiter{ host[DispatchPlugin].haltDispatchWhen(valid) - when(isValid && SEL && withStoreBuffer.mux(LOAD, True) && (l1.HAZARD || l1.MISS || l1.MISS_UNIQUE)){ + when(isValid && SEL && !PREFETCH && !IO && !FENCE && withStoreBuffer.mux(LOAD, True) && (l1.HAZARD || l1.MISS || l1.MISS_UNIQUE)){ capture(down) } events.foreach(_.waiting setWhen(valid)) @@ -768,7 +774,7 @@ class LsuPlugin(var layer : LaneLayer, } } - val storeFire = down.isFiring && AguPlugin.SEL && AguPlugin.STORE && !onCtrl.IO + val storeFire = down.isFiring && AguPlugin.SEL && AguPlugin.STORE && !onCtrl.IO && !PREFETCH val storeBroadcast = down.isReady && l1.SEL && l1.STORE && !l1.ABORD && !l1.SKIP_WRITE && !l1.MISS && !l1.MISS_UNIQUE && !l1.HAZARD } diff --git a/src/main/scala/vexiiriscv/riscv/RegFile.scala b/src/main/scala/vexiiriscv/riscv/RegFile.scala index eea0462f..fc269496 100644 --- a/src/main/scala/vexiiriscv/riscv/RegFile.scala +++ b/src/main/scala/vexiiriscv/riscv/RegFile.scala @@ -63,6 +63,11 @@ object IntRegFile extends RegfileSpec with AreaObject { key = key, resources = Nil ) + + def TypeCmoPrefetch(key: MaskedLiteral) = SingleDecoding( + key = key, + resources = List(RS1).map(this -> _) + ) } diff --git a/src/main/scala/vexiiriscv/riscv/Rvi.scala b/src/main/scala/vexiiriscv/riscv/Rvi.scala index 9fa2abb3..78ec9aae 100644 --- a/src/main/scala/vexiiriscv/riscv/Rvi.scala +++ b/src/main/scala/vexiiriscv/riscv/Rvi.scala @@ -30,15 +30,24 @@ object Rvi extends AreaObject { val XORI = TypeI(M"-----------------100-----0010011") val SRLI = TypeI(M"000000-----------101-----0010011") val SRAI = TypeI(M"010000-----------101-----0010011") - val ORI_FULL = TypeI(M"-----------------110-----0010011") - val ORI_WO_X0 = TypeI(List( - M"-----------------1101----0010011", - M"-----------------110-1---0010011", - M"-----------------110--1--0010011", - M"-----------------110---1-0010011", - M"-----------------110----10010011", - )) - def ORI(withHints : Boolean) = withHints.mux(ORI_FULL, ORI_WO_X0) + val ORI_FULL = TypeI(M"-----------------110-----0010011") + def ORI(withLsuPrefetch : Boolean) : SingleDecoding = { + if(!withLsuPrefetch) return ORI_FULL + TypeI(List( + M"-----------------1101----0010011", + M"-----------------110-1---0010011", + M"-----------------110--1--0010011", + M"-----------------110---1-0010011", + M"-----------------110----10010011", + M"-------1---------110000000010011", + M"--------1--------110000000010011", + M"---------1-------110000000010011", + M"-------000-0-----110000000010011", + )) + } + + val PREFETCH_R = TypeCmoPrefetch(M"-------00001-----110000000010011") + val PREFETCH_W = TypeCmoPrefetch(M"-------00011-----110000000010011") val ANDI = TypeI(M"-----------------111-----0010011") @@ -147,7 +156,6 @@ object Rvi extends AreaObject { val FLUSH_DATA = TypeNone(M"-------00000-----101-----0001111") - case class LoadSpec(width: Int, signed: Boolean) val loadSpec = mutable.LinkedHashMap[MicroOp, LoadSpec]() diff --git a/src/main/scala/vexiiriscv/test/WhiteboxerPlugin.scala b/src/main/scala/vexiiriscv/test/WhiteboxerPlugin.scala index ce2bc1ab..c94b7544 100644 --- a/src/main/scala/vexiiriscv/test/WhiteboxerPlugin.scala +++ b/src/main/scala/vexiiriscv/test/WhiteboxerPlugin.scala @@ -145,7 +145,7 @@ class WhiteboxerPlugin extends FiberPlugin{ val lp = host.get[LsuPlugin] map (p => new Area { val c = p.logic.onWb - fire := c.down.isFiring && c(AguPlugin.SEL) && c(AguPlugin.LOAD) && !c(TRAP) && !c(p.logic.onCtrl.IO) + fire := c.down.isFiring && c(AguPlugin.SEL) && c(AguPlugin.LOAD) && !c(p.logic.PREFETCH) && !c(TRAP) && !c(p.logic.onCtrl.IO) hartId := c(Global.HART_ID) uopId := c(Decode.UOP_ID) size := c(AguPlugin.SIZE).resized diff --git a/src/main/scala/vexiiriscv/tester/TestBench.scala b/src/main/scala/vexiiriscv/tester/TestBench.scala index d896f153..a3e263ff 100644 --- a/src/main/scala/vexiiriscv/tester/TestBench.scala +++ b/src/main/scala/vexiiriscv/tester/TestBench.scala @@ -160,6 +160,7 @@ class TestOptions{ opt[Double]("ibus-ready-factor") unbounded() action { (v, c) => ibusReadyFactor = v.toFloat } opt[Double]("dbus-ready-factor") unbounded() action { (v, c) => dbusReadyFactor = v.toFloat } opt[Unit]("jtag-remote") unbounded() action { (v, c) => jtagRemote = true } + opt[Int]("memory-latency") action { (v, c) => dbusBaseLatency = v } opt[String]("fsm-putc") unbounded() action { (v, c) => fsmTasksGen += (() => new FsmPutc(v)) } opt[Unit]("fsm-putc-lr") unbounded() action { (v, c) => fsmTasksGen += (() => new FsmPutc("\n")) }