Skip to content

Commit

Permalink
Got very very basic hardware next-line prefetcher to work
Browse files Browse the repository at this point in the history
  • Loading branch information
Dolu1990 committed Jun 29, 2024
1 parent d9be8ef commit aa38c72
Show file tree
Hide file tree
Showing 5 changed files with 149 additions and 4 deletions.
7 changes: 5 additions & 2 deletions src/main/scala/vexiiriscv/Param.scala
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ class ParamSimple(){
// decoders = 2
// lanes = 2
// storeRs2Late = true
withLateAlu = true
// withLateAlu = true
withMul = true
withDiv = true
withDispatcherBuffer = true
Expand Down Expand Up @@ -541,7 +541,6 @@ class ParamSimple(){
storeRs2At = storeRs2Late.mux(2, 0),
storeBufferSlots = lsuStoreBufferSlots,
storeBufferOps = lsuStoreBufferOps,
hardwarePrefetch = lsuHardwarePrefetch,
softwarePrefetch = lsuSoftwarePrefetch,
translationStorageParameter = MmuStorageParameter(
levels = List(
Expand Down Expand Up @@ -580,6 +579,10 @@ class ParamSimple(){
withCoherency = lsuL1Coherency,
bootMemClear = bootMemClear
)

if(lsuHardwarePrefetch){
plugins += new lsu.PrefetchNextLinePlugin
}
}

if(withMul) {
Expand Down
26 changes: 24 additions & 2 deletions src/main/scala/vexiiriscv/execute/lsu/LsuPlugin.scala
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ class LsuPlugin(var layer : LaneLayer,
var translationStorageParameter: Any,
var translationPortParameter: Any,
var softwarePrefetch: Boolean,
var hardwarePrefetch: Boolean,
var addressAt: Int = 0,
var triggerAt : Int = 1,
var ctrlAt: Int = 2,
Expand All @@ -62,6 +61,7 @@ class LsuPlugin(var layer : LaneLayer,

override def withSoftwarePrefetch: Boolean = softwarePrefetch
override def getLsuCachelessBus(): LsuCachelessBus = logic.bus
override def lsuCommitProbe: Flow[LsuCommitProbe] = logic.commitProbe

def busParam = LsuCachelessBusParam(
addressWidth = Global.PHYSICAL_WIDTH,
Expand Down Expand Up @@ -95,6 +95,7 @@ class LsuPlugin(var layer : LaneLayer,
val ts = host[TrapService]
val ss = host[ScheduleService]
val pcs = host.get[PerformanceCounterService]
val hp = host.get[PrefetcherPlugin]
val fpwbp = host.findOption[WriteBackPlugin](p => p.lane == layer.lane && p.rf == FloatRegFile)
val buildBefore = retains(elp.pipelineLock, ats.portsLock)
val earlyLock = retains(List(ats.storageLock) ++ pcs.map(_.elaborationLock).toList)
Expand All @@ -115,6 +116,7 @@ class LsuPlugin(var layer : LaneLayer,
val trapPort = ts.newTrap(layer.lane.getExecuteAge(ctrlAt), Execute.LANE_AGE_WIDTH)
val flushPort = ss.newFlushPort(layer.lane.getExecuteAge(ctrlAt), laneAgeWidth = Execute.LANE_AGE_WIDTH, withUopId = true)
val frontend = new AguFrontend(layer, host)
val commitProbe = Flow(LsuCommitProbe())

// IntFormatPlugin specification
val iwb = ifp.access(wbAt)
Expand Down Expand Up @@ -338,7 +340,8 @@ class LsuPlugin(var layer : LaneLayer,
port.load := LOAD
port.store := STORE
port.atomic := ATOMIC
port.op := LSU_PREFETCH.mux(LsuL1CmdOpcode.PREFETCH, LsuL1CmdOpcode.LSU)
port.op := LsuL1CmdOpcode.LSU
if(softwarePrefetch) when(LSU_PREFETCH) { port.op := LsuL1CmdOpcode.PREFETCH }

val storeId = Reg(Decode.STORE_ID) init (0)
storeId := storeId + U(port.fire)
Expand Down Expand Up @@ -391,6 +394,19 @@ class LsuPlugin(var layer : LaneLayer,
port.storeId := storeBuffer.pop.op.storeId
}

// Optional L1 command source fed by a hardware prefetcher.
// Only generated when a PrefetcherPlugin was found on the host (hp is non-empty).
val fromHp = hp.nonEmpty generate new Area {
// Prefetch command stream published by the prefetcher plugin.
val feed = hp.get.io.get
// Register one more command stream in `ports`, next to the other L1 command sources.
val port = ports.addRet(Stream(LsuL1Cmd()))
// valid/ready handshake is taken directly from the prefetcher stream.
port.arbitrationFrom(feed)
// Always issued as a PREFETCH operation.
port.op := LsuL1CmdOpcode.PREFETCH
port.address := feed.address
// The prefetcher's `unique` flag is carried on the command's `store` field.
port.store := feed.unique
// Remaining command fields are tied to constants for prefetch commands.
port.size := 0
port.load := False
port.atomic := False
port.storeId := 0
}

val arbiter = StreamArbiterFactory().noLock.lowerFirst.buildOn(ports)
arbiter.io.output.ready := !elp.isFreezed()
l1.SEL := arbiter.io.output.valid
Expand Down Expand Up @@ -764,6 +780,12 @@ class LsuPlugin(var layer : LaneLayer,
}
events.foreach(_.waiting setWhen(valid))
}

commitProbe.valid := down.isFiring && SEL && FROM_LSU
commitProbe.address := l1.MIXED_ADDRESS
commitProbe.load := l1.LOAD
commitProbe.store := l1.STORE
commitProbe.trap := lsuTrap
}

val onWb = new elp.Execute(wbAt){
Expand Down
114 changes: 114 additions & 0 deletions src/main/scala/vexiiriscv/execute/lsu/Prefetcher.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
package vexiiriscv.execute.lsu

import spinal.core._
import spinal.lib._
import spinal.lib.misc.plugin.FiberPlugin
import vexiiriscv.fetch.LsuService

/**
 * Payload of a prefetch request produced by a [[PrefetcherPlugin]].
 */
case class PrefetchCmd() extends Bundle {
// Address to prefetch, typed by LsuL1.MIXED_ADDRESS.
val address = LsuL1.MIXED_ADDRESS()
// PrefetchNextLinePlugin sets this when the observed access was a store.
// NOTE(review): presumably asks the L1 for the line in a writable/unique state - confirm against the L1 implementation.
val unique = Bool()
}

/**
 * Snapshot of one LSU memory access, exposed through a Flow so that
 * observers such as prefetchers can react to it.
 */
case class LsuCommitProbe() extends Bundle {
  // Address of the observed access.
  val address = LsuL1.MIXED_ADDRESS()
  // Set when the observed access is a load.
  val load = Bool()
  // Set when the observed access is a store.
  val store = Bool()
  // Set when the observed access raised an LSU trap.
  val trap = Bool()
}


/**
 * Base class for hardware prefetcher plugins.
 * Concrete implementations drive `io` with the prefetch requests they want issued.
 */
abstract class PrefetcherPlugin extends FiberPlugin {
// Stream of prefetch commands, created during the build phase.
val io = during build Stream(PrefetchCmd())
}

/**
 * Very basic next-line hardware prefetcher.
 *
 * For every access reported on the LSU commit probe, it emits one prefetch
 * command targeting the probed address plus `lineBytes`.
 *
 * Notes on the current behaviour:
 *  - `probe.trap` is not looked at, so accesses that trapped also trigger a prefetch.
 *  - The prefetch address is the probed address plus the stride; it is not
 *    re-aligned, so the low offset bits of the probed address are kept.
 *
 * @param lineBytes Byte distance between the observed address and the
 *                  prefetched one. Defaults to 64, the value that used to be
 *                  hard-coded, so `new PrefetchNextLinePlugin` is unchanged.
 */
class PrefetchNextLinePlugin(val lineBytes: Int = 64) extends PrefetcherPlugin {
  require(lineBytes > 0, s"lineBytes must be positive, got $lineBytes")

  val logic = during build new Area {
    // Flow of accesses published by the LSU.
    val probe = host[LsuService].lsuCommitProbe

    val converted = Stream(PrefetchCmd())
    // NOTE(review): a Flow has no back-pressure, so a probe arriving while
    // `converted` is stalled is presumably dropped - confirm against
    // SpinalHDL's Flow.toStream.
    converted.arbitrationFrom(probe.toStream)
    converted.address := probe.address + lineBytes
    // Stores request the line as `unique`.
    converted.unique := probe.store

    // One register stage between the probe and the prefetch command output.
    io << converted.stage()
  }
}


/*
none
0000000000000621
0000000000000279
0000000000000315
00000000000002c0
00000000000006d5
00000000000004d1
next line without trap
00000000000004ae
000000000000024c
0000000000000300
00000000000002ac
0000000000000617
00000000000004a6
next line with trap
0000000000000341
000000000000024c
00000000000002f1
00000000000002ac
000000000000056c
00000000000004b0
Write speed: 166.7MiB/s
Read speed: 113.3MiB/s
Write speed: 165.8MiB/s
Read speed: 204.4MiB/s
[ 0.000000] clocksource: riscv_clocksource: mask: 0xffffffffffffffff max_cycles: 0x171024e7e0, max_idle_ns: 440795205315 ns
[ 0.000064] sched_clock: 64 bits at 100MHz, resolution 10ns, wraps every 4398046511100ns
[ 0.001594] Console: colour dummy device 80x25
[ 0.002135] printk: console [hvc0] enabled
[ 0.002135] printk: console [hvc0] enabled
[ 0.002978] printk: bootconsole [sbi0] disabled
[ 0.002978] printk: bootconsole [sbi0] disabled
[ 0.004014] Calibrating delay loop (skipped), value calculated using timer frequency.. 200.00 BogoMIPS (lpj=400000)
[ 0.005248] pid_max: default: 32768 minimum: 301
[ 0.006755] Mount-cache hash table entries: 512 (order: 0, 4096 bytes, linear)
[ 0.007583] Mountpoint-cache hash table entries: 512 (order: 0, 4096 bytes, linear)
[ 0.017582] rcu: Hierarchical SRCU implementation.
[ 0.020062] smp: Bringing up secondary CPUs ...
[ 0.020601] smp: Brought up 1 node, 1 CPU
[ 0.022926] devtmpfs: initialized
[ 0.027859] clocksource: jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 7645041785100000 ns
[ 0.028924] futex hash table entries: 256 (order: 2, 16384 bytes, linear)
[ 0.031451] NET: Registered protocol family 16
[ 0.071113] clocksource: Switched to clocksource riscv_clocksource
[ 0.124171] NET: Registered protocol family 2
[ 0.129262] tcp_listen_portaddr_hash hash table entries: 256 (order: 0, 4096 bytes, linear)
[ 0.130273] TCP established hash table entries: 2048 (order: 2, 16384 bytes, linear)
[ 0.131533] TCP bind hash table entries: 2048 (order: 3, 32768 bytes, linear)
[ 0.132629] TCP: Hash tables configured (established 2048 bind 2048)
[ 0.133867] UDP hash table entries: 256 (order: 1, 8192 bytes, linear)
[ 0.134737] UDP-Lite hash table entries: 256 (order: 1, 8192 bytes, linear)
[ 0.138491] Unpacking initramfs...
[ 0.346114] Initramfs unpacking failed: invalid magic at start of compressed archive
[ 0.395678] Freeing initrd memory: 8192K
[ 0.398889] workingset: timestamp_bits=62 max_order=16 bucket_order=0
[ 0.459122] Block layer SCSI generic (bsg) driver version 0.4 loaded (major 254)
[ 0.459916] io scheduler mq-deadline registered
[ 0.460437] io scheduler kyber registered
[ 0.793762] NET: Registered protocol family 10
[ 0.799108] Segment Routing with IPv6
[ 0.800025] sit: IPv6, IPv4 and MPLS over IPv4 tunneling driver
[ 0.804881] NET: Registered protocol family 17
[ 0.809217] Freeing unused kernel memory: 176K
[ 0.809733] Kernel memory protection not selected by kernel config.
[ 0.810458] Run /init as init process
Starting syslogd: OK
Starting klogd: OK
Running sysctl: OK
Saving random seed: [ 1.568969] random: dd: uninitialized urandom read (512 bytes read)
OK
Starting network: OK
*/
2 changes: 2 additions & 0 deletions src/main/scala/vexiiriscv/fetch/FetchL1Plugin.scala
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import vexiiriscv.Global._
import Fetch._
import spinal.core.fiber.{Handle, Retainer}
import spinal.lib.system.tag.PmaRegion
import vexiiriscv.execute.lsu.LsuCommitProbe
import vexiiriscv.riscv.CSR
import vexiiriscv.schedule.ReschedulePlugin

Expand All @@ -41,6 +42,7 @@ trait LsuService{
val invalidationRetainer = Retainer()
val invalidationPorts = ArrayBuffer[LsuL1InvalidationBus]()
def newInvalidationPort() = invalidationPorts.addRet(LsuL1InvalidationBus())
def lsuCommitProbe : Flow[LsuCommitProbe]
}

trait LsuL1Service{
Expand Down
4 changes: 4 additions & 0 deletions src/main/scala/vexiiriscv/soc/litex/Soc.scala
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,8 @@ object PythonArgsGen extends App{
|VexiiRiscv.with_rvf = ${withRvf.toInt}
|VexiiRiscv.with_rvd = ${withRvd.toInt}
|VexiiRiscv.with_rvc = ${withRvc.toInt}
|VexiiRiscv.with_lsu_software_prefetch = ${lsuSoftwarePrefetch.toInt}
|VexiiRiscv.with_lsu_hardware_prefetch = ${lsuHardwarePrefetch.toInt}
|VexiiRiscv.internal_bus_width = ${memDataWidth}
|""".stripMargin)
close()
Expand All @@ -353,6 +355,8 @@ object PythonArgsGen extends App{

/*
--lsu-software-prefetch --lsu-hardware-prefetch
# debian 4c
python3 -m litex_boards.targets.digilent_nexys_video --cpu-type=vexiiriscv --cpu-variant=debian --with-jtag-tap --bus-standard axi-lite \
--vexii-args="--performance-counters 9 --regfile-async --lsu-l1-store-buffer-ops=32 --lsu-l1-refill-count 2 --lsu-l1-writeback-count 2 --lsu-l1-store-buffer-slots=2" \
Expand Down

0 comments on commit aa38c72

Please sign in to comment.