Skip to content

Commit

Permalink
Fix DispatchPlugin to allow late-alu to work when the pipeline is lon…
Browse files Browse the repository at this point in the history
…ger than the late RS usages
  • Loading branch information
Dolu1990 committed May 30, 2024
1 parent b426896 commit 102cc5a
Show file tree
Hide file tree
Showing 6 changed files with 91 additions and 52 deletions.
4 changes: 2 additions & 2 deletions src/main/scala/vexiiriscv/Param.scala
Original file line number Diff line number Diff line change
Expand Up @@ -155,14 +155,14 @@ class ParamSimple(){
divRadix = 2
decoders = 2
lanes = 2
// withLateAlu = true
withLateAlu = true
withMul = true
withDiv = true
withDispatcherBuffer = true
withAlignerBuffer = true
// withRvc = true
withRva = true
// withRvf = true
withRvf = true
// withRvd = true
withMmu = true
privParam.withSupervisor = true
Expand Down
14 changes: 8 additions & 6 deletions src/main/scala/vexiiriscv/decode/DecoderPlugin.scala
Original file line number Diff line number Diff line change
Expand Up @@ -234,18 +234,20 @@ class DecoderPlugin(var decodeAt : Int) extends FiberPlugin with DecoderService
rfaRd.ENABLE clearWhen (rdZero)
}

val microOpDecoding = new Area {
for ((key, spec) <- decodingSpecs) {
key.assignFromBits(spec.build(Decode.INSTRUCTION, encodings.all).asBits)
}
}

Decode.UOP := Decode.INSTRUCTION

val uopIdBase = harts.map(_.uopId).read(decodeCtrl.link(Global.HART_ID))
Decode.UOP_ID := uopIdBase + laneId
}


decodingLock.await()
// Per-lane micro-op decoding: one LaneArea is created for each of the Decode.LANES decode lanes.
// Inside it, every (key, spec) pair registered in decodingSpecs gets its key driven from the
// result of spec.build applied to that lane's Decode.INSTRUCTION and the complete encoding list.
val laneDecoding = for(laneId <- 0 until Decode.LANES) yield new decodeCtrl.LaneArea(laneId) {
for ((key, spec) <- decodingSpecs) {
// The built value is converted to raw bits before being assigned to the payload key.
key.assignFromBits(spec.build(Decode.INSTRUCTION, encodings.all).asBits)
}
}

buildBefore.release()
}
}
1 change: 1 addition & 0 deletions src/main/scala/vexiiriscv/decode/Service.scala
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import scala.collection.mutable.ArrayBuffer

trait DecoderService {
val elaborationLock = Retainer()
val decodingLock = Retainer()
def covers() : Seq[Masked] //List of all instruction implemented

def addMicroOpDecoding[T <: BaseType](microOp: MicroOp, key : Payload[T], value: T) : Unit = addMicroOpDecoding(microOp, DecodeList(key -> value))
Expand Down
17 changes: 12 additions & 5 deletions src/main/scala/vexiiriscv/execute/Service.scala
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,12 @@ import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer


case class RdSpec(DATA: Payload[Bits],
/**
 * Destination-register (RD) description attached to a micro-op layer spec.
 *
 * @param rf              register file the RD access belongs to
 * @param DATA            payload given by the caller of setRdSpec (presumably the value to write back - TODO confirm)
 * @param broadcastedFrom stage index; setRdSpec stores it with the lane's executeAt already added
 * @param rfReadableFrom  stage index; setRdSpec stores it with the lane's executeAt already added
 */
case class RdSpec(rf : RegfileSpec,
DATA: Payload[Bits],
broadcastedFrom : Int,
rfReadableFrom : Int)

case class RsSpec(rs : RfRead){
/**
 * Source-register (RS) description attached to a micro-op layer spec.
 *
 * @param rf register file the read access belongs to
 * @param rs which read access (RfRead) this spec describes
 */
case class RsSpec(rf : RegfileSpec, rs : RfRead){
// Stage index associated with this read; addRsSpec overwrites it with executeAt + el.executeAt.
var from = 0
}

Expand Down Expand Up @@ -65,12 +66,18 @@ class UopLayerSpec(val uop: MicroOp, val elImpl : LaneLayer, val el : ExecuteLan

def addRsSpec(rfRead : RfRead, executeAt : Int) = {
assert(!rs.contains(rfRead))
val rsSpec = rs.getOrElseUpdate(rfRead, new RsSpec(rfRead))
val rf = uop.resources.collectFirst{
case r : RfResource if r.access == rfRead => r.rf
}.get
val rsSpec = rs.getOrElseUpdate(rfRead, new RsSpec(rf, rfRead))
rsSpec.from = executeAt + el.executeAt
}
def setRdSpec(data: Payload[Bits], broadcastedFrom : Int, rfReadableFrom : Int): Unit = {
assert(rd.isEmpty)
rd = Some(RdSpec(data, broadcastedFrom + el.executeAt, rfReadableFrom + el.executeAt))
val rf = uop.resources.collectFirst {
case r: RfResource if r.access == RD => r.rf
}.get
rd = Some(RdSpec(rf, data, broadcastedFrom + el.executeAt, rfReadableFrom + el.executeAt))
}

def addDecoding(head: (Payload[_ <: BaseType], Any), tail: (Payload[_ <: BaseType], Any)*): Unit = addDecoding(head :: tail.toList)
Expand Down Expand Up @@ -127,7 +134,7 @@ trait ExecuteLaneService extends Area{
def apply(r: RfResource) = getStageable(r)
// def getSpec(op : MicroOp) : MicroOpSpec

def getRdBroadcastedFromMax() = getUopLayerSpec().flatMap(s => s.rd.map(v => v.broadcastedFrom)).max
/**
 * Largest `broadcastedFrom` among the RD specs of this lane's uop layer specs whose register
 * file is one of `regFiles`.
 *
 * `max` throws when no uop of the lane writes into any of the given register files, so the
 * caller has to make sure at least one does.
 */
def getRdBroadcastedFromMax(regFiles : Seq[RegfileSpec]) = {
  val broadcastStages = for {
    layerSpec <- getUopLayerSpec()
    rdSpec <- layerSpec.rd
    if regFiles.contains(rdSpec.rf)
  } yield rdSpec.broadcastedFrom
  broadcastStages.max
}
def getRfReadableAtMax() = getUopLayerSpec().flatMap(s => s.rd.map(v => v.rfReadableFrom)).max

val LAYER_SEL = Payload(Bits(log2Up(getLayers.size) bits))
Expand Down
2 changes: 2 additions & 0 deletions src/main/scala/vexiiriscv/misc/PrivilegedPlugin.scala
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ class PrivilegedPlugin(val p : PrivilegedParam, val hartIds : Seq[Int]) extends
CODE_WIDTH.set((4 +: causesWidthMins).max)

assert(HART_COUNT.get == 1)
api.get

val rdtime = in UInt (64 bits)
val harts = for (hartId <- 0 until HART_COUNT) yield new Area {
Expand Down Expand Up @@ -182,6 +183,7 @@ class PrivilegedPlugin(val p : PrivilegedParam, val hartIds : Seq[Int]) extends
bus.running := running
bus.halted := !running
bus.unavailable := BufferCC(ClockDomain.current.isResetActive)

when(debugMode) {
inhibateInterrupts(hartId)
}
Expand Down
105 changes: 66 additions & 39 deletions src/main/scala/vexiiriscv/schedule/DispatchPlugin.scala
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ import spinal.lib.misc.plugin.FiberPlugin
import vexiiriscv.Global
import vexiiriscv.Global.{HART_COUNT, TRAP}
import vexiiriscv.decode.{AccessKeys, Decode, DecodePipelinePlugin, DecoderService}
import vexiiriscv.execute.{Execute, ExecuteLanePlugin, ExecuteLaneService, ExecutePipelinePlugin, LaneLayer}
import vexiiriscv.execute.{Execute, ExecuteLanePlugin, ExecuteLaneService, ExecutePipelinePlugin, LaneLayer, UopLayerSpec}
import vexiiriscv.misc.{CommitService, InflightService, PipelineBuilderPlugin, TrapService}
import vexiiriscv.regfile.RegfileService
import vexiiriscv.riscv.{MicroOp, RD, RfRead, RfResource}
import vexiiriscv.riscv.{MicroOp, RD, RegfileSpec, RfAccess, RfRead, RfResource}

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
Expand Down Expand Up @@ -63,7 +63,7 @@ class DispatchPlugin(var dispatchAt : Int,
val buildBefore = retains(
List(pbp.elaborationLock, dpp.elaborationLock, eupp.pipelineLock) ++ host.list[ExecuteLaneService].map(_.pipelineLock)
)
val dpRetains = retains(dp.elaborationLock)
val dpRetains = retains(dp.decodingLock)
val tsRetains = retains(ts.trapLock)
awaitBuild()

Expand Down Expand Up @@ -148,8 +148,6 @@ class DispatchPlugin(var dispatchAt : Int,
}
}

dpRetains.release()

hmKeys.add(Global.PC)
hmKeys.add(Global.TRAP)
hmKeys.add(Decode.UOP_ID)
Expand All @@ -159,6 +157,47 @@ class DispatchPlugin(var dispatchAt : Int,
hmKeys.add(ac.PHYS)
}


val rfaReads = Decode.rfaKeys.filter(_._1.isInstanceOf[RfRead])

/**
 * Static description, for one lane layer, of the register-file reads which need hazard checking.
 *
 * For every read access used by the layer it collects the stages at which the uops read it,
 * the register files involved, and one decoded enable payload per stage "chunk".
 *
 * @param ll   lane layer being described
 * @param llId index of that layer in lanesLayers
 */
case class HazardChecker(ll: LaneLayer, llId: Int) extends Area {
  // Identify which RS are used by the pipeline
  val resources = ll.uops.keySet.flatMap(_.resources).distinctLinked
  // Keep only the read accesses that a uop of this layer actually declares as a resource.
  // The access must be compared explicitly: a lowercase identifier inside a pattern is a fresh
  // binding that matches anything, which previously made this filter accept every read access
  // as soon as the layer had any RfResource at all.
  val readAccess = rfaReads.filter { case (access, _) =>
    resources.exists {
      case RfResource(_, usedAccess) => usedAccess == access
      case _ => false
    }
  }.values

  val onRs = for (rs <- readAccess) yield new Area {
    val self = rs

    // (uop, hazard stage) for every uop of the layer which reads this RS
    val uopsOnRs = ArrayBuffer[(MicroOp, Int)]()
    // Distinct hazard stages at which this RS is read
    val readAts = mutable.LinkedHashSet[Int]()
    // Register files this RS is read from
    val regFiles = mutable.LinkedHashSet[RegfileSpec]()
    val rfa = rs.rfa.asInstanceOf[RfRead]
    for (uop <- ll.uops.values) {
      uop.rs.get(rfa).foreach { v =>
        val from = ll.lane.rfReadHazardFrom(v.from)
        uopsOnRs += uop.uop -> from
        readAts += from
        regFiles += v.rf
      }
    }
    val regFilesList = regFiles.toArray

    val readAtsSorted: List[Int] = readAts.toList.sortWith(_ < _)
    // Each chunk is (read stage, stage just before the next read stage); the last chunk is
    // closed with the placeholder upper bound 100.
    val chunks = readAtsSorted.zip((readAtsSorted.map(_ - 1) :+ 100).tail)
    // One decoded flag per chunk, set for the uops whose read stage is <= the chunk start.
    // Each flag is also added to hmKeys.
    val ENABLES = for ((cFrom, cTo) <- chunks) yield {
      val en = Payload(Bool())
      for ((uop, from) <- uopsOnRs) dp.addMicroOpDecoding(uop, en, Bool(from <= cFrom))
      hmKeys += en
      en
    }
  }
}

val hcs = for((ll, llId) <- lanesLayers.zipWithIndex) yield new HazardChecker(ll, llId)

case class MicroOpCtx() extends Bundle{
val valid = Bool()
val laneLayerHits = Bits(lanesLayers.size bits)
Expand Down Expand Up @@ -193,6 +232,7 @@ class DispatchPlugin(var dispatchAt : Int,
val age = Execute.LANE_AGE()
val moving = !ctx.valid || fire || cancel
}
dpRetains.release()

for((c,i) <- candidates.zipWithIndex){
c.age := CountOne(candidates.take(i).map(o => o.ctx.valid && o.ctx.hartId === c.ctx.hartId)).resize(Execute.LANE_AGE_WIDTH)
Expand All @@ -204,49 +244,36 @@ class DispatchPlugin(var dispatchAt : Int,
bypassedSpecs.getOrElseUpdate(el -> at, BypassedSpec(el, at, Payload(Bool()).setName("BYPASSED_AT_" + at))).value
}

val rfaReads = Decode.rfaKeys.filter(_._1.isInstanceOf[RfRead])
val rsHazardChecker = for(c <- candidates) yield new Area {
val onLl = for((ll, llId) <- lanesLayers.zipWithIndex) yield new Area {
// Identify which RS are used by the pipeline
val resources = ll.uops.keySet.flatMap(_.resources).distinctLinked
val readAccess = rfaReads.filter(e => resources.exists{
case RfResource(_, e) => true
case _ => false
}).values
val hazardUntilMax = eus.map(_.getRdBroadcastedFromMax()).max

val onRs = for (rs <- readAccess) yield new Area {
val onLl = for(hc <- hcs) yield new Area{
val onRs = for(rs <- hc.onRs) yield new Area{
val hazards = ArrayBuffer[Bool]()
val decodeSpec = ArrayBuffer[(Masked, Masked)]()
for(uop <- ll.uops.values){
uop.rs.get(rs.rfa.asInstanceOf[RfRead]).foreach{v =>
decodeSpec += Masked(uop.uop.key) -> (v.from >= hazardUntilMax).mux(Masked.one, Masked.zero)
}
}
val skip = Symplify(c.ctx.uop, decodeSpec, 1).as(Bool()) /// Overall, hazardUntilMax doesn't work when using the FPU as the pipeline get longer, need a proper implementation to enable late RS use

for (spec <- bypassedSpecs.values) yield new Area {
for (l <- spec.el.getLayers(); uop <- l.uops.values) {
uop.rd.foreach { rd =>
uop.addDecoding(spec.value -> Bool(rd.broadcastedFrom <= spec.at))
val onChunk = for(((cFrom, cTo), enable) <- rs.chunks.zip(rs.ENABLES)){
for (writeEu <- eus if writeEu.getUopLayerSpec().flatMap(_.rd).map(_.rf).distinctLinked.intersect(rs.regFiles).nonEmpty) {
val hazardRange = cFrom to (writeEu.getRdBroadcastedFromMax(rs.regFilesList) - 1 min cTo)
val offset = rs.chunks.head._1 - 1
assert(hc.ll.lane.rfReadAt == 0, "else need less bypass at the end")
// println(s"${hc.ll.name} ${rs.self.getName()} ${writeEu.laneName} $hazardRange offset=$offset")
for (id <- hazardRange) {
assert(id-offset >= 1)
val node = writeEu.ctrl(id-offset) //id - hazardFrom + 1
hazards += c.ctx.hm(enable) && node.up(rdKeys.ENABLE) && node.up(rdKeys.PHYS) === c.ctx.hm(rs.self.PHYS) && node.up(rdKeys.RFID) === c.ctx.hm(rs.self.RFID) && !node(getBypassed(writeEu, id))
}
}
}
for(writeEu <- eus) {
val hazardFrom = ll.lane.rfReadHazardFrom(ll.getRsUseAtMin()) // This is a pessimistic aproach
val hazardUntil = writeEu.getRdBroadcastedFromMax()
val hazardRange = hazardFrom until hazardUntil
for(id <- hazardRange) {
val node = writeEu.ctrl(id-hazardFrom+1)
hazards += node.up(rdKeys.ENABLE) && node.up(rdKeys.PHYS) === c.ctx.hm(rs.PHYS) && node.up(rdKeys.RFID) === c.ctx.hm(rs.RFID) && !node(getBypassed(writeEu, id)) // node.isValid &&
}
}
val hazard = c.ctx.hm(rs.ENABLE) && hazards.orR && !skip
val hazard = c.ctx.hm(rs.self.ENABLE) && hazards.orR
}
c.rsHazards(llId) := onRs.map(_.hazard).orR
c.rsHazards(hc.llId) := onRs.map(_.hazard).orR
}
}

// For every bypass spec registered in bypassedSpecs, walk all uops of all layers of the spec's
// execute lane and, for those which have an RD spec, register a decoding of spec.value:
// true when the uop's result is broadcasted at or before stage spec.at, false otherwise.
for (spec <- bypassedSpecs.values) yield new Area {
for (l <- spec.el.getLayers(); uop <- l.uops.values) {
uop.rd.foreach { rd =>
uop.addDecoding(spec.value -> Bool(rd.broadcastedFrom <= spec.at))
}
}
}

val reservationChecker = for (c <- candidates) yield new Area {
val onLl = for ((ll, llId) <- lanesLayers.zipWithIndex) yield new Area {
Expand Down

0 comments on commit 102cc5a

Please sign in to comment.