From 9d4e67ab6d82ca4a799ef5e02d1f1d71dbb49bd4 Mon Sep 17 00:00:00 2001 From: Dolu1990 Date: Tue, 23 Apr 2024 17:08:28 +0200 Subject: [PATCH] Got FpuDiv to work --- src/main/scala/vexiiriscv/Param.scala | 1 + .../scala/vexiiriscv/execute/DivPlugin.scala | 34 +++++- .../vexiiriscv/execute/fpu/FpuAddPlugin.scala | 1 + .../execute/fpu/FpuAddSharedPlugin.scala | 12 +- .../vexiiriscv/execute/fpu/FpuDivPlugin.scala | 109 ++++++++++++++++++ .../vexiiriscv/execute/fpu/FpuMulPlugin.scala | 12 +- .../execute/fpu/FpuPackerPlugin.scala | 14 ++- .../execute/fpu/FpuSqrtPlugin.scala | 5 +- .../vexiiriscv/execute/fpu/Interface.scala | 9 ++ src/main/scala/vexiiriscv/misc/DivRadix.scala | 7 ++ .../scala/vexiiriscv/scratchpad/Synt.scala | 72 ++++++++---- 11 files changed, 227 insertions(+), 49 deletions(-) create mode 100644 src/main/scala/vexiiriscv/execute/fpu/FpuDivPlugin.scala diff --git a/src/main/scala/vexiiriscv/Param.scala b/src/main/scala/vexiiriscv/Param.scala index cdba1363..47f52c4c 100644 --- a/src/main/scala/vexiiriscv/Param.scala +++ b/src/main/scala/vexiiriscv/Param.scala @@ -664,6 +664,7 @@ class ParamSimple(){ plugins += new execute.fpu.FpuAddPlugin(early0) plugins += new execute.fpu.FpuMulPlugin(early0, withFma = !skipFma, fmaFullAccuracy = fpuFmaFullAccuracy) plugins += new execute.fpu.FpuSqrtPlugin(early0) + plugins += new execute.fpu.FpuDivPlugin(early0) plugins += new execute.fpu.FpuPackerPlugin(lane0) // plugins += new execute.fpu.FpuEmbedded() } diff --git a/src/main/scala/vexiiriscv/execute/DivPlugin.scala b/src/main/scala/vexiiriscv/execute/DivPlugin.scala index 4692ce94..c000f659 100644 --- a/src/main/scala/vexiiriscv/execute/DivPlugin.scala +++ b/src/main/scala/vexiiriscv/execute/DivPlugin.scala @@ -4,7 +4,7 @@ import spinal.core._ import spinal.lib._ import spinal.lib.misc.pipeline._ import vexiiriscv.execute.RsUnsignedPlugin._ -import vexiiriscv.misc.{AdderAggregator, DivComp, DivRadix2, DivRadix, MulSpliter} +import vexiiriscv.misc.{AdderAggregator, DivComp, DivRadix, DivRadix2, DivRsp, MulSpliter} import vexiiriscv.riscv.Riscv._ import vexiiriscv.riscv._ @@ -17,14 +17,36 @@ object DivPlugin extends AreaObject { val DIV_REVERT_RESULT = Payload(Bool()) } +trait DivReuse{ + def divInject(layer : LaneLayer, at : Int, a : UInt, b : UInt, iterations : UInt) : Unit + def divRsp : DivRsp + def divRadix : Int +} + class DivPlugin(val layer : LaneLayer, var impl : (Int, Int, Boolean) => DivComp, var divAt: Int = 0, var writebackAt : Int = 0, var radix: Int = 2, - var area: Boolean = true) extends ExecutionUnitElementSimple(layer){ + var area: Boolean = true) extends ExecutionUnitElementSimple(layer) with DivReuse { import DivPlugin._ + + override def divInject(layer: LaneLayer, at: Int, a: UInt, b: UInt, interations : UInt): Unit = { + assert(layer == this.layer && at == this.divAt) + logic.processing.request := True + val divWidth = logic.processing.div.width + assert(divWidth >= widthOf(a)) + assert(divWidth >= widthOf(b)) + logic.processing.div.io.cmd.a := a.resized // << (divWidth-widthOf(a)) + logic.processing.div.io.cmd.b := b.resized // << (divWidth-widthOf(b)) + logic.processing.div.io.cmd.normalized := True + logic.processing.div.io.cmd.iterations := interations + } + + override def divRsp: DivRsp = logic.processing.div.io.rsp + override def divRadix: Int = radix + val logic = during setup new Logic { awaitBuild() @@ -59,14 +81,18 @@ class DivPlugin(val layer : LaneLayer, DIV_REVERT_RESULT := (RS1_REVERT ^ (RS2_REVERT && !REM)) && !(RS2_FORMATED === 0 && RS2_SIGNED && !REM) //RS2_SIGNED == RS1_SIGNED anyway val cmdSent = RegInit(False) setWhen (div.io.cmd.fire) clearWhen (isReady) - div.io.cmd.valid := isValid && SEL && !cmdSent + val request = isValid && SEL + div.io.cmd.valid := request && !cmdSent div.io.cmd.a := RS1_UNSIGNED.resized div.io.cmd.b := RS2_UNSIGNED.resized + div.io.cmd.normalized := False + div.io.cmd.iterations.assignDontCare() div.io.flush := isReady div.io.rsp.ready := False + val unscheduleRequest = RegNext(isCancel) clearWhen (isReady) init (False) - val freeze = isValid && SEL && !div.io.rsp.valid & !unscheduleRequest + val freeze = request && !div.io.rsp.valid & !unscheduleRequest el.freezeWhen(freeze) val selected = REM ? div.io.rsp.remain otherwise div.io.rsp.result diff --git a/src/main/scala/vexiiriscv/execute/fpu/FpuAddPlugin.scala b/src/main/scala/vexiiriscv/execute/fpu/FpuAddPlugin.scala index 2a470140..44ec536d 100644 --- a/src/main/scala/vexiiriscv/execute/fpu/FpuAddPlugin.scala +++ b/src/main/scala/vexiiriscv/execute/fpu/FpuAddPlugin.scala @@ -60,6 +60,7 @@ class FpuAddPlugin(val layer : LaneLayer, addPort.cmd.roundMode := FpuUtils.ROUNDING addPort.cmd.hartId := Global.HART_ID addPort.cmd.uopId := Decode.UOP_ID + addPort.cmd.flags.clearAll() } buildBefore.release() diff --git a/src/main/scala/vexiiriscv/execute/fpu/FpuAddSharedPlugin.scala b/src/main/scala/vexiiriscv/execute/fpu/FpuAddSharedPlugin.scala index 805c3eef..043ed44d 100644 --- a/src/main/scala/vexiiriscv/execute/fpu/FpuAddSharedPlugin.scala +++ b/src/main/scala/vexiiriscv/execute/fpu/FpuAddSharedPlugin.scala @@ -23,6 +23,7 @@ case class FpuAddSharedCmd(p1 : FloatUnpackedParam, p2 : FloatUnpackedParam, ats val roundMode = FpuRoundMode() val hartId = Global.HART_ID() val uopId = Decode.UOP_ID() + val flags = FpuFlags() } class FpuAddSharedPort(_cmd : FpuAddSharedCmd) extends Area{ @@ -47,8 +48,7 @@ class FpuAddSharedPlugin(lane: ExecuteLanePlugin, val logic = during setup new Area{ val fpp = host.find[FpuPackerPlugin](p => p.lane == lane) - val ffwbp = host.find[FpuFlagsWritebackPlugin](p => p.lane == lane) - val buildBefore = retains(lane.pipelineLock, fpp.elaborationLock, ffwbp.elaborationLock) + val buildBefore = retains(lane.pipelineLock, fpp.elaborationLock) val uopLock = retains(lane.uopLock) awaitBuild() val latency = packAt @@ -68,7 +68,6 @@ class FpuAddSharedPlugin(lane: ExecuteLanePlugin, val uopsAt = mutable.LinkedHashMap[Int, ArrayBuffer[UopLayerSpec]]() for (port <- ports; (uop, at) <- port.uopsAt) uopsAt.getOrElseUpdate(at, ArrayBuffer[UopLayerSpec]()) += uop - val flagsWb = ffwbp.createPort(uopsAt.keys.toList) val packAts = uopsAt.keys.map(_ + latency).toList val packPort = fpp.createPort(packAts, packParam) for ((at, uops) <- uopsAt) { @@ -93,6 +92,7 @@ class FpuAddSharedPlugin(lane: ExecuteLanePlugin, val FORMAT = insert(reader(_.format)) val ROUNDMODE = insert(reader(_.roundMode)) val RDN = insert( ROUNDMODE === FpuRoundMode.RDN) + val FLAGS = insert(reader(_.flags)) Global.HART_ID := reader(_.hartId) Decode.UOP_ID := reader(_.uopId) valid := reader.oh.orR @@ -117,10 +117,6 @@ class FpuAddSharedPlugin(lane: ExecuteLanePlugin, val onPack = new pip.Area(packAt) { val mask = inserter.GROUP_OH.andMask(isValid) - flagsWb.ats := mask - flagsWb.flags.clearAll() - flagsWb.flags.NV.setWhen(adder.result.NV) - packPort.cmd.at := mask packPort.cmd.value.mode := adder.result.RESULT.mode packPort.cmd.value.quiet := adder.result.RESULT.quiet @@ -131,6 +127,8 @@ class FpuAddSharedPlugin(lane: ExecuteLanePlugin, packPort.cmd.roundMode := inserter.ROUNDMODE packPort.cmd.hartId := Global.HART_ID packPort.cmd.uopId := Decode.UOP_ID + packPort.cmd.flags := inserter.FLAGS + packPort.cmd.flags.NV.setWhen(adder.result.NV) ready := !lane.isFreezed() } diff --git a/src/main/scala/vexiiriscv/execute/fpu/FpuDivPlugin.scala b/src/main/scala/vexiiriscv/execute/fpu/FpuDivPlugin.scala new file mode 100644 index 00000000..a0ac0f63 --- /dev/null +++ b/src/main/scala/vexiiriscv/execute/fpu/FpuDivPlugin.scala @@ -0,0 +1,109 @@ +package vexiiriscv.execute.fpu + +import spinal.core._ +import spinal.lib.misc.pipeline._ +import spinal.lib.misc.plugin.FiberPlugin +import vexiiriscv.Global +import vexiiriscv.decode.Decode +import vexiiriscv.execute._ +import vexiiriscv.execute.fpu.FpuUtils.FORMAT +import vexiiriscv.riscv._ + + +class FpuDivPlugin(val layer : LaneLayer, + var exeAt : Int = 0, + var packAt : Int = 0) extends FiberPlugin{ + val p = FpuUtils + + val SEL = Payload(Bool()) + + val logic = during setup new Area{ + val fup = host[FpuUnpackerPlugin] + val fpp = host[FpuPackerPlugin] + val dr = host[DivReuse] + val buildBefore = retains(layer.el.pipelineLock) + val uopLock = retains(layer.el.uopLock, fup.elaborationLock, fpp.elaborationLock) + awaitBuild() + + val packParam = FloatUnpackedParam( + exponentMax = p.unpackedConfig.exponentMax-p.unpackedConfig.exponentMin, + exponentMin = p.unpackedConfig.exponentMin-p.unpackedConfig.exponentMax-1, + mantissaWidth = p.unpackedConfig.mantissaWidth+2 + ) + val packPort = fpp.createPort(List(packAt), packParam) + + layer.el.setDecodingDefault(SEL, False) + def add(uop: MicroOp, decodings: (Payload[_ <: BaseType], Any)*) = { + val spec = layer.add(uop) + spec.addDecoding(SEL -> True) + spec.addDecoding(decodings) + uop.resources.foreach { + case RfResource(_, rs: RfRead) => fup.unpack(uop, rs) + case _ => + } + packPort.uopsAt += spec -> packAt + } + + add(Rvfd.FDIV_S, FORMAT -> FpuFormat.FLOAT) + if(Riscv.RVD) { + add(Rvfd.FDIV_D, FORMAT -> FpuFormat.DOUBLE) + } + + uopLock.release() + + + val RS1_FP = fup(RS1) + val RS2_FP = fup(RS2) + val internalMantissaSize = p.mantissaWidth + val iterationsWished = 1+internalMantissaSize+2+1 + val drift = iterationsWished % log2Up(dr.divRadix) + val interations = iterationsWished + drift + + val onExecute = new layer.Execute(exeAt) { + when(isValid && SEL) { + dr.divInject(layer, exeAt, U(B"1" ## RS1_FP.mantissa.raw), U(B"1" ## RS2_FP.mantissa.raw), interations-1) + } + val DIVIDER_RSP = insert(dr.divRsp.result(drift+1, iterationsWished bits) | U(dr.divRsp.remain.orR || dr.divRsp.result(0, drift+1 bits).orR).resized) + } + import onExecute.DIVIDER_RSP + + val onPack = new layer.Execute(packAt) { + val needShift = !DIVIDER_RSP.msb + val mantissa = needShift.mux(DIVIDER_RSP(0, internalMantissaSize+2 bits), DIVIDER_RSP(1, internalMantissaSize+2 bits) | U(DIVIDER_RSP(0)).resized) + val exponent = RS1_FP.exponent - RS2_FP.exponent - AFix(U(needShift)) + + packPort.cmd.at(0) := isValid && SEL + packPort.cmd.value.setNormal + packPort.cmd.value.quiet := False + packPort.cmd.value.sign := RS1_FP.sign ^ RS2_FP.sign + packPort.cmd.value.exponent := exponent + packPort.cmd.value.mantissa := mantissa + packPort.cmd.format := FORMAT + packPort.cmd.roundMode := FpuUtils.ROUNDING + packPort.cmd.hartId := Global.HART_ID + packPort.cmd.uopId := Decode.UOP_ID + + val forceOverflow = RS1_FP.isInfinity || RS2_FP.isZero + val infinitynan = RS1_FP.isZero && RS2_FP.isZero || RS1_FP.isInfinity && RS2_FP.isInfinity + val forceNan = RS1_FP.isNan || RS2_FP.isNan || infinitynan + val forceZero = RS1_FP.isZero || RS2_FP.isInfinity + + packPort.cmd.flags.NX := False + packPort.cmd.flags.UF := False + packPort.cmd.flags.OF := False + packPort.cmd.flags.DZ := !forceNan && !RS1_FP.isInfinity && RS2_FP.isZero + packPort.cmd.flags.NV := False + + when(forceNan) { + packPort.cmd.value.setNanQuiet + packPort.cmd.flags.NV setWhen((infinitynan || RS1_FP.isNanSignaling || RS2_FP.isNanSignaling)) + } elsewhen(forceOverflow) { + packPort.cmd.value.setInfinity + } elsewhen(forceZero) { + packPort.cmd.value.setZero + } + } + + buildBefore.release() + } +} diff --git a/src/main/scala/vexiiriscv/execute/fpu/FpuMulPlugin.scala b/src/main/scala/vexiiriscv/execute/fpu/FpuMulPlugin.scala index d09e2c12..4d7cca5e 100644 --- a/src/main/scala/vexiiriscv/execute/fpu/FpuMulPlugin.scala +++ b/src/main/scala/vexiiriscv/execute/fpu/FpuMulPlugin.scala @@ -26,14 +26,11 @@ class FpuMulPlugin(val layer : LaneLayer, val fup = host[FpuUnpackerPlugin] val fpp = host[FpuPackerPlugin] val fasp = host[FpuAddSharedPlugin] - val ffwp = host[FpuFlagsWritebackPlugin] val mp = host[MulReuse] - val buildBefore = retains(layer.el.pipelineLock, ffwp.elaborationLock) + val buildBefore = retains(layer.el.pipelineLock) val uopLock = retains(layer.el.uopLock, fup.elaborationLock, fpp.elaborationLock) awaitBuild() - val flagsWb = ffwp.createPort(List(packAt)) - val packParam = FloatUnpackedParam( exponentMax = p.unpackedConfig.exponentMax * 2 + 1, exponentMin = p.unpackedConfig.exponentMin * 2, @@ -130,13 +127,12 @@ class FpuMulPlugin(val layer : LaneLayer, import norm._ val onPack = new layer.el.Execute(packAt) { - flagsWb.ats(0) := isValid && SEL - flagsWb.flags.clearAll() + val nv = False val mode = FloatMode.NORMAL() when(FORCE_NAN) { mode := FloatMode.NAN - flagsWb.flags.NV setWhen ((INFINITY_NAN || RS1_FP.isNanSignaling || RS2_FP.isNanSignaling)) + nv setWhen ((INFINITY_NAN || RS1_FP.isNanSignaling || RS2_FP.isNanSignaling)) }.elsewhen(FORCE_OVERFLOW) { mode := FloatMode.INF }.elsewhen(FORCE_ZERO) { @@ -153,6 +149,7 @@ class FpuMulPlugin(val layer : LaneLayer, packPort.cmd.roundMode := FpuUtils.ROUNDING packPort.cmd.hartId := Global.HART_ID packPort.cmd.uopId := Decode.UOP_ID + packPort.cmd.flags.assign(NV = nv) if(withFma) { addPort.cmd.at(0) := isValid && SEL && FMA @@ -166,6 +163,7 @@ class FpuMulPlugin(val layer : LaneLayer, addPort.cmd.roundMode := FpuUtils.ROUNDING addPort.cmd.hartId := Global.HART_ID addPort.cmd.uopId := Decode.UOP_ID + addPort.cmd.flags.assign(NV = nv) } } diff --git a/src/main/scala/vexiiriscv/execute/fpu/FpuPackerPlugin.scala b/src/main/scala/vexiiriscv/execute/fpu/FpuPackerPlugin.scala index 5a354358..530e2cee 100644 --- a/src/main/scala/vexiiriscv/execute/fpu/FpuPackerPlugin.scala +++ b/src/main/scala/vexiiriscv/execute/fpu/FpuPackerPlugin.scala @@ -22,6 +22,7 @@ case class FpuPackerCmd(p : FloatUnpackedParam, ats : Seq[Int]) extends Bundle{ val roundMode = FpuRoundMode() val hartId = Global.HART_ID() val uopId = Decode.UOP_ID() + val flags = FpuFlags() } class FpuPackerPort(_cmd : FpuPackerCmd) extends Area{ @@ -58,7 +59,7 @@ class FpuPackerPlugin(val lane: ExecuteLanePlugin, val wbPorts = mutable.LinkedHashMap[Int, Flow[Bits]]() val uopsAt = mutable.LinkedHashMap[Int, ArrayBuffer[UopLayerSpec]]() for(port <- ports; (uop, at) <- port.uopsAt) uopsAt.getOrElseUpdate(at, ArrayBuffer[UopLayerSpec]()) += uop - val flagsWb = ffwbp.createPort(uopsAt.keys.toList) + val flagsWb = ffwbp.createPort(uopsAt.keys.map(_ + latency).toList) for((at, uops) <- uopsAt) { val port = wbp.createPort(at+latency).setName("FpuPackerPlugin_wb_at_" + at) wbPorts(at) = port @@ -89,6 +90,7 @@ class FpuPackerPlugin(val lane: ExecuteLanePlugin, val VALUE = insert(OhMux.or(reader.oh.asBits, remapped.toSeq)) val FORMAT = insert(reader(_.format)) val ROUNDMODE = insert(reader(_.roundMode)) + val FLAGS = insert(reader(_.flags)) Global.HART_ID := reader(_.hartId) Decode.UOP_ID := reader(_.uopId) valid := reader.oh.orR @@ -248,11 +250,11 @@ class FpuPackerPlugin(val lane: ExecuteLanePlugin, val csr = host[FpuCsrPlugin] flagsWb.ats := GROUP_OH.andMask(valid) - flagsWb.flags.NX := nx - flagsWb.flags.UF := uf - flagsWb.flags.OF := of - flagsWb.flags.DZ := False - flagsWb.flags.NV := False + flagsWb.flags.NX := FLAGS.NX || nx + flagsWb.flags.UF := FLAGS.UF || uf + flagsWb.flags.OF := FLAGS.OF || of + flagsWb.flags.DZ := FLAGS.DZ + flagsWb.flags.NV := FLAGS.NV p.whenDouble(FORMAT) { diff --git a/src/main/scala/vexiiriscv/execute/fpu/FpuSqrtPlugin.scala b/src/main/scala/vexiiriscv/execute/fpu/FpuSqrtPlugin.scala index 3a91cf12..6266ef21 100644 --- a/src/main/scala/vexiiriscv/execute/fpu/FpuSqrtPlugin.scala +++ b/src/main/scala/vexiiriscv/execute/fpu/FpuSqrtPlugin.scala @@ -78,7 +78,6 @@ class FpuSqrtPlugin(val layer : LaneLayer, packPort.cmd.roundMode := FpuUtils.ROUNDING packPort.cmd.hartId := Global.HART_ID packPort.cmd.uopId := Decode.UOP_ID - packPort.cmd.value.setNormal packPort.cmd.value.quiet := False packPort.cmd.value.sign := RS1_FP.sign @@ -89,7 +88,7 @@ class FpuSqrtPlugin(val layer : LaneLayer, packPort.cmd.value.setInfinity } - val NV = False //TODO FPU FLAG + val NV = False when(negative) { packPort.cmd.value.setNanQuiet NV := True @@ -101,6 +100,8 @@ class FpuSqrtPlugin(val layer : LaneLayer, when(RS1_FP.isZero) { packPort.cmd.value.setZero } + + packPort.cmd.flags.assign(NV = NV) } buildBefore.release() diff --git a/src/main/scala/vexiiriscv/execute/fpu/Interface.scala b/src/main/scala/vexiiriscv/execute/fpu/Interface.scala index c35a8ce5..777ac483 100644 --- a/src/main/scala/vexiiriscv/execute/fpu/Interface.scala +++ b/src/main/scala/vexiiriscv/execute/fpu/Interface.scala @@ -130,6 +130,15 @@ case class FpuFlags() extends Bundle{ List(NX, UF, OF, DZ, NV).foreach(_ := False) } + def assign(NX : Bool = False, UF : Bool = False, OF : Bool = False, DZ : Bool = False, NV : Bool = False) = { + this.NX := NX + this.UF := UF + this.OF := OF + this.DZ := DZ + this.NV := NV + this + } + def |(that : FpuFlags) = { val ret = FpuFlags() ret.NX := this.NX | that.NX diff --git a/src/main/scala/vexiiriscv/misc/DivRadix.scala b/src/main/scala/vexiiriscv/misc/DivRadix.scala index 3debf906..6522133c 100644 --- a/src/main/scala/vexiiriscv/misc/DivRadix.scala +++ b/src/main/scala/vexiiriscv/misc/DivRadix.scala @@ -9,6 +9,8 @@ import spinal.lib._ case class DivCmd(width : Int) extends Bundle{ val a,b = UInt(width bits) + val normalized = Bool() + val iterations = UInt(log2Up(width) bits) } case class DivRsp(width : Int) extends Bundle{ @@ -96,6 +98,11 @@ class DivRadix(width : Int, radix : Int) extends DivComp(width) { numerator := io.cmd.a |<< shift } } + when(io.cmd.normalized){ + counter := iterations-1-(io.cmd.iterations >> log2Up(radixBits)) + shifter := io.cmd.a + numerator := 0 + } } when(io.flush){ diff --git a/src/test/scala/vexiiriscv/scratchpad/Synt.scala b/src/test/scala/vexiiriscv/scratchpad/Synt.scala index cb7c080d..4fee39df 100644 --- a/src/test/scala/vexiiriscv/scratchpad/Synt.scala +++ b/src/test/scala/vexiiriscv/scratchpad/Synt.scala @@ -54,56 +54,66 @@ object IntegrationSynthBench extends App{ // add("no fpu") { p => // p.fetchL1Enable = true // p.lsuL1Enable = true -// p.lsuL1Sets = 64 -// p.lsuL1Ways = 1 // p.relaxedBranch = true // p.withMul = true +// p.withDiv = true +// p.skipFma = true +// p.divArea = false // } -// -// + + add("no fpu with bypass") { p => + p.fetchL1Enable = true + p.lsuL1Enable = true + p.relaxedBranch = true + p.withMul = true + p.withDiv = true + p.skipFma = true + p.divArea = false + p.allowBypassFrom = 0 + } + + // // add("with fpu no fma") { p => // p.fetchL1Enable = true // p.lsuL1Enable = true -// p.lsuL1Sets = 64 -// p.lsuL1Ways = 1 // p.relaxedBranch = true // p.withRvf = true -// p.withRvf = true // p.withMul = true +// p.withDiv = true // p.skipFma = true +// p.divArea = false // } // // add("with fpu inaccurate fma") { p => // p.fetchL1Enable = true // p.lsuL1Enable = true -// p.lsuL1Sets = 64 -// p.lsuL1Ways = 1 // p.relaxedBranch = true // p.withRvf = true -// p.withRvf = true // p.withMul = true +// p.withDiv = true // p.fpuFmaFullAccuracy = false +// p.divArea = false // } // add("with fpu") { p => // p.fetchL1Enable = true // p.lsuL1Enable = true -// p.lsuL1Sets = 64 -// p.lsuL1Ways = 1 // p.relaxedBranch = true // p.withRvf = true // p.withMul = true +// p.withDiv = true +// p.divArea = false +// } +// +// add("with fpu bypass") { p => +// p.fetchL1Enable = true +// p.lsuL1Enable = true +// p.relaxedBranch = true +// p.withRvf = true +// p.withMul = true +// p.withDiv = true +// p.allowBypassFrom = 0 +// p.divArea = false // } - - add("with fpu bypass") { p => - p.fetchL1Enable = true - p.lsuL1Enable = true - p.lsuL1Sets = 64 - p.lsuL1Ways = 1 - p.relaxedBranch = true - p.withRvf = true - p.withMul = true - p.allowBypassFrom = 0 - } // add ("fmax") { p => @@ -1070,4 +1080,20 @@ with_fpu_bypass -> Artix 7 -> 90 Mhz 3773 LUT 2996 FF Artix 7 -> 183 Mhz 4158 LUT 3029 FF +no_fpu -> +Artix 7 -> 90 Mhz 1725 LUT 1715 FF +Artix 7 -> 206 Mhz 1950 LUT 1745 FF +with_fpu_no_fma -> +Artix 7 -> 90 Mhz 3048 LUT 2855 FF +Artix 7 -> 181 Mhz 3478 LUT 2871 FF +with_fpu_inaccurate_fma -> +Artix 7 -> 90 Mhz 3317 LUT 2985 FF +Artix 7 -> 180 Mhz 3689 LUT 3022 FF +with_fpu -> +Artix 7 -> 90 Mhz 3504 LUT 3097 FF +Artix 7 -> 184 Mhz 3898 LUT 3149 FF +with_fpu_bypass -> +Artix 7 -> 90 Mhz 4217 LUT 3227 FF +Artix 7 -> 185 Mhz 4634 LUT 3264 FF + */ \ No newline at end of file