Skip to content

Commit

Permalink
WIP [MemPool-Spatz][FPU]
Browse files Browse the repository at this point in the history
1. Add MemPool-Spatz and MinPool-Spatz FPU configurations
2. WIP: Add FPU support for these configurations.
3. Various bug fixes.
  • Loading branch information
msc23h24 Diyou Shen (dishen) committed Dec 1, 2023
1 parent fca9e9b commit e447292
Show file tree
Hide file tree
Showing 22 changed files with 1,888 additions and 62 deletions.
2 changes: 1 addition & 1 deletion Bender.lock
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ packages:
dependencies:
- common_cells
spatz:
revision: 51c88bfa3287b0206165d5edb4a31a2c3e23ab94
revision: c8e444c6b64e2b905f4767d158fa3a8c718fef39
version: null
source:
Git: git@iis-git.ee.ethz.ch:spatz/spatz.git
Expand Down
3 changes: 2 additions & 1 deletion Bender.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ dependencies:
reqrsp_interface: { path: "hardware/deps/reqrsp_interface" }
snitch: { path: "hardware/deps/snitch" }
tech_cells_generic: { git: "https://github.com/pulp-platform/tech_cells_generic.git", version: 0.2.5 }
spatz: { git: "git@iis-git.ee.ethz.ch:spatz/spatz.git", rev: 51c88bf }
spatz: { git: "git@iis-git.ee.ethz.ch:spatz/spatz.git", rev: c8e444c }
FPnew: { git: "https://github.com/pulp-platform/cvfpu.git", rev: pulp-v0.1.3 }

workspace:
Expand All @@ -30,6 +30,7 @@ sources:
- hardware/src/mempool_cc.sv
- hardware/src/snitch_addr_demux.sv
- hardware/src/tcdm_adapter.sv
- hardware/src/tcdm_id_remapper.sv
- hardware/src/tcdm_shim.sv
- hardware/src/tcdm_wide_narrow_mux.sv
- hardware/src/address_scrambler.sv
Expand Down
2 changes: 1 addition & 1 deletion config/mempool_spatz4.mk
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,4 @@ n_ipu ?= 4
n_fpu ?= 0

# Deactivate the XpulpIMG extension
xpulpimg ?= 0
xpulpimg ?= 0
43 changes: 43 additions & 0 deletions config/mempool_spatz4_fpu.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Copyright 2021 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0

# Author: Matheus Cavalcante, ETH Zurich

###############
## MemPool ##
###############

# Default parameters for the mempool_spatz4_fpu configuration.
# Every value is assigned with `?=`, so it only takes effect when the
# variable has not already been defined.

# Number of cores
num_cores ?= 64

# Number of groups
num_groups ?= 4

# Number of cores per MemPool tile
num_cores_per_tile ?= 1

# L1 scratchpad banking factor
banking_factor ?= 4

# Radix for hierarchical AXI interconnect
axi_hier_radix ?= 20

# Number of AXI masters per group
axi_masters_per_group ?= 1

# Activate Spatz and RVV
spatz ?= 1

# Length of single vector register
vlen ?= 512

# Number of IPUs
n_ipu ?= 4

# Number of FPUs (presumably counted the same way as n_ipu -- TODO confirm)
n_fpu ?= 4

# Deactivate the XpulpIMG extension
xpulpimg ?= 0

# NOTE(review): presumably activates the RVF (single-precision
# floating-point) extension -- confirm against the scripts that read it
rvf ?= 1
53 changes: 53 additions & 0 deletions config/minpool_spatz4.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Copyright 2021 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0

# Author: Matheus Cavalcante, ETH Zurich

###############
## MemPool ##
###############

# Default parameters for the minpool_spatz4 configuration.
# Every value is assigned with `?=`, so it only takes effect when the
# variable has not already been defined.

# Number of cores
num_cores ?= 4

# Number of groups
num_groups ?= 4

# Number of cores per MemPool tile
num_cores_per_tile ?= 1

# L1 scratchpad banking factor
banking_factor ?= 4

#########################
## AXI configuration ##
#########################
# AXI bus data width (in bits)
axi_data_width ?= 256

# Read-only cache line width in AXI interconnect (in bits)
ro_line_width ?= 256

# Number of DMA backends in each group
dmas_per_group ?= 1

# Radix for hierarchical AXI interconnect
axi_hier_radix ?= 2

# Number of AXI masters per group
axi_masters_per_group ?= 1

# Activate Spatz and RVV
spatz ?= 1

# Length of single vector register
vlen ?= 512

# Number of IPUs
n_ipu ?= 4

# Number of FPUs (presumably 0 means no FPU is instantiated -- TODO confirm)
n_fpu ?= 0

# Deactivate the XpulpIMG extension
xpulpimg ?= 0
55 changes: 55 additions & 0 deletions config/minpool_spatz4_fpu.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Copyright 2021 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0

# Author: Matheus Cavalcante, ETH Zurich

###############
## MemPool ##
###############

# Default parameters for the minpool_spatz4_fpu configuration.
# Every value is assigned with `?=`, so it only takes effect when the
# variable has not already been defined.

# Number of cores
num_cores ?= 4

# Number of groups
num_groups ?= 4

# Number of cores per MemPool tile
num_cores_per_tile ?= 1

# L1 scratchpad banking factor
banking_factor ?= 4

#########################
## AXI configuration ##
#########################
# AXI bus data width (in bits)
axi_data_width ?= 256

# Read-only cache line width in AXI interconnect (in bits)
ro_line_width ?= 256

# Number of DMA backends in each group
dmas_per_group ?= 1

# Radix for hierarchical AXI interconnect
axi_hier_radix ?= 2

# Number of AXI masters per group
axi_masters_per_group ?= 1

# Activate Spatz and RVV
spatz ?= 1

# Length of single vector register
vlen ?= 512

# Number of IPUs
n_ipu ?= 4

# Number of FPUs (presumably counted the same way as n_ipu -- TODO confirm)
n_fpu ?= 4

# NOTE(review): presumably activates the RVF (single-precision
# floating-point) extension -- confirm against the scripts that read it
rvf ?= 1

# Deactivate the XpulpIMG extension
xpulpimg ?= 0
25 changes: 18 additions & 7 deletions hardware/deps/snitch/src/snitch_md.sv
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,21 @@ module snitch_md
/// AXI-like handshaking.
/// Same IDs need to be handled in-order.
output logic [31:0] acc_qaddr_o,
`ifdef TARGET_SPATZ
output logic [5:0] acc_qid_o, // what should be the id width, 5 or 6?
input logic [5:0] acc_pid_i,
`else
output logic [4:0] acc_qid_o, // what should be the id width, 5 or 6?
input logic [4:0] acc_pid_i,
`endif
output logic [31:0] acc_qdata_op_o,
output logic [31:0] acc_qdata_arga_o,
output logic [31:0] acc_qdata_argb_o,
output logic [31:0] acc_qdata_argc_o,
output logic acc_qvalid_o,
input logic acc_qready_i,
input logic [31:0] acc_pdata_i,
input logic [4:0] acc_pid_i,
input logic acc_pwrite_i,
input logic acc_perror_i,
input logic acc_pvalid_i,
output logic acc_pready_o,
Expand Down Expand Up @@ -2111,7 +2117,8 @@ module snitch_md
riscv_instr::FEQ_S,
riscv_instr::FMV_X_S,
riscv_instr::FCVT_W_S,
riscv_instr::FCVT_WU_S: begin
riscv_instr::FCVT_WU_S,
riscv_instr::FMV_X_W: begin
if (FP_EN && RVF) begin
acc_register_rd = 1'b1;
acc_qvalid_o = valid_instr;
Expand Down Expand Up @@ -2179,7 +2186,8 @@ module snitch_md
end

// Single Precision Floating-Point
riscv_instr::FMV_S_X,
// riscv_instr::FMV_S_X,
riscv_instr::FMV_W_X,
riscv_instr::FCVT_S_W,
riscv_instr::FCVT_S_WU: begin
if (FP_EN && RVF) begin
Expand Down Expand Up @@ -2767,7 +2775,7 @@ module snitch_md
gpr_we[0] = 1'b1;
gpr_waddr[0] = lsu_rd;
gpr_wdata[0] = ld_result[31:0];
end else if (acc_pvalid_i) begin
end else if (acc_pvalid_i & acc_pwrite_i) begin
// if we are not retiring another instruction retire the accelerated one now
retire_acc = 1'b1;
gpr_we[0] = 1'b1;
Expand All @@ -2789,7 +2797,8 @@ module snitch_md
gpr_wdata[1] = ld_result[31:0];
// external interfaces
// Snitch and LSU have priority
lsu_pready = 1'b1;
// lsu_pready = 1'b1;
lsu_pready = 1'b0;
acc_pready_o = 1'b0;
retire_acc = 1'b0;
retire_load = 1'b0;
Expand All @@ -2799,7 +2808,8 @@ module snitch_md
if (lsu_pvalid) begin
retire_load = 1'b1;
gpr_we[1] = 1'b1;
end else if (acc_pvalid_i) begin
lsu_pready = 1'b1;
end else if (acc_pvalid_i & acc_pwrite_i) begin
retire_acc = 1'b1;
gpr_we[1] = 1'b1;
gpr_waddr[1] = acc_pid_i;
Expand All @@ -2808,7 +2818,7 @@ module snitch_md
end
// if we are not retiring another instruction retire the load now
end else begin
if (acc_pvalid_i) begin
if (acc_pvalid_i & acc_pwrite_i) begin
retire_acc = 1'b1;
gpr_we[0] = 1'b1;
gpr_waddr[0] = acc_pid_i;
Expand All @@ -2818,6 +2828,7 @@ module snitch_md
if (lsu_pvalid) begin
retire_load = 1'b1;
gpr_we[1] = 1'b1;
lsu_pready = 1'b1;
end
end
end
Expand Down
102 changes: 51 additions & 51 deletions hardware/scripts/questa/wave.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -34,54 +34,54 @@ for {set core 0} {$core < [examine -radix dec mempool_pkg::NumCoresPerTile]} {i
}

# Add specific cores from different tiles
do ../scripts/questa/wave_core.tcl 1 0 0

# Add groups
for {set group 0} {$group < [examine -radix dec /mempool_pkg::NumGroups]} {incr group} {
# Add tiles
for {set tile 0} {$tile < [expr min(4,[examine -radix dec /mempool_pkg::NumTilesPerGroup])]} {incr tile} {
do ../scripts/questa/wave_tile.tcl $group $tile
}

# Interconnects
for {set tgtgroup 0} {$tgtgroup < [examine -radix dec /mempool_pkg::NumGroups]} {incr tgtgroup} {
if {$tgtgroup != $group} {
set interco_idx [expr $group ^ $tgtgroup]
add wave -group group_[$group] -group interconnect_to_group[$tgtgroup] /mempool_tb/dut/i_mempool_cluster/gen_groups[$group]/i_group/gen_remote_interco[$interco_idx]/i_remote_interco/*
}
}
add wave -group group_[$group] -group interconnect_local /mempool_tb/dut/i_mempool_cluster/gen_groups[$group]/i_group/i_local_interco/*
}

# Add cluster
do ../scripts/questa/wave_cluster.tcl

add wave -Group Control_Registers /mempool_tb/dut/i_ctrl_registers/*

add wave -Group DMA /mempool_tb/dut/i_mempool_dma/*
add wave -Group DMA -Group Reg /mempool_tb/dut/i_mempool_dma/i_mempool_dma_frontend_reg_top/*
for {set group 0} {$group < [examine -radix dec /mempool_pkg::NumGroups]} {incr group} {
add wave -Group DMA_midend_${group} /mempool_tb/dut/i_mempool_cluster/gen_groups[$group]/i_group/i_idma_distributed_midend/NoMstPorts
add wave -Group DMA_midend_${group} /mempool_tb/dut/i_mempool_cluster/gen_groups[$group]/i_group/i_idma_distributed_midend/DmaRegionWidth
add wave -Group DMA_midend_${group} /mempool_tb/dut/i_mempool_cluster/gen_groups[$group]/i_group/i_idma_distributed_midend/DmaRegionStart
add wave -Group DMA_midend_${group} /mempool_tb/dut/i_mempool_cluster/gen_groups[$group]/i_group/i_idma_distributed_midend/DmaRegionEnd
add wave -Group DMA_midend_${group} /mempool_tb/dut/i_mempool_cluster/gen_groups[$group]/i_group/i_idma_distributed_midend/DmaRegionAddressBits
add wave -Group DMA_midend_${group} /mempool_tb/dut/i_mempool_cluster/gen_groups[$group]/i_group/i_idma_distributed_midend/FullRegionAddressBits
add wave -Group DMA_midend_${group} /mempool_tb/dut/i_mempool_cluster/gen_groups[$group]/i_group/i_idma_distributed_midend/*
for {set dma 0} {$dma < [examine -radix dec /mempool_pkg::NumDmasPerGroup]} {incr dma} {
add wave -Group DMA_${group}_${dma} /mempool_tb/dut/i_mempool_cluster/gen_groups[$group]/i_group/gen_dmas[$dma]/i_axi_dma_backend/*
}
}

add wave -Group DMA_midend_cluster /mempool_tb/dut/i_mempool_cluster/i_idma_distributed_midend/NoMstPorts
add wave -Group DMA_midend_cluster /mempool_tb/dut/i_mempool_cluster/i_idma_distributed_midend/DmaRegionWidth
add wave -Group DMA_midend_cluster /mempool_tb/dut/i_mempool_cluster/i_idma_distributed_midend/DmaRegionStart
add wave -Group DMA_midend_cluster /mempool_tb/dut/i_mempool_cluster/i_idma_distributed_midend/DmaRegionEnd
add wave -Group DMA_midend_cluster /mempool_tb/dut/i_mempool_cluster/i_idma_distributed_midend/DmaRegionAddressBits
add wave -Group DMA_midend_cluster /mempool_tb/dut/i_mempool_cluster/i_idma_distributed_midend/FullRegionAddressBits
add wave -Group DMA_midend_cluster /mempool_tb/dut/i_mempool_cluster/i_idma_distributed_midend/*


add wave -Group DMA_split /mempool_tb/dut/i_mempool_cluster/i_idma_split_midend/*

do ../scripts/questa/wave_cache.tcl 0 0 0
do ../scripts/questa/wave_core.tcl 3 0 0

# # Add groups
# for {set group 0} {$group < [examine -radix dec /mempool_pkg::NumGroups]} {incr group} {
# # Add tiles
# for {set tile 0} {$tile < [expr min(4,[examine -radix dec /mempool_pkg::NumTilesPerGroup])]} {incr tile} {
# do ../scripts/questa/wave_tile.tcl $group $tile
# }

# # Interconnects
# for {set tgtgroup 0} {$tgtgroup < [examine -radix dec /mempool_pkg::NumGroups]} {incr tgtgroup} {
# if {$tgtgroup != $group} {
# set interco_idx [expr $group ^ $tgtgroup]
# add wave -group group_[$group] -group interconnect_to_group[$tgtgroup] /mempool_tb/dut/i_mempool_cluster/gen_groups[$group]/i_group/gen_remote_interco[$interco_idx]/i_remote_interco/*
# }
# }
# add wave -group group_[$group] -group interconnect_local /mempool_tb/dut/i_mempool_cluster/gen_groups[$group]/i_group/i_local_interco/*
# }

# # Add cluster
# do ../scripts/questa/wave_cluster.tcl

# add wave -Group Control_Registers /mempool_tb/dut/i_ctrl_registers/*

# add wave -Group DMA /mempool_tb/dut/i_mempool_dma/*
# add wave -Group DMA -Group Reg /mempool_tb/dut/i_mempool_dma/i_mempool_dma_frontend_reg_top/*
# for {set group 0} {$group < [examine -radix dec /mempool_pkg::NumGroups]} {incr group} {
# add wave -Group DMA_midend_${group} /mempool_tb/dut/i_mempool_cluster/gen_groups[$group]/i_group/i_idma_distributed_midend/NoMstPorts
# add wave -Group DMA_midend_${group} /mempool_tb/dut/i_mempool_cluster/gen_groups[$group]/i_group/i_idma_distributed_midend/DmaRegionWidth
# add wave -Group DMA_midend_${group} /mempool_tb/dut/i_mempool_cluster/gen_groups[$group]/i_group/i_idma_distributed_midend/DmaRegionStart
# add wave -Group DMA_midend_${group} /mempool_tb/dut/i_mempool_cluster/gen_groups[$group]/i_group/i_idma_distributed_midend/DmaRegionEnd
# add wave -Group DMA_midend_${group} /mempool_tb/dut/i_mempool_cluster/gen_groups[$group]/i_group/i_idma_distributed_midend/DmaRegionAddressBits
# add wave -Group DMA_midend_${group} /mempool_tb/dut/i_mempool_cluster/gen_groups[$group]/i_group/i_idma_distributed_midend/FullRegionAddressBits
# add wave -Group DMA_midend_${group} /mempool_tb/dut/i_mempool_cluster/gen_groups[$group]/i_group/i_idma_distributed_midend/*
# for {set dma 0} {$dma < [examine -radix dec /mempool_pkg::NumDmasPerGroup]} {incr dma} {
# add wave -Group DMA_${group}_${dma} /mempool_tb/dut/i_mempool_cluster/gen_groups[$group]/i_group/gen_dmas[$dma]/i_axi_dma_backend/*
# }
# }

# add wave -Group DMA_midend_cluster /mempool_tb/dut/i_mempool_cluster/i_idma_distributed_midend/NoMstPorts
# add wave -Group DMA_midend_cluster /mempool_tb/dut/i_mempool_cluster/i_idma_distributed_midend/DmaRegionWidth
# add wave -Group DMA_midend_cluster /mempool_tb/dut/i_mempool_cluster/i_idma_distributed_midend/DmaRegionStart
# add wave -Group DMA_midend_cluster /mempool_tb/dut/i_mempool_cluster/i_idma_distributed_midend/DmaRegionEnd
# add wave -Group DMA_midend_cluster /mempool_tb/dut/i_mempool_cluster/i_idma_distributed_midend/DmaRegionAddressBits
# add wave -Group DMA_midend_cluster /mempool_tb/dut/i_mempool_cluster/i_idma_distributed_midend/FullRegionAddressBits
# add wave -Group DMA_midend_cluster /mempool_tb/dut/i_mempool_cluster/i_idma_distributed_midend/*


# add wave -Group DMA_split /mempool_tb/dut/i_mempool_cluster/i_idma_split_midend/*

# do ../scripts/questa/wave_cache.tcl 0 0 0
Loading

0 comments on commit e447292

Please sign in to comment.