From 4245d87e433979234af3766b2cffe37f7a97dc4c Mon Sep 17 00:00:00 2001 From: "msc23h24 Diyou Shen (dishen)" Date: Wed, 22 Nov 2023 10:49:04 +0100 Subject: [PATCH] WIP: mempool-spatz: 1. Port spatz into mempool 2. Add new configuration file for mempool-spatz 3. Various bug fixes Note: spatz accelerator is not tested yet. --- Bender.lock | 23 +++++++++++++++++++++++ Bender.yml | 2 +- Makefile | 8 ++++++-- config/mempool.mk | 2 ++ config/mempool_spatz4.mk | 6 ++++-- hardware/Makefile | 18 +++++++++++++----- hardware/deps/snitch/Bender.yml | 2 +- hardware/deps/snitch/src/snitch_ipu.sv | 4 ++++ hardware/deps/snitch/src/snitch_md.sv | 2 ++ hardware/deps/snitch/src/snitch_pkg.sv | 4 ++++ hardware/src/mempool_pkg.sv | 24 +++++++++++++++++------- hardware/src/mempool_tile.sv | 7 ++++--- 12 files changed, 81 insertions(+), 21 deletions(-) diff --git a/Bender.lock b/Bender.lock index c505479c7..47131d0be 100644 --- a/Bender.lock +++ b/Bender.lock @@ -69,6 +69,14 @@ packages: dependencies: - axi - common_cells + riscv-dbg: + revision: null + version: null + source: + Path: hardware/deps/riscv-dbg + dependencies: + - common_cells + - tech_cells_generic snitch: revision: ~ version: ~ @@ -76,6 +84,21 @@ packages: Path: hardware/deps/snitch dependencies: - common_cells + spatz: + revision: efec12edf249a7fc8819b00ca5452c553f446675 + version: null + source: + Git: git@iis-git.ee.ethz.ch:spatz/spatz.git + dependencies: + - axi + - axi_riscv_atomics + - common_cells + - fpnew + - idma + - register_interface + - reqrsp_interface + - riscv-dbg + - tech_cells_generic tech_cells_generic: revision: 78e817972307bcb9f6c77d18d64480ef595f03c9 version: 0.2.5 diff --git a/Bender.yml b/Bender.yml index b67fd6e93..1bb4af7f2 100644 --- a/Bender.yml +++ b/Bender.yml @@ -14,7 +14,7 @@ dependencies: reqrsp_interface: { path: "hardware/deps/reqrsp_interface" } snitch: { path: "hardware/deps/snitch" } tech_cells_generic: { git: "https://github.com/pulp-platform/tech_cells_generic.git", version: 0.2.5 } - # spatz: { git: "git@iis-git.ee.ethz.ch:spatz/spatz.git", rev: 14d06547c3ac3e20a19880a02cdb43f79213c8c7 } + spatz: { git: "git@iis-git.ee.ethz.ch:spatz/spatz.git", rev: efec12e } FPnew: { git: "https://github.com/pulp-platform/cvfpu.git", rev: pulp-v0.1.3 } workspace: diff --git a/Makefile b/Makefile index 8bf01a62a..62dd2ede3 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -# Copyright 2021 ETH Zurich and University of Bologna. + # Copyright 2021 ETH Zurich and University of Bologna. # Licensed under the Apache License, Version 2.0, see LICENSE for details. # SPDX-License-Identifier: Apache-2.0 @@ -53,7 +53,10 @@ else endif ifeq ($(spatz), 1) - OPCODES := "opcodes-frep_CUSTOM opcodes-rvv opcodes-smallfloat" + RV32XPULPIMG := opcodes-xpulpabs_CUSTOM opcodes-xpulpbr_CUSTOM opcodes-xpulpclip_CUSTOM opcodes-xpulpmacsi_CUSTOM opcodes-xpulpminmax_CUSTOM opcodes-xpulpslet_CUSTOM + RV32XPULPIMG += opcodes-xpulpbitop_CUSTOM + MEMPOOL_ISA := opcodes-frep_CUSTOM $(RV32XPULPIMG) opcodes-xpulppostmod_CUSTOM + OPCODES := "$(MEMPOOL_ISA) opcodes-rvv opcodes-smallfloat" endif # Default target @@ -182,6 +185,7 @@ endif hardware/deps/snitch/src/riscv_instr.sv: toolchain/riscv-opcodes/* ifeq ($(spatz), 1) MY_OPCODES=$(OPCODES) make -C toolchain/riscv-opcodes inst.sverilog +# make -C toolchain/riscv-opcodes inst.sverilog else make -C toolchain/riscv-opcodes inst.sverilog endif diff --git a/config/mempool.mk b/config/mempool.mk index ec2c34154..2eb9e80c0 100644 --- a/config/mempool.mk +++ b/config/mempool.mk @@ -25,3 +25,5 @@ axi_hier_radix ?= 20 # Number of AXI masters per group axi_masters_per_group ?= 1 + +xpulpimg ?= 0 diff --git a/config/mempool_spatz4.mk b/config/mempool_spatz4.mk index aec5d18ea..185784120 100644 --- a/config/mempool_spatz4.mk +++ b/config/mempool_spatz4.mk @@ -21,7 +21,7 @@ num_cores_per_tile ?= 1 banking_factor ?= 4 # Radix for hierarchical AXI interconnect -axi_hier_radix ?= 16 +axi_hier_radix ?= 20 # Number of AXI masters per group axi_masters_per_group ?= 1 @@ -30,10 +30,12 @@ axi_masters_per_group ?= 1 spatz ?= 1 # Lenght of single vector register -vlen ?= 512 +vlen ?= 256 # Number of IPUs n_ipu ?= 4 +n_fpu ?= 0 + # Deactivate the XpulpIMG extension xpulpimg ?= 0 \ No newline at end of file diff --git a/hardware/Makefile b/hardware/Makefile index 2faf4d255..2ce6f25af 100644 --- a/hardware/Makefile +++ b/hardware/Makefile @@ -47,6 +47,8 @@ verilator_top ?= mempool_tb_verilator python ?= python3 # Enable tracing snitch_trace ?= 0 +# Enable spatz (automatically set when using spatz configuration) +spatz ?= 0 # Check if the specified QuestaSim version exists ifeq (, $(shell which $(questa_cmd))) @@ -90,14 +92,20 @@ vlog_args += -work $(library) vlog_defs += -DNUM_CORES=$(num_cores) -DNUM_CORES_PER_TILE=$(num_cores_per_tile) -DNUM_GROUPS=$(num_groups) -DBANKING_FACTOR=$(banking_factor) vlog_defs += -DL2_BASE=$(l2_base) -DL2_SIZE=$(l2_size) -DL2_BANKS=$(l2_banks) vlog_defs += -DL1_BANK_SIZE=$(l1_bank_size) -vlog_defs += -DBOOT_ADDR=$(boot_addr) -DXPULPIMG=$(xpulpimg) +vlog_defs += -DBOOT_ADDR=$(boot_addr) -DXPULPIMG=$(xpulpimg) -DRVV=$(spatz) vlog_defs += -DSNITCH_TRACE=$(snitch_trace) +vlog_defs += -DRVF=$(rvf) -DRVD=$(rvd) vlog_defs += -DAXI_DATA_WIDTH=$(axi_data_width) vlog_defs += -DRO_LINE_WIDTH=$(ro_line_width) vlog_defs += -DDMAS_PER_GROUP=$(dmas_per_group) vlog_defs += -DAXI_HIER_RADIX=$(axi_hier_radix) -DAXI_MASTERS_PER_GROUP=$(axi_masters_per_group) vlog_defs += -DSEQ_MEM_SIZE=$(seq_mem_size) -DXQUEUE_SIZE=$(xqueue_size) +ifeq ($(spatz), 1) + vlog_defs += -DVLEN=$(vlen) -DN_IPU=$(n_ipu) -DN_FPU=$(n_fpu) -DN_FU=$(shell awk 'BEGIN{print ($(n_ipu) > $(n_fpu)) ? $(n_ipu) : $(n_fpu)}') + bender_defs += -t spatz +endif + # Traffic generation enabled ifdef tg tg_ncycles ?= 10000 @@ -140,7 +148,7 @@ $(buildpath)/$(library): .PHONY: compile compile: dpi lib $(buildpath) $(buildpath)/compile.tcl update_opcodes $(buildpath)/compile.tcl: $(bender) $(config_mk) Makefile $(MEMPOOL_DIR)/Bender.yml $(shell find {src,tb,deps} -type f) - $(bender) script vsim --vlog-arg="$(vlog_args)" $(vlog_defs) -t rtl -t mempool_vsim > $(buildpath)/compile.tcl + $(bender) script vsim --vlog-arg="$(vlog_args)" $(vlog_defs) -t rtl -t mempool_vsim $(bender_defs) > $(buildpath)/compile.tcl echo "exit" >> $(buildpath)/compile.tcl cd $(buildpath) && $(questa_cmd) vsim -work $(library) -c -do compile.tcl @@ -177,7 +185,7 @@ $(buildpath)/$(dpi_library)/mempool_dpi.so: $(dpi) .PHONY: elabvcs elabvcs: dpivcs $(buildpath) $(buildpath)/compilevcs.sh update_opcodes $(buildpath)/compilevcs.sh: $(bender) $(config_mk) Makefile $(MEMPOOL_DIR)/Bender.yml $(shell find {src,tb,deps} -type f) - $(bender) script vcs --vlogan-bin="$(vcs_cmd) vlogan" --vlog-arg="$(vlogan_args)" $(vlog_defs) -t rtl -t mempool_vsim > $(buildpath)/compilevcs.sh + $(bender) script vcs --vlogan-bin="$(vcs_cmd) vlogan" --vlog-arg="$(vlogan_args)" $(vlog_defs) -t rtl -t mempool_vsim $(bender_defs) > $(buildpath)/compilevcs.sh echo "exit" >> $(buildpath)/compilevcs.sh # Call VCS cd $(buildpath) && \ @@ -251,7 +259,7 @@ $(VERILATOR_MK): $(VERILATOR_CONF) $(VERILATOR_WAIVE) $(MEMPOOL_DIR)/Bender.yml # Overwrite Bootaddress to L2 base while we don't have a DPI to write a wake-up $(eval boot_addr=$(l2_base)) # Create Bender script of all RTL files - $(bender) script verilator $(vlog_defs) -t rtl -t mempool_verilator > $(verilator_files) + $(bender) script verilator $(vlog_defs) -t rtl -t mempool_verilator $(bender_defs) > $(verilator_files) # Append the verilator library files @echo '' >> $(verilator_files) # Append the verilator library files: Includes @@ -283,7 +291,7 @@ lint: spyglass/tmp/files spyglass/sdc/func.sdc spyglass/scripts/run_lint.tcl spyglass/tmp/files: $(bender) mkdir -p spyglass/tmp - $(bender) script verilator $(vlog_defs) -t rtl -t mempool_verilator > spyglass/tmp/files + $(bender) script verilator $(vlog_defs) -t rtl -t mempool_verilator $(bender_defs) > spyglass/tmp/files ################ # Tracing # diff --git a/hardware/deps/snitch/Bender.yml b/hardware/deps/snitch/Bender.yml index 8a3d0752b..5b0597a64 100644 --- a/hardware/deps/snitch/Bender.yml +++ b/hardware/deps/snitch/Bender.yml @@ -21,7 +21,7 @@ sources: - src/snitch_axi_pkg.sv - src/snitch_icache/snitch_icache_pkg.sv # rest of RTL - - src/snitch.sv + # - src/snitch.sv - src/snitch_md.sv - src/snitch_regfile_ff.sv # - src/snitch_regfile_latch.sv diff --git a/hardware/deps/snitch/src/snitch_ipu.sv b/hardware/deps/snitch/src/snitch_ipu.sv index 931eeba3e..053a07d17 100644 --- a/hardware/deps/snitch/src/snitch_ipu.sv +++ b/hardware/deps/snitch/src/snitch_ipu.sv @@ -72,6 +72,7 @@ module snitch_ipu #( div_valid_op = acc_qvalid_i; acc_qready_o = div_ready_op; end +`ifdef XPULPIMG_EXTENSION riscv_instr::P_ABS, // Xpulpimg: p.abs riscv_instr::P_SLET, // Xpulpimg: p.slet riscv_instr::P_SLETU, // Xpulpimg: p.sletu @@ -228,6 +229,7 @@ module snitch_ipu #( illegal_instruction = 1'b1; end end +`endif default: illegal_instruction = 1'b1; endcase end @@ -458,6 +460,7 @@ module dspu #( mac_op = MulHigh; res_sel = Mac; end +`ifdef XPULPIMG_EXTENSION // Instructions from Xpulpimg riscv_instr::P_ABS: begin cmp_op_b_sel = Zero; @@ -1299,6 +1302,7 @@ module dspu #( simd_mode = High; res_sel = Simd; end +`endif default: ; endcase end diff --git a/hardware/deps/snitch/src/snitch_md.sv b/hardware/deps/snitch/src/snitch_md.sv index 4b0e6ae96..f12c356e4 100644 --- a/hardware/deps/snitch/src/snitch_md.sv +++ b/hardware/deps/snitch/src/snitch_md.sv @@ -1350,6 +1350,7 @@ module snitch_md /* end of RVV extension */ /* Xpulpimg extension */ +`ifdef XPULPIMG_EXTENSION // Post-increment loads/stores riscv_instr::P_LB_IRPOST: begin // Xpulpimg: p.lb rd,iimm(rs1!) if (snitch_pkg::XPULPIMG) begin @@ -1893,6 +1894,7 @@ module snitch_md illegal_inst = 1'b1; end end +`endif /* end of Xpulpimg extension */ /* Floating point instructions */ diff --git a/hardware/deps/snitch/src/snitch_pkg.sv b/hardware/deps/snitch/src/snitch_pkg.sv index 99f5418fa..d736955c6 100644 --- a/hardware/deps/snitch/src/snitch_pkg.sv +++ b/hardware/deps/snitch/src/snitch_pkg.sv @@ -14,8 +14,12 @@ package snitch_pkg; localparam int NumIntOutstandingLoads = `ifdef TRAFFIC_GEN 2048 `else 8 `endif; localparam MetaIdWidth = idx_width(NumIntOutstandingLoads); // Xpulpimg extension enabled? +`ifdef XPULPIMG + localparam bit XPULPIMG_EXTENSION = 1'b1; +`endif localparam bit XPULPIMG = `ifdef XPULPIMG `XPULPIMG `else 1'bX `endif; + typedef logic [31:0] addr_t; typedef logic [DataWidth-1:0] data_t; typedef logic [StrbWidth-1:0] strb_t; diff --git a/hardware/src/mempool_pkg.sv b/hardware/src/mempool_pkg.sv index 881d1d58e..b8bf9b2e3 100644 --- a/hardware/src/mempool_pkg.sv +++ b/hardware/src/mempool_pkg.sv @@ -43,15 +43,20 @@ package mempool_pkg; /*********************** * MEMORY PARAMETERS * ***********************/ + // Element widths + localparam integer unsigned XLEN = 32; // Snitch always operates on 32 bit integers + localparam integer unsigned FLEN = RVD ? 64 : 32; + localparam integer unsigned ELEN = RVD ? 64 : 32; localparam integer unsigned AddrWidth = 32; - localparam integer unsigned DataWidth = 32; + localparam integer unsigned DataWidth = ELEN; localparam integer unsigned BeWidth = DataWidth / 8; localparam integer unsigned ByteOffset = $clog2(BeWidth); localparam integer unsigned BankingFactor = `ifdef BANKING_FACTOR `BANKING_FACTOR `else 0 `endif; localparam bit LrScEnable = 1'b1; localparam integer unsigned TCDMSizePerBank = `ifdef L1_BANK_SIZE `L1_BANK_SIZE `else 0 `endif; - localparam integer unsigned NumBanks = NumCores * BankingFactor; + // localparam integer unsigned NumBanks = NumCores * BankingFactor; + localparam integer unsigned NumBanks = NumCores * NumFUsPerCore * BankingFactor; localparam integer unsigned NumBanksPerTile = NumBanks / NumTiles; localparam integer unsigned NumBanksPerGroup = NumBanks / NumGroups; localparam integer unsigned TCDMAddrMemWidth = $clog2(TCDMSizePerBank / mempool_pkg::BeWidth); @@ -144,16 +149,20 @@ package mempool_pkg; * INSTRUCTION CACHE * ***********************/ - localparam int unsigned ICacheSizeByte = 512 * NumCoresPerCache; // Total Size of instruction cache in bytes - localparam int unsigned ICacheSets = NumCoresPerCache / 2; // Number of sets - localparam int unsigned ICacheLineWidth = 32 * 2 * NumCoresPerCache; // Size of each cache line in bits + // localparam int unsigned ICacheSizeByte = 512 * NumCoresPerCache; // Total Size of instruction cache in bytes + // localparam int unsigned ICacheSets = NumCoresPerCache / 2; // Number of sets + // localparam int unsigned ICacheLineWidth = 32 * 2 * NumCoresPerCache; // Size of each cache line in bits + localparam int unsigned ICacheSizeByte = 512 * NumFUsPerTile; // Total Size of instruction cache in bytes + localparam int unsigned ICacheSets = NumFUsPerTile; // Number of sets + localparam int unsigned ICacheLineWidth = 32 * 2 * NumFUsPerTile; // Size of each cache line in bits /********************* * READ-ONLY CACHE * *********************/ localparam int unsigned AxiHierRadix = `ifdef AXI_HIER_RADIX `AXI_HIER_RADIX `else NumTilesPerGroup `endif; - localparam int unsigned ROCacheLineWidth = `ifdef RO_LINE_WIDTH `RO_LINE_WIDTH `else 0 `endif; + // localparam int unsigned ROCacheLineWidth = `ifdef RO_LINE_WIDTH `RO_LINE_WIDTH `else 0 `endif; + localparam int unsigned ROCacheLineWidth = `ifdef RO_LINE_WIDTH `RO_LINE_WIDTH `else ICacheLineWidth `endif; localparam int unsigned ROCacheSizeByte = 8192; localparam int unsigned ROCacheSets = 2; @@ -213,7 +222,8 @@ package mempool_pkg; typedef logic [TCDMAddrMemWidth-1:0] bank_addr_t; typedef logic [TCDMAddrMemWidth+idx_width(NumBanksPerTile)-1:0] tile_addr_t; typedef logic [MetaIdWidth-1:0] meta_id_t; - typedef logic [idx_width(NumCoresPerTile)-1:0] tile_core_id_t; + // typedef logic [idx_width(NumCoresPerTile)-1:0] tile_core_id_t; + typedef logic [idx_width(NumCoresPerTile * NumDataPortsPerCore)-1:0] tile_core_id_t; typedef logic [idx_width(NumTilesPerGroup)-1:0] tile_group_id_t; typedef logic [idx_width(NumGroups)-1:0] group_id_t; typedef logic [3:0] amo_t; diff --git a/hardware/src/mempool_tile.sv b/hardware/src/mempool_tile.sv index 5b1e76e13..3557b21d1 100644 --- a/hardware/src/mempool_tile.sv +++ b/hardware/src/mempool_tile.sv @@ -123,17 +123,18 @@ module mempool_tile `ifndef TARGET_SPATZ mempool_cc #( .BootAddr (BootAddr) - ) + ) `else spatz_mempool_cc #( .BootAddr ( BootAddr ), - .RVE ( RVE ), - .RVM ( RVM ), + .RVE ( 0 ), + .RVM ( 1 ), .NumMemPortsPerSpatz ( NumMemPortsPerSpatz ), .TCDMPorts ( NumDataPortsPerCore ) ) `endif riscv_core ( + .clk_i (clk_i ), .rst_i (!rst_ni ), .hart_id_i (hart_id ),