From 4fc2c92683ac0afbda687d7a3a8dc93220867dc6 Mon Sep 17 00:00:00 2001 From: Yvan Tortorella Date: Wed, 24 Jan 2024 16:51:19 +0100 Subject: [PATCH 1/7] Allow questasim to return exit code in case of errors. --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2c75e3a8..73b062ea 100644 --- a/Makefile +++ b/Makefile @@ -107,7 +107,7 @@ $(library): compile: $(library) $(dpi) $(dpi-library)/cl_dpi.so @test -f Bender.lock || { echo "ERROR: Bender.lock file does not exist. Did you run make checkout in bender mode?"; exit 1; } @test -f scripts/compile.tcl || { echo "ERROR: scripts/compile.tcl file does not exist. Did you run make scripts in bender mode?"; exit 1; } - $(VSIM) -c -do 'source scripts/compile.tcl; quit' + $(VSIM) -c -do 'quit -code [source scripts/compile.tcl]' build: compile $(dpi) $(VOPT) $(compile_flag) -suppress 3053 -suppress 8885 -work $(library) $(top_level) -o $(top_level)_optimized +acc -check_synthesis From 578cb13b85f46487d9cf6ceb40275879628257ed Mon Sep 17 00:00:00 2001 From: Yvan Tortorella Date: Wed, 24 Jan 2024 16:51:56 +0100 Subject: [PATCH 2/7] Add FPU wrapper as a unit within PULP cluster. --- Bender.yml | 1 + rtl/fpu_wrap.sv | 114 +++++++++++++++++++++++ rtl/pulp_cluster.sv | 216 +++++++++++++++++++++++++------------------- 3 files changed, 237 insertions(+), 94 deletions(-) create mode 100644 rtl/fpu_wrap.sv diff --git a/Bender.yml b/Bender.yml index 15d35abb..e0c2c881 100644 --- a/Bender.yml +++ b/Bender.yml @@ -72,6 +72,7 @@ sources: - rtl/cluster_peripherals.sv - rtl/data_periph_demux.sv - rtl/core_demux_wrap.sv + - rtl/fpu_wrap.sv # Level 2 - rtl/core_region.sv - target: simulation diff --git a/rtl/fpu_wrap.sv b/rtl/fpu_wrap.sv new file mode 100644 index 00000000..5cba5e50 --- /dev/null +++ b/rtl/fpu_wrap.sv @@ -0,0 +1,114 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51
+
+module fpu_wrap // Adapts one APU-style request/response channel to a single fpnew_top instance
+  import riscv_defines::*;
+#(
+  parameter int unsigned DataWidth        = 32,
+  parameter int unsigned FpuNumOperands   = 3,
+  parameter int unsigned FpuOpcodeWidth   = 6,  // fpu_op_i is unpacked as {vec_op, op_mod, op}
+  parameter int unsigned FpuInFlagsWidth  = 15, // fpu_flags_i is unpacked as {int_fmt, src_fmt, dst_fmt, rnd_mode}
+  parameter int unsigned FpuOutFlagsWidth = 5,
+  parameter int unsigned FpuFmtBits       = fpnew_pkg::FP_FORMAT_BITS,
+  parameter int unsigned FpuIntFmtBits    = fpnew_pkg::INT_FORMAT_BITS,
+  parameter int unsigned FpuRoundBits     = 3,
+  parameter int unsigned FpuOpBits        = fpnew_pkg::OP_BITS,
+  parameter int unsigned FpuDivSqrt       = 0   // non-zero: DIVSQRT unit is MERGED, zero: DISABLED (see C_DIV)
+)(
+  // Clock and Reset
+  input  logic                                           clk_i,
+  input  logic                                           rst_ni,
+  input  logic [31:0]                                    hart_id_i,
+  // APU Side: Master port
+  input  logic                                           fpu_req_i,
+  output logic                                           fpu_gnt_o,
+  // request channel
+  input  logic [FpuNumOperands-1:0][DataWidth-1:0]       fpu_operands_i,
+  input  logic [FpuOpcodeWidth-1:0]                      fpu_op_i,
+  input  logic [FpuInFlagsWidth-1:0]                     fpu_flags_i,
+  // response channel
+  output logic                                           fpu_valid_o,
+  output logic [DataWidth-1:0]                           fpu_result_o,
+  output logic [FpuOutFlagsWidth-1:0]                    fpu_flags_o
+);
+
+localparam fpnew_pkg::unit_type_t C_DIV = FpuDivSqrt ? fpnew_pkg::MERGED :
+                                                       fpnew_pkg::DISABLED;
+
+logic                     fpu_op_mod;
+logic                     fpu_vec_op;
+logic [FpuOpBits-1:0]     fpu_op;
+
+logic [FpuFmtBits-1:0]    dst_fmt;
+logic [FpuFmtBits-1:0]    src_fmt;
+logic [FpuIntFmtBits-1:0] int_fmt;
+logic [FpuRoundBits-1:0]  fp_rnd_mode;
+
+assign {fpu_vec_op, fpu_op_mod, fpu_op} = fpu_op_i;
+assign {int_fmt, src_fmt, dst_fmt, fp_rnd_mode} = fpu_flags_i;
+
+// -----------
+// FPU Config
+// -----------
+// Features (enabled formats, vectors etc.)
+localparam fpnew_pkg::fpu_features_t FpuFeatures = '{
+  Width:         C_FLEN,
+  EnableVectors: C_XFVEC,
+  EnableNanBox:  1'b0,
+  FpFmtMask:     {C_RVF, C_RVD, C_XF16, C_XF8, C_XF16ALT, C_XF8ALT},
+  IntFmtMask:    {C_XFVEC && (C_XF8 || C_XF8ALT),
+                  C_XFVEC && (C_XF16 || C_XF16ALT), 1'b1, 1'b0}
+};
+
+// Implementation (number of registers etc)
+localparam fpnew_pkg::fpu_implementation_t FpuImplementation = '{
+  PipeRegs:   '{// FP32, FP64, FP16, FP8, FP16alt, FP8alt
+               '{C_LAT_FP32, C_LAT_FP64,
+                 C_LAT_FP16, C_LAT_FP8 ,
+                 C_LAT_FP16ALT, C_LAT_FP8ALT}, // ADDMUL
+               '{default: C_LAT_DIVSQRT}, // DIVSQRT
+               '{default: C_LAT_NONCOMP}, // NONCOMP
+               '{default: C_LAT_CONV }, // CONV
+               '{default: C_LAT_DOTP }}, // SDOTP
+  UnitTypes:  '{'{default: fpnew_pkg::MERGED}, // ADDMUL
+                '{default: C_DIV}, // DIVSQRT
+                '{default: fpnew_pkg::PARALLEL}, // NONCOMP
+                '{default: fpnew_pkg::MERGED}, // CONV
+                '{default: fpnew_pkg::DISABLED}}, // SDOTP
+  PipeConfig: fpnew_pkg::BEFORE
+};
+
+//---------------
+// FPU instance
+//---------------
+fpnew_top #(
+  .Features       ( FpuFeatures       ), // fpu_features_t: each override must receive the localparam of its own struct type
+  .Implementation ( FpuImplementation ), // fpu_implementation_t
+  .TagType        ( logic             )
+) i_fpnew (
+  .clk_i          ( clk_i                                 ),
+  .rst_ni         ( rst_ni                                ),
+  .hart_id_i      ( hart_id_i                             ),
+  .operands_i     ( fpu_operands_i                        ),
+  .rnd_mode_i     ( fpnew_pkg::roundmode_e'(fp_rnd_mode)  ),
+  .op_i           ( fpnew_pkg::operation_e'(fpu_op)       ),
+  .op_mod_i       ( fpu_op_mod                            ),
+  .src_fmt_i      ( fpnew_pkg::fp_format_e'(src_fmt)      ),
+  .dst_fmt_i      ( fpnew_pkg::fp_format_e'(dst_fmt)      ),
+  .int_fmt_i      ( fpnew_pkg::int_format_e'(int_fmt)     ),
+  .vectorial_op_i ( fpu_vec_op                            ),
+  .tag_i          ( '0                                    ),
+  .simd_mask_i    ( '1                                    ),
+  .in_valid_i     ( fpu_req_i                             ),
+  .in_ready_o     ( fpu_gnt_o                             ),
+  .flush_i        ( '0                                    ),
+  .result_o       ( fpu_result_o                          ),
+  .status_o       ( fpu_flags_o                           ),
+  .tag_o          (                                       ),
+  .out_valid_o    ( fpu_valid_o                           ),
+  .out_ready_i    ( 1'b1                                  ), // results are always accepted: no back-pressure towards the FPU
+  .busy_o         (                                       )
+);
+
+endmodule: fpu_wrap
diff --git a/rtl/pulp_cluster.sv b/rtl/pulp_cluster.sv
index 81db335b..c31e6022 100644
--- a/rtl/pulp_cluster.sv
+++ b/rtl/pulp_cluster.sv
@@ -69,7 +69,8 @@
module pulp_cluster parameter BOOT_ADDR = 32'h78000000, parameter INSTR_RDATA_WIDTH = 32, - parameter CLUST_FPU = 0, + parameter bit CLUST_FPU = 1, + parameter int unsigned NumFpu = NB_CORES, parameter CLUST_FP_DIVSQRT = 0, parameter CLUST_SHARED_FP = 0, parameter CLUST_SHARED_FP_DIVSQRT = 0, @@ -424,18 +425,18 @@ XBAR_PERIPH_BUS s_core_euctrl_bus[NB_CORES-1:0](); // apu-interconnect // handshake signals -logic [NB_CORES-1:0] s_apu_master_req; -logic [NB_CORES-1:0] s_apu_master_gnt; +logic [NB_CORES-1:0] fpu_master_req; +logic [NB_CORES-1:0] fpu_master_gnt; // request channel -logic [NB_CORES-1:0][APU_NARGS_CPU-1:0][31:0] s_apu_master_operands; -logic [NB_CORES-1:0][APU_WOP_CPU-1:0] s_apu_master_op; -logic [NB_CORES-1:0][WAPUTYPE-1:0] s_apu_master_type; -logic [NB_CORES-1:0][APU_NDSFLAGS_CPU-1:0] s_apu_master_flags; +logic [NB_CORES-1:0][APU_NARGS_CPU-1:0][31:0] fpu_master_operands; +logic [NB_CORES-1:0][APU_WOP_CPU-1:0] fpu_master_op; +logic [NB_CORES-1:0][WAPUTYPE-1:0] fpu_master_type; +logic [NB_CORES-1:0][APU_NDSFLAGS_CPU-1:0] fpu_master_in_flags; // response channel -logic [NB_CORES-1:0] s_apu_master_rready; -logic [NB_CORES-1:0] s_apu_master_rvalid; -logic [NB_CORES-1:0][31:0] s_apu_master_rdata; -logic [NB_CORES-1:0][APU_NUSFLAGS_CPU-1:0] s_apu_master_rflags; +logic [NB_CORES-1:0] fpu_master_rready; +logic [NB_CORES-1:0] fpu_master_valid; +logic [NB_CORES-1:0][31:0] fpu_master_result; +logic [NB_CORES-1:0][APU_NUSFLAGS_CPU-1:0] fpu_master_out_flags; //----------------------------------------------------------------------// // Interfaces between ICache - L0 - Icache_Interco and Icache_ctrl_unit // @@ -954,18 +955,49 @@ generate .pc_backup_o ( backup_bus[i].pc_backup ), .csr_backup_o ( backup_bus[i].csr_backup ), //apu interface - .apu_master_req_o ( s_apu_master_req [i] ), - .apu_master_gnt_i ( s_apu_master_gnt [i] ), - .apu_master_type_o ( s_apu_master_type [i] ), - .apu_master_operands_o ( s_apu_master_operands[i] ), - .apu_master_op_o ( s_apu_master_op [i] 
), - .apu_master_flags_o ( s_apu_master_flags [i] ), - .apu_master_valid_i ( s_apu_master_rvalid [i] ), - .apu_master_ready_o ( s_apu_master_rready [i] ), - .apu_master_result_i ( s_apu_master_rdata [i] ), - .apu_master_flags_i ( s_apu_master_rflags [i] ) + .apu_master_req_o ( fpu_master_req [i] ), + .apu_master_gnt_i ( fpu_master_gnt [i] ), + .apu_master_type_o ( fpu_master_type [i] ), + .apu_master_operands_o ( fpu_master_operands [i] ), + .apu_master_op_o ( fpu_master_op [i] ), + .apu_master_flags_o ( fpu_master_in_flags [i] ), + .apu_master_valid_i ( fpu_master_valid [i] ), + .apu_master_ready_o ( fpu_master_rready [i] ), + .apu_master_result_i ( fpu_master_result [i] ), + .apu_master_flags_i ( fpu_master_out_flags[i] ) ); + if (CLUST_FPU) begin: gen_fpu + fpu_wrap #( + .DataWidth ( 32 ), + .FpuNumOperands ( APU_NARGS_CPU ), + .FpuOpcodeWidth ( APU_WOP_CPU ), + .FpuInFlagsWidth ( APU_NDSFLAGS_CPU ), + .FpuOutFlagsWidth ( APU_NUSFLAGS_CPU ), + .FpuFmtBits ( fpnew_pkg::FP_FORMAT_BITS ), + .FpuIntFmtBits ( fpnew_pkg::INT_FORMAT_BITS ), + .FpuRoundBits ( 3 ), + .FpuOpBits ( fpnew_pkg::OP_BITS ), + .FpuDivSqrt ( CLUST_FP_DIVSQRT ) + ) i_fpu_wrap ( + .clk_i ( clk_core[i] ), + .rst_ni ( rst_ni ), + .hart_id_i ( i ), + .fpu_req_i ( fpu_master_req[i] ), + .fpu_gnt_o ( fpu_master_gnt[i] ), + .fpu_operands_i ( fpu_master_operands[i] ), + .fpu_op_i ( fpu_master_op[i] ), + .fpu_flags_i ( fpu_master_in_flags[i] ), + .fpu_valid_o ( fpu_master_valid[i] ), + .fpu_result_o ( fpu_master_result[i] ), + .fpu_flags_o ( fpu_master_out_flags[i] ) + ); + end else begin: gen_no_fpu + assign fpu_master_gnt[i] = '0; + assign fpu_master_valid[i] = '0; + assign fpu_master_result[i] = '0; + assign fpu_master_out_flags[i] = '0; + end assign dbg_core_halted[i] = core2hmr[i].debug_halted; // Binding inputs/outputs from HMR to the system and vice versa @@ -1114,80 +1146,76 @@ hmr_unit #( //**************************************************** //**** Shared FPU cluster - Shared execution units *** 
//**************************************************** -// request channel -logic [NB_CORES-1:0][2:0][31:0] s_apu__operands; -logic [NB_CORES-1:0][5:0] s_apu__op; -logic [NB_CORES-1:0][2:0] s_apu__type; -logic [NB_CORES-1:0][14:0] s_apu__flags; -// response channel -logic [NB_CORES-1:0][4:0] s_apu__rflags; - -genvar k; -for(k=0;k Date: Thu, 25 Jan 2024 16:05:33 +0100 Subject: [PATCH 3/7] Directly use Questa to compile DPIs. --- Makefile | 25 ++- scripts/start.tcl | 2 +- tb/dpi/elfloader.cc | 135 ---------------- tb/dpi/elfloader.cpp | 353 ++++++++++++++++++++++++++++++++++++++++++ tb/pulp_cluster_tb.sv | 5 +- 5 files changed, 365 insertions(+), 155 deletions(-) delete mode 100644 tb/dpi/elfloader.cc create mode 100644 tb/dpi/elfloader.cpp diff --git a/Makefile b/Makefile index 73b062ea..3a01df22 100644 --- a/Makefile +++ b/Makefile @@ -4,10 +4,11 @@ ROOT_DIR = $(strip $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))) +QUESTA ?= questa-2022.3 GIT ?= git BENDER ?= bender -VSIM ?= vsim -VOPT ?= vopt +VSIM ?= $(QUESTA) vsim +VOPT ?= $(QUESTA) vopt top_level ?= pulp_cluster_tb dpi-library ?= work-dpi library ?= work @@ -76,7 +77,7 @@ Bender.lock: ## Clone pulp-runtime as SW stack pulp-runtime: - git clone git@github.com:pulp-platform/pulp-runtime.git -b yt/carfield $@ + git clone git@github.com:pulp-platform/pulp-runtime.git -b astral $@ ## Clone regression tests for bare-metal verification regression-tests: @@ -92,30 +93,22 @@ sim_clean: scripts/compile.tcl: | Bender.lock $(call generate_vsim, $@, $(bender_defs) $(bender_targs),..) - -# compile the elfloader.cpp -$(dpi-library)/%.o: tb/dpi/%.cc $(dpi_hdr) - mkdir -p $(dpi-library) - $(CXX) -shared -fPIC -std=c++0x -Bsymbolic $(CFLAGS) -c $< -o $@ - -$(dpi-library)/cl_dpi.so: $(dpi) - $(CXX) -shared -m64 -o $(dpi-library)/cl_dpi.so $? 
-L$(RISCV)/lib -L$(SPIKE_ROOT)/lib -Wl,-rpath,$(RISCV)/lib -Wl,-rpath,$(SPIKE_ROOT)/lib -lfesvr + echo 'vlog "$(realpath $(ROOT_DIR))/tb/dpi/elfloader.cpp" -ccflags "-std=c++11"' >> $@ + echo 'vopt +permissive -suppress 3053 -suppress 8885 +UVM_NO_RELNOTES $(top_level) -o $(top_level)_optimized' >> $@ $(library): $(QUESTA) vlib $(library) -compile: $(library) $(dpi) $(dpi-library)/cl_dpi.so +compile: $(library) @test -f Bender.lock || { echo "ERROR: Bender.lock file does not exist. Did you run make checkout in bender mode?"; exit 1; } @test -f scripts/compile.tcl || { echo "ERROR: scripts/compile.tcl file does not exist. Did you run make scripts in bender mode?"; exit 1; } $(VSIM) -c -do 'quit -code [source scripts/compile.tcl]' -build: compile $(dpi) +build: compile $(VOPT) $(compile_flag) -suppress 3053 -suppress 8885 -work $(library) $(top_level) -o $(top_level)_optimized +acc -check_synthesis - run: - $(VSIM) +permissive $(questa-flags) $(questa-cmd) -suppress 3053 -suppress 8885 -lib $(library) +MAX_CYCLES=$(max_cycles) +UVM_TESTNAME=$(test_case) +APP=$(elf-bin) +notimingchecks +nospecify -t 1ps \ - $(uvm-flags) $(QUESTASIM_FLAGS) -sv_lib $(dpi-library)/cl_dpi \ + $(VSIM) +permissive $(questa-flags) $(uvm-flags) $(QUESTASIM_FLAGS) $(questa-cmd) -suppress 3053 -suppress 8885 -lib $(library) +MAX_CYCLES=$(max_cycles) +UVM_TESTNAME=$(test_case) +APP=$(elf-bin) +notimingchecks +nospecify -t 1ps \ ${top_level}_optimized +permissive-off ++$(elf-bin) ++$(target-options) ++$(cl-bin) | tee sim.log .PHONY: test-rt-par-bare diff --git a/scripts/start.tcl b/scripts/start.tcl index 9928618e..a9ebc7fa 100644 --- a/scripts/start.tcl +++ b/scripts/start.tcl @@ -3,7 +3,7 @@ if {![info exists VSIM_PATH ]} { set VSIM_PATH "" } -vsim +permissive -suppress 3053 -suppress 8885 -lib $VSIM_PATH/work +APP=./build/test/test +notimingchecks +nospecify -t 1ps -sv_lib $VSIM_PATH/work-dpi/cl_dpi pulp_cluster_tb_optimized +permissive-off ++./build/test/test +vsim +permissive -suppress 3053 
-suppress 8885 +UVM_NO_RELNOTES -lib $VSIM_PATH/work +APP=./build/test/test +notimingchecks +nospecify -t 1ps pulp_cluster_tb_optimized +permissive-off ++./build/test/test add log -r /* run -all diff --git a/tb/dpi/elfloader.cc b/tb/dpi/elfloader.cc deleted file mode 100644 index 2aa1dfb1..00000000 --- a/tb/dpi/elfloader.cc +++ /dev/null @@ -1,135 +0,0 @@ -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define SHT_PROGBITS 0x1 -#define SHT_GROUP 0x11 - -// address and size -std::vector> sections; -std::map symbols; -// memory based address and content -std::map> mems; -reg_t entry; -int section_index = 0; - -void write (uint64_t address, uint64_t len, uint8_t* buf) { - uint64_t datum; - std::vector mem; - for (int i = 0; i < len; i++) { - mem.push_back(buf[i]); - } - mems.insert(std::make_pair(address, mem)); -} - -// Communicate the section address and len -// Returns: -// 0 if there are no more sections -// 1 if there are more sections to load -extern "C" char get_section (long long* address, long long* len) { - if (section_index < sections.size()) { - *address = sections[section_index].first; - *len = sections[section_index].second; - section_index++; - return 1; - } else return 0; -} - -extern "C" char read_section (long long address, const svOpenArrayHandle buffer) { - // get actual poitner - void* buf = svGetArrayPtr(buffer); - // check that the address points to a section - assert(mems.count(address) > 0); - // copy array - int i = 0; - for (auto &datum : mems.find(address)->second) { - *((char *) buf + i) = datum; - i++; - } -} - -extern "C" void read_elf(const char* filename) { - int fd = open(filename, O_RDONLY); - struct stat s; - assert(fd != -1); - if (fstat(fd, &s) < 0) - abort(); - size_t size = s.st_size; - - char* buf = (char*)mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); - assert(buf != MAP_FAILED); - close(fd); - - assert(size >= 
sizeof(Elf64_Ehdr)); - const Elf64_Ehdr* eh64 = (const Elf64_Ehdr*)buf; - assert(IS_ELF32(*eh64) || IS_ELF64(*eh64)); - - - - std::vector zeros; - std::map symbols; - - #define LOAD_ELF(ehdr_t, phdr_t, shdr_t, sym_t) do { \ - ehdr_t* eh = (ehdr_t*)buf; \ - phdr_t* ph = (phdr_t*)(buf + eh->e_phoff); \ - entry = eh->e_entry; \ - assert(size >= eh->e_phoff + eh->e_phnum*sizeof(*ph)); \ - for (unsigned i = 0; i < eh->e_phnum; i++) { \ - if(ph[i].p_type == PT_LOAD && ph[i].p_memsz) { \ - if (ph[i].p_filesz) { \ - assert(size >= ph[i].p_offset + ph[i].p_filesz); \ - sections.push_back(std::make_pair(ph[i].p_paddr, ph[i].p_memsz)); \ - write(ph[i].p_paddr, ph[i].p_filesz, (uint8_t*)buf + ph[i].p_offset); \ - } \ - zeros.resize(ph[i].p_memsz - ph[i].p_filesz); \ - } \ - } \ - shdr_t* sh = (shdr_t*)(buf + eh->e_shoff); \ - assert(size >= eh->e_shoff + eh->e_shnum*sizeof(*sh)); \ - assert(eh->e_shstrndx < eh->e_shnum); \ - assert(size >= sh[eh->e_shstrndx].sh_offset + sh[eh->e_shstrndx].sh_size); \ - char *shstrtab = buf + sh[eh->e_shstrndx].sh_offset; \ - unsigned strtabidx = 0, symtabidx = 0; \ - for (unsigned i = 0; i < eh->e_shnum; i++) { \ - unsigned max_len = sh[eh->e_shstrndx].sh_size - sh[i].sh_name; \ - if ((sh[i].sh_type & SHT_GROUP) && strcmp(shstrtab + sh[i].sh_name, ".strtab") != 0 && strcmp(shstrtab + sh[i].sh_name, ".shstrtab") != 0) \ - assert(strnlen(shstrtab + sh[i].sh_name, max_len) < max_len); \ - if (sh[i].sh_type & SHT_PROGBITS) continue; \ - if (strcmp(shstrtab + sh[i].sh_name, ".strtab") == 0) \ - strtabidx = i; \ - if (strcmp(shstrtab + sh[i].sh_name, ".symtab") == 0) \ - symtabidx = i; \ - } \ - if (strtabidx && symtabidx) { \ - char* strtab = buf + sh[strtabidx].sh_offset; \ - sym_t* sym = (sym_t*)(buf + sh[symtabidx].sh_offset); \ - for (unsigned i = 0; i < sh[symtabidx].sh_size/sizeof(sym_t); i++) { \ - unsigned max_len = sh[strtabidx].sh_size - sym[i].st_name; \ - assert(sym[i].st_name < sh[strtabidx]. 
sh_size); \
-            assert(strnlen(strtab + sym[i].st_name, max_len) < max_len); \
-            symbols[strtab + sym[i].st_name] = sym[i].st_value; \
-        } \
-    } \
-  } while(0)
-
-  if (IS_ELF32(*eh64))
-    LOAD_ELF(Elf32_Ehdr, Elf32_Phdr, Elf32_Shdr, Elf32_Sym);
-  else
-    LOAD_ELF(Elf64_Ehdr, Elf64_Phdr, Elf64_Shdr, Elf64_Sym);
-
-  munmap(buf, size);
-}
diff --git a/tb/dpi/elfloader.cpp b/tb/dpi/elfloader.cpp
new file mode 100644
index 00000000..4bb0fd62
--- /dev/null
+++ b/tb/dpi/elfloader.cpp
@@ -0,0 +1,353 @@
+// Copyright 2022 ETH Zurich and University of Bologna.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Modified version of the RISC-V Frontend Server
+// (https://github.com/riscvarchive/riscv-fesvr, e41cfc3001293b5625c25412bd9b26e6e4ab8f7e)
+//
+// Nicole Narr
+// Christopher Reinwardt
+// NOTE(review): header names below are rebuilt from the identifiers this file uses - confirm against the repository
+#include <svdpi.h>      // svOpenArrayHandle, svGetArrayPtr
+#include <assert.h>     // assert
+#include <fcntl.h>      // open, O_RDONLY
+#include <stdint.h>     // uintN_t
+#include <stdio.h>      // printf
+#include <stdlib.h>
+#include <string.h>     // strcmp
+#include <sys/mman.h>   // mmap, munmap
+#include <sys/stat.h>   // fstat, struct stat
+#include <sys/types.h>
+#include <unistd.h>     // close
+#include <map>
+#include <utility>      // std::pair, std::make_pair
+#include <vector>
+
+#define IS_ELF(hdr) \
+    ((hdr).e_ident[0] == 0x7f && (hdr).e_ident[1] == 'E' && \
+     (hdr).e_ident[2] == 'L' && (hdr).e_ident[3] == 'F')
+
+#define IS_ELF32(hdr) (IS_ELF(hdr) && (hdr).e_ident[4] == 1)
+#define IS_ELF64(hdr) (IS_ELF(hdr) && (hdr).e_ident[4] == 2)
+
+#define PT_LOAD 1
+#define SHT_NOBITS 8
+#define SHT_PROGBITS 0x1
+#define SHT_GROUP 0x11
+
+typedef struct {
+    uint8_t e_ident[16];
+    uint16_t e_type;
+    uint16_t e_machine;
+    uint32_t e_version;
+    uint32_t e_entry;
+    uint32_t e_phoff;
+    uint32_t e_shoff;
+    uint32_t e_flags;
+    uint16_t e_ehsize;
+    uint16_t e_phentsize;
+    uint16_t e_phnum;
+    uint16_t e_shentsize;
+    uint16_t e_shnum;
+    uint16_t e_shstrndx;
+} Elf32_Ehdr;
+
+typedef struct {
+    uint32_t sh_name;
+    uint32_t sh_type;
+    uint32_t sh_flags;
+    uint32_t sh_addr;
+    uint32_t sh_offset;
+    uint32_t sh_size;
+    uint32_t sh_link;
+    uint32_t sh_info;
+    uint32_t sh_addralign;
+    uint32_t sh_entsize;
+} Elf32_Shdr;
+
+typedef struct
+{
+    uint32_t p_type;
+    uint32_t p_offset;
+    uint32_t p_vaddr;
+    uint32_t p_paddr;
+    uint32_t p_filesz;
+    uint32_t p_memsz;
+    uint32_t p_flags;
+    uint32_t p_align;
+} Elf32_Phdr;
+
+typedef struct
+{
+    uint32_t st_name;
+    uint32_t st_value;
+    uint32_t st_size;
+    uint8_t st_info;
+    uint8_t st_other;
+    uint16_t st_shndx;
+} Elf32_Sym;
+
+typedef struct {
+    uint8_t e_ident[16];
+    uint16_t e_type;
+    uint16_t e_machine;
+    uint32_t e_version;
+    uint64_t e_entry;
+    uint64_t e_phoff;
+    uint64_t e_shoff;
+    uint32_t e_flags;
+    uint16_t e_ehsize;
+    uint16_t e_phentsize;
+    uint16_t e_phnum;
+    uint16_t e_shentsize;
+    uint16_t e_shnum;
+    uint16_t e_shstrndx;
+} Elf64_Ehdr;
+
+typedef struct {
+    uint32_t sh_name;
+    uint32_t sh_type;
+    uint64_t sh_flags;
+    uint64_t sh_addr;
+    uint64_t sh_offset;
+    uint64_t sh_size;
+    uint32_t sh_link;
+    uint32_t sh_info;
+    uint64_t sh_addralign;
+    uint64_t sh_entsize;
+} Elf64_Shdr;
+
+typedef struct {
+    uint32_t p_type;
+    uint32_t p_flags;
+    uint64_t p_offset;
+    uint64_t p_vaddr;
+    uint64_t p_paddr;
+    uint64_t p_filesz;
+    uint64_t p_memsz;
+    uint64_t p_align;
+} Elf64_Phdr;
+
+typedef struct {
+    uint32_t st_name;
+    uint8_t st_info;
+    uint8_t st_other;
+    uint16_t st_shndx;
+    uint64_t st_value;
+    uint64_t st_size;
+} Elf64_Sym;
+
+// address and size
+std::vector<std::pair<uint64_t, uint64_t>> sections;
+
+// memory based address and content
+std::map<uint64_t, std::vector<uint8_t>> mems;
+
+// Entrypoint
+uint64_t entry = 0;
+int section_index = 0;
+
+extern "C" {
+    char get_entry(long long *entry_ret);
+    char get_section(long long *address_ret, long long *len_ret);
+    char read_section(long long address, const svOpenArrayHandle buffer, long long len);
+    char read_elf(const char *filename);
+}
+// Copy len bytes from buf into a new entry of mems keyed by address (an existing key is left untouched by insert)
+static void write (uint64_t address, uint64_t len, uint8_t *buf)
+{
+    std::vector<uint8_t> mem;
+    for (uint64_t i = 0; i < len; i++) { // index has the same type as len, so large lengths cannot overflow it
+        mem.push_back(buf[i]);
+    }
+    mems.insert(std::make_pair(address, mem));
+}
+
+// Return the entry point reported by the ELF file
+// Must be called after reading the elf file obviously
+extern "C" char get_entry(long long *entry_ret)
+{
+    *entry_ret = entry;
+    return 0;
+}
+
+// Iterator over the section addresses and lengths
+// Returns:
+// 0 if there are no more sections
+// 1 if there are more sections to load
+extern "C" char get_section(long long *address_ret, long long *len_ret)
+{
+    if ((size_t) section_index < sections.size()) {
+        *address_ret = sections[section_index].first;
+        *len_ret = sections[section_index].second;
+        section_index++;
+        return 1;
+    } else {
+        return 0;
+    }
+}
+// Copy the bytes staged for address into buffer (capacity len bytes); returns 0 on success, -1 on error
+extern "C" char read_section(long long address, const svOpenArrayHandle buffer, long long len)
+{
+    // get actual pointer
+    char *buf = (char *) svGetArrayPtr(buffer);
+
+    // check that the address points to a section
+    if (!mems.count(address)) {
+        printf("[ELF] ERROR: No section found for address 0x%llx\n", (unsigned long long) address);
+        return -1;
+    }
+
+    // copy array
+    long long int len_tmp = len;
+    for (auto &datum : mems.find(address)->second) {
+        if(len_tmp-- == 0){
+            printf("[ELF] ERROR: Copied 0x%llx bytes. Buffer is full but there is still data available.\n", (unsigned long long) len);
+            return -1;
+        }
+
+        *buf++ = datum;
+    }
+
+    return 0;
+}
+// Parse the ELF image at buf: record the entry point, stage every PT_LOAD segment, then look up .strtab/.symtab
+template <class E, class P, class Sh, class Sy>
+static void load_elf(char *buf, size_t size)
+{
+    E *eh = (E *) buf;
+    P *ph = (P *) (buf + eh->e_phoff);
+    Sh *sh = (Sh *) (buf + eh->e_shoff);
+
+    char *shstrtab = NULL;
+
+    if(size < eh->e_phoff + (eh->e_phnum * sizeof(P))){
+        printf("[ELF] ERROR: Filesize is smaller than advertised program headers (0x%zx vs 0x%llx)\n", size, (unsigned long long) (eh->e_phoff + (eh->e_phnum * sizeof(P))));
+        return;
+    }
+
+    entry = eh->e_entry;
+    printf("[ELF] INFO: Entrypoint at 0x%llx\n", (unsigned long long) entry);
+
+    // Iterate over all program header entries
+    for (unsigned int i = 0; i < eh->e_phnum; i++) {
+        // Check whether the current program header entry contains a loadable section of nonzero size
+        if(ph[i].p_type == PT_LOAD && ph[i].p_memsz) {
+            // Is this section something else than zeros?
+            if (ph[i].p_filesz) {
+                assert(size >= ph[i].p_offset + ph[i].p_filesz);
+                sections.push_back(std::make_pair(ph[i].p_paddr, ph[i].p_memsz));
+                write(ph[i].p_paddr, ph[i].p_filesz, (uint8_t*)buf + ph[i].p_offset);
+            }
+
+            if(ph[i].p_memsz > ph[i].p_filesz){
+                printf("[ELF] WARNING: The section starting @ 0x%llx contains 0x%llx zero bytes which will NOT be preloaded!\n",
+                       (unsigned long long) ph[i].p_paddr, (unsigned long long) (ph[i].p_memsz - ph[i].p_filesz));
+            }
+        }
+    }
+
+    if(size < eh->e_shoff + (eh->e_shnum * sizeof(Sh))){
+        printf("[ELF] ERROR: Filesize is smaller than advertised section headers (0x%zx vs 0x%llx)\n",
+               size, (unsigned long long) (eh->e_shoff + (eh->e_shnum * sizeof(Sh))));
+        return;
+    }
+
+    if(eh->e_shstrndx >= eh->e_shnum){
+        printf("[ELF] ERROR: Malformed ELF file. The index of the section header strings is out of bounds (0x%x vs max 0x%x)\n",
+               (unsigned int) eh->e_shstrndx, (unsigned int) eh->e_shnum);
+        return;
+    }
+
+    if(size < sh[eh->e_shstrndx].sh_offset + sh[eh->e_shstrndx].sh_size){
+        printf("[ELF] ERROR: Filesize is smaller than advertised size of section name table (0x%zx vs 0x%llx)\n",
+               size, (unsigned long long) (sh[eh->e_shstrndx].sh_offset + sh[eh->e_shstrndx].sh_size));
+        return;
+    }
+
+    // Get a direct pointer to the section name section
+    shstrtab = buf + sh[eh->e_shstrndx].sh_offset;
+    unsigned int strtabidx = 0, symtabidx = 0;
+
+    // Iterate over all section headers to find .strtab and .symtab
+    for (unsigned int i = 0; i < eh->e_shnum; i++) {
+        // Skip headers whose name offset lies outside the section name table (malformed file)
+        if (sh[i].sh_name >= sh[eh->e_shstrndx].sh_size) continue;
+
+        // Is this the string table?
+        if(strcmp(shstrtab + sh[i].sh_name, ".strtab") == 0){
+            printf("[ELF] INFO: Found string table at offset 0x%llx\n", (unsigned long long) sh[i].sh_offset);
+            strtabidx = i;
+            continue;
+        }
+
+        // Is this the symbol table?
+        if(strcmp(shstrtab + sh[i].sh_name, ".symtab") == 0){
+            printf("[ELF] INFO: Found symbol table at offset 0x%llx\n", (unsigned long long) sh[i].sh_offset);
+            symtabidx = i;
+            continue;
+        }
+    }
+}
+// Memory-map filename, check the ELF signature and load it; returns 0, or -1 if the file cannot be opened/mapped/recognised
+extern "C" char read_elf(const char *filename)
+{
+    char *buf = NULL;
+    Elf64_Ehdr* eh64 = NULL;
+    int fd = open(filename, O_RDONLY);
+    char retval = 0;
+    struct stat s;
+    size_t size = 0;
+
+    if(fd == -1){
+        printf("[ELF] ERROR: Unable to open file %s\n", filename);
+        retval = -1;
+        goto exit;
+    }
+
+    if(fstat(fd, &s) < 0) {
+        printf("[ELF] ERROR: Unable to read stats for file %s\n", filename);
+        retval = -1;
+        goto exit_fd;
+    }
+
+    size = s.st_size;
+
+    if(size < sizeof(Elf64_Ehdr)){
+        printf("[ELF] ERROR: File %s is too small to contain a valid ELF header (0x%zx vs 0x%zx)\n", filename, size, sizeof(Elf64_Ehdr));
+        retval = -1;
+        goto exit_fd;
+    }
+
+    buf = (char *) mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
+    if(buf == MAP_FAILED){
+        printf("[ELF] ERROR: Unable to memory map file %s\n", filename);
+        retval = -1;
+        goto exit_fd;
+    }
+
+    printf("[ELF] INFO: File %s was memory mapped to %p\n", filename, (void *) buf);
+
+    eh64 = (Elf64_Ehdr *) buf;
+
+    if(!(IS_ELF32(*eh64) || IS_ELF64(*eh64))){
+        printf("[ELF] ERROR: File %s does not contain a valid ELF signature\n", filename);
+        retval = -1;
+        goto exit_mmap;
+    }
+
+    if (IS_ELF32(*eh64)){ // e_ident[4] selects the 32-bit or 64-bit header layouts
+        load_elf<Elf32_Ehdr, Elf32_Phdr, Elf32_Shdr, Elf32_Sym>(buf, size);
+    } else {
+        load_elf<Elf64_Ehdr, Elf64_Phdr, Elf64_Shdr, Elf64_Sym>(buf, size);
+    }
+
+exit_mmap:
+    munmap(buf, size);
+
+exit_fd:
+    close(fd);
+
+exit:
+    return retval;
+}
diff --git a/tb/pulp_cluster_tb.sv b/tb/pulp_cluster_tb.sv
index bf36d3f7..2b8ec71e 100644
--- a/tb/pulp_cluster_tb.sv
+++ b/tb/pulp_cluster_tb.sv
@@ -17,13 +17,12 @@
 `timescale 1ps/1ps
 `include "pulp_soc_defines.sv"
-`include "uvm_macros.svh"
 `include "axi/assign.svh"
 `include "axi/typedef.svh"
 import "DPI-C" function read_elf(input string filename);
 import "DPI-C" function byte get_section(output longint address, output longint len);
-import "DPI-C" context function byte read_section(input longint address,
inout byte buffer[]); +import "DPI-C" context function byte read_section(input longint address, inout byte buffer[], input longint len); module pulp_cluster_tb; @@ -397,7 +396,7 @@ module pulp_cluster_tb; sections[section_addr >> AxiWideByteOffset] = num_words; buffer = new[num_words * AxiWideBeWidth]; - void'(read_section(section_addr, buffer)); + void'(read_section(section_addr, buffer, section_len)); for (int i = 0; i < num_words; i++) begin automatic logic [AxiWideBeWidth-1:0][7:0] word = '0; for (int j = 0; j < AxiWideBeWidth; j++) begin From 9bfeba948d0345a67baea95656b0511b198f5ab8 Mon Sep 17 00:00:00 2001 From: Yvan Tortorella Date: Thu, 25 Jan 2024 22:56:24 +0100 Subject: [PATCH 4/7] Add private FPUs. --- Bender.local | 1 + Bender.lock | 14 +++++++++++--- Bender.yml | 4 ++-- rtl/pulp_cluster.sv | 38 ++++---------------------------------- 4 files changed, 18 insertions(+), 39 deletions(-) diff --git a/Bender.local b/Bender.local index 2386c8ee..c6274f27 100644 --- a/Bender.local +++ b/Bender.local @@ -1,4 +1,5 @@ overrides: + hci : { git: "https://github.com/pulp-platform/hci.git" , rev: 3cb3d99b2cebfeed55cb6ab9d98fce7b99e97cb9 } # branch: master axi : { git: "https://github.com/pulp-platform/axi.git" , version: =0.39.1-beta } register_interface : { git: "https://github.com/pulp-platform/register_interface.git" , rev: 19163bb5191d2669a8cbc267cdd4ce8e60f20746 } # branch: master cluster_interconnect: { git: "https://github.com/pulp-platform/cluster_interconnect.git", rev: 89e1019d64a86425211be6200770576cbdf3e8b3 } # branch: assertion-fix diff --git a/Bender.lock b/Bender.lock index c86f60bb..ceaaed57 100644 --- a/Bender.lock +++ b/Bender.lock @@ -104,8 +104,16 @@ packages: Git: https://github.com/pulp-platform/fpu_div_sqrt_mvp.git dependencies: - common_cells + fpu_interco: + revision: b5f7a315929308823cacd81e1e4898f1eeecfc64 + version: null + source: + Git: https://github.com/pulp-platform/fpu_interco.git + dependencies: + - fpnew + - riscv hci: - revision: 
78fb8fc8a6f2c376554562c47755b4d0febaba25 + revision: 3cb3d99b2cebfeed55cb6ab9d98fce7b99e97cb9 version: null source: Git: https://github.com/pulp-platform/hci.git @@ -133,8 +141,8 @@ packages: dependencies: - tech_cells_generic hwpe-stream: - revision: ddc154424187dff42a8fcec946c768ceb13f13de - version: 1.6.4 + revision: 389bd7fb1975d2df1546910c5f220c668122e646 + version: 1.6.5 source: Git: https://github.com/pulp-platform/hwpe-stream.git dependencies: diff --git a/Bender.yml b/Bender.yml index e0c2c881..7347953d 100644 --- a/Bender.yml +++ b/Bender.yml @@ -21,7 +21,7 @@ dependencies: idma: { git: "https://github.com/pulp-platform/iDMA.git", rev: 437ffa9dac5dea0daccfd3e8ae604d4f6ae2cdf1 } # branch: master hier-icache: { git: "https://github.com/pulp-platform/hier-icache.git", rev: "a971e364bf8090cf77fafad995b480c1ac7ea4e0" } # branch: yt/carfield cluster_peripherals: { git: "https://github.com/pulp-platform/cluster_peripherals.git", rev: c015839816938a790c8da5fd5829cfc536f1ca9c } # branch: yt/return-reg - # fpu_interco: { git: "https://github.com/pulp-platform/fpu_interco.git", rev: "4aec4b68424947b0c4cf25fd7c4b907cb9ec3dfa" } # branch: yt/carfield + fpu_interco: { git: "https://github.com/pulp-platform/fpu_interco.git", rev: "b5f7a315929308823cacd81e1e4898f1eeecfc64" } # branch: astral axi: { git: "https://github.com/pulp-platform/axi.git", version: =0.39.1-beta } axi_slice: { git: "https://github.com/pulp-platform/axi_slice.git", version: 1.1.4 } # deprecated, replaced by axi_cut (in axi repo) timer_unit: { git: "https://github.com/pulp-platform/timer_unit.git", version: 1.0.2 } @@ -31,7 +31,7 @@ dependencies: cv32e40p: { git: "https://github.com/pulp-platform/cv32e40p.git", rev: e863f576699815b38cc9d80dbdede8ed5efd5991 } # `michaero/safety-island-clic` branch ibex: { git: "https://github.com/pulp-platform/ibex.git", rev: "pulpissimo-v6.1.2" } scm: { git: "https://github.com/pulp-platform/scm.git", rev: 74426dee36f28ae1c02f7635cf844a0156145320 } # branch: 
yt/bump-clkgating - hci: { git: "https://github.com/pulp-platform/hci.git", rev: b2e6f391aa6c10c03f45b693d80a0aaddecf169b } # branch: master + hci: { git: "https://github.com/pulp-platform/hci.git", rev: 3cb3d99b2cebfeed55cb6ab9d98fce7b99e97cb9 } # branch: test_mode_fix register_interface: { git: "https://github.com/pulp-platform/register_interface.git", rev: 19163bb5191d2669a8cbc267cdd4ce8e60f20746 } # branch: master common_cells: { git: "https://github.com/pulp-platform/common_cells.git", version: 1.29.0 } redundancy_cells: { git: "https://github.com/pulp-platform/redundancy_cells.git", rev: 32023555679cfdb8a0a073ad4c17fc3a5d1ddea5 } # branch: yt/rapidrecovery diff --git a/rtl/pulp_cluster.sv b/rtl/pulp_cluster.sv index c31e6022..f3786487 100644 --- a/rtl/pulp_cluster.sv +++ b/rtl/pulp_cluster.sv @@ -27,6 +27,7 @@ module pulp_cluster import pulp_cluster_package::*; import hci_package::*; import rapid_recovery_pkg::*; + import fpnew_pkg::*; #( // cluster parameters parameter CORE_TYPE_CL = 1, // 0 for CV32, 1 for RI5CY, 2 for IBEX RV32IMC @@ -70,7 +71,7 @@ module pulp_cluster parameter INSTR_RDATA_WIDTH = 32, parameter bit CLUST_FPU = 1, - parameter int unsigned NumFpu = NB_CORES, + parameter int unsigned NumFpus = NB_CORES, parameter CLUST_FP_DIVSQRT = 0, parameter CLUST_SHARED_FP = 0, parameter CLUST_SHARED_FP_DIVSQRT = 0, @@ -967,37 +968,6 @@ generate .apu_master_flags_i ( fpu_master_out_flags[i] ) ); - if (CLUST_FPU) begin: gen_fpu - fpu_wrap #( - .DataWidth ( 32 ), - .FpuNumOperands ( APU_NARGS_CPU ), - .FpuOpcodeWidth ( APU_WOP_CPU ), - .FpuInFlagsWidth ( APU_NDSFLAGS_CPU ), - .FpuOutFlagsWidth ( APU_NUSFLAGS_CPU ), - .FpuFmtBits ( fpnew_pkg::FP_FORMAT_BITS ), - .FpuIntFmtBits ( fpnew_pkg::INT_FORMAT_BITS ), - .FpuRoundBits ( 3 ), - .FpuOpBits ( fpnew_pkg::OP_BITS ), - .FpuDivSqrt ( CLUST_FP_DIVSQRT ) - ) i_fpu_wrap ( - .clk_i ( clk_core[i] ), - .rst_ni ( rst_ni ), - .hart_id_i ( i ), - .fpu_req_i ( fpu_master_req[i] ), - .fpu_gnt_o ( fpu_master_gnt[i] ), - 
.fpu_operands_i ( fpu_master_operands[i] ), - .fpu_op_i ( fpu_master_op[i] ), - .fpu_flags_i ( fpu_master_in_flags[i] ), - .fpu_valid_o ( fpu_master_valid[i] ), - .fpu_result_o ( fpu_master_result[i] ), - .fpu_flags_o ( fpu_master_out_flags[i] ) - ); - end else begin: gen_no_fpu - assign fpu_master_gnt[i] = '0; - assign fpu_master_valid[i] = '0; - assign fpu_master_result[i] = '0; - assign fpu_master_out_flags[i] = '0; - end assign dbg_core_halted[i] = core2hmr[i].debug_halted; // Binding inputs/outputs from HMR to the system and vice versa @@ -1146,7 +1116,7 @@ hmr_unit #( //**************************************************** //**** Shared FPU cluster - Shared execution units *** //**************************************************** -if (CLUST_SHARED_FP) begin: gen_shared_fpu +if (CLUST_FPU) begin: gen_fpu_subsystem // request channel logic [NB_CORES-1:0][2:0][31:0] s_apu__operands; logic [NB_CORES-1:0][5:0] s_apu__op; @@ -1166,7 +1136,7 @@ if (CLUST_SHARED_FP) begin: gen_shared_fpu shared_fpu_cluster #( .NB_CORES ( NB_CORES ), .NB_APUS ( 1 ), - .NB_FPNEW ( 4 ), + .NB_FPNEW ( NumFpus ), .FP_TYPE_WIDTH ( 3 ), .NB_CORE_ARGS ( 3 ), From 3931d03a4599cc0a7b12ad351378fb6423893bfc Mon Sep 17 00:00:00 2001 From: Yvan Tortorella Date: Thu, 25 Jan 2024 23:07:43 +0100 Subject: [PATCH 5/7] Delete FPU wrapper. --- Bender.yml | 1 - rtl/fpu_wrap.sv | 114 ------------------------------------------------ 2 files changed, 115 deletions(-) delete mode 100644 rtl/fpu_wrap.sv diff --git a/Bender.yml b/Bender.yml index 7347953d..823bc714 100644 --- a/Bender.yml +++ b/Bender.yml @@ -72,7 +72,6 @@ sources: - rtl/cluster_peripherals.sv - rtl/data_periph_demux.sv - rtl/core_demux_wrap.sv - - rtl/fpu_wrap.sv # Level 2 - rtl/core_region.sv - target: simulation diff --git a/rtl/fpu_wrap.sv b/rtl/fpu_wrap.sv deleted file mode 100644 index 5cba5e50..00000000 --- a/rtl/fpu_wrap.sv +++ /dev/null @@ -1,114 +0,0 @@ -// Copyright 2023 ETH Zurich and University of Bologna. 
-// Solderpad Hardware License, Version 0.51, see LICENSE for details. -// SPDX-License-Identifier: SHL-0.51 - -module fpu_wrap - import riscv_defines::*; -#( - parameter int unsigned DataWidth = 32, - parameter int unsigned FpuNumOperands = 3, - parameter int unsigned FpuOpcodeWidth = 6, - parameter int unsigned FpuInFlagsWidth = 15, - parameter int unsigned FpuOutFlagsWidth = 5, - parameter int unsigned FpuFmtBits = fpnew_pkg::FP_FORMAT_BITS, - parameter int unsigned FpuIntFmtBits = fpnew_pkg::INT_FORMAT_BITS, - parameter int unsigned FpuRoundBits = 3, - parameter int unsigned FpuOpBits = fpnew_pkg::OP_BITS, - parameter int unsigned FpuDivSqrt = 0 -)( - // Clock and Reset - input logic clk_i, - input logic rst_ni, - input logic [31:0] hart_id_i, - // APU Side: Master port - input logic fpu_req_i, - output logic fpu_gnt_o, - // request channel - input logic [FpuNumOperands-1:0][DataWidth-1:0] fpu_operands_i, - input logic [FpuOpcodeWidth-1:0] fpu_op_i, - input logic [FpuInFlagsWidth-1:0] fpu_flags_i, - // response channel - output logic fpu_valid_o, - output logic [DataWidth-1:0] fpu_result_o, - output logic [FpuOutFlagsWidth-1:0] fpu_flags_o -); - -localparam fpnew_pkg::unit_type_t C_DIV = FpuDivSqrt ? fpnew_pkg::MERGED : - fpnew_pkg::DISABLED; - -logic fpu_op_mod; -logic fpu_vec_op; -logic [FpuOpBits-1:0] fpu_op; - -logic [FpuFmtBits-1:0] dst_fmt; -logic [FpuFmtBits-1:0] src_fmt; -logic [FpuIntFmtBits-1:0] int_fmt; -logic [FpuRoundBits-1:0] fp_rnd_mode; - -assign {fpu_vec_op, fpu_op_mod, fpu_op} = fpu_op_i; -assign {int_fmt, src_fmt, dst_fmt, fp_rnd_mode} = fpu_flags_i; - -// ----------- -// FPU Config -// ----------- -// Features (enabled formats, vectors etc.) 
-localparam fpnew_pkg::fpu_features_t FpuFeatures = '{ - Width: C_FLEN, - EnableVectors: C_XFVEC, - EnableNanBox: 1'b0, - FpFmtMask: {C_RVF, C_RVD, C_XF16, C_XF8, C_XF16ALT, C_XF8ALT}, - IntFmtMask: {C_XFVEC && (C_XF8 || C_XF8ALT), - C_XFVEC && (C_XF16 || C_XF16ALT), 1'b1, 1'b0} -}; - -// Implementation (number of registers etc) -localparam fpnew_pkg::fpu_implementation_t FpuImplementation = '{ - PipeRegs: '{// FP32, FP64, FP16, FP8, FP16alt, FP8alt - '{C_LAT_FP32, C_LAT_FP64, - C_LAT_FP16, C_LAT_FP8 , - C_LAT_FP16ALT, C_LAT_FP8ALT}, // ADDMUL - '{default: C_LAT_DIVSQRT}, // DIVSQRT - '{default: C_LAT_NONCOMP}, // NONCOMP - '{default: C_LAT_CONV }, // CONV - '{default: C_LAT_DOTP }}, // SDOTP - UnitTypes: '{'{default: fpnew_pkg::MERGED}, // ADDMUL - '{default: C_DIV}, // DIVSQRT - '{default: fpnew_pkg::PARALLEL}, // NONCOMP - '{default: fpnew_pkg::MERGED}, // CONV - '{default: fpnew_pkg::DISABLED}}, // SDOTP - PipeConfig: fpnew_pkg::BEFORE -}; - -//--------------- -// FPU instance -//--------------- -fpnew_top #( - .Features ( FpuImplementation ), - .Implementation ( FpuFeatures ), - .TagType ( logic ) -) i_fpnew ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - .hart_id_i ( hart_id_i ), - .operands_i ( fpu_operands_i ), - .rnd_mode_i ( fpnew_pkg::roundmode_e'(fp_rnd_mode) ), - .op_i ( fpnew_pkg::operation_e'(fpu_op) ), - .op_mod_i ( fpu_op_mod ), - .src_fmt_i ( fpnew_pkg::fp_format_e'(src_fmt) ), - .dst_fmt_i ( fpnew_pkg::fp_format_e'(dst_fmt) ), - .int_fmt_i ( fpnew_pkg::int_format_e'(int_fmt) ), - .vectorial_op_i ( fpu_vec_op ), - .tag_i ( '0 ), - .simd_mask_i ( '1 ), - .in_valid_i ( fpu_req_i ), - .in_ready_o ( fpu_gnt_o ), - .flush_i ( '0 ), - .result_o ( fpu_result_o ), - .status_o ( fpu_flags_o ), - .tag_o ( ), - .out_valid_o ( fpu_valid_o ), - .out_ready_i ( 1'b1 ), - .busy_o ( ) -); - -endmodule: fpu_wrap From b373cb03192babb3d1dbcd726ad353bc721bac4c Mon Sep 17 00:00:00 2001 From: Yvan Tortorella Date: Sat, 27 Jan 2024 00:12:07 +0100 Subject: [PATCH 6/7] Remove
shared FPU and bump core. --- Bender.lock | 10 +--- Bender.yml | 3 +- rtl/core_region.sv | 2 +- rtl/pulp_cluster.sv | 109 ++++---------------------------------------- 4 files changed, 13 insertions(+), 111 deletions(-) diff --git a/Bender.lock b/Bender.lock index ceaaed57..f86b28ba 100644 --- a/Bender.lock +++ b/Bender.lock @@ -104,14 +104,6 @@ packages: Git: https://github.com/pulp-platform/fpu_div_sqrt_mvp.git dependencies: - common_cells - fpu_interco: - revision: b5f7a315929308823cacd81e1e4898f1eeecfc64 - version: null - source: - Git: https://github.com/pulp-platform/fpu_interco.git - dependencies: - - fpnew - - riscv hci: revision: 3cb3d99b2cebfeed55cb6ab9d98fce7b99e97cb9 version: null @@ -224,7 +216,7 @@ packages: - common_cells - common_verification riscv: - revision: a1dcae35edae6092ddbf92c424690cb903b678d5 + revision: c760db14dbd6cc3ec3b8ae8274df2eac7225bcac version: null source: Git: git@github.com:AlSaqr-platform/riscv_nn.git diff --git a/Bender.yml b/Bender.yml index 823bc714..8b2df85e 100644 --- a/Bender.yml +++ b/Bender.yml @@ -21,13 +21,12 @@ dependencies: idma: { git: "https://github.com/pulp-platform/iDMA.git", rev: 437ffa9dac5dea0daccfd3e8ae604d4f6ae2cdf1 } # branch: master hier-icache: { git: "https://github.com/pulp-platform/hier-icache.git", rev: "a971e364bf8090cf77fafad995b480c1ac7ea4e0" } # branch: yt/carfield cluster_peripherals: { git: "https://github.com/pulp-platform/cluster_peripherals.git", rev: c015839816938a790c8da5fd5829cfc536f1ca9c } # branch: yt/return-reg - fpu_interco: { git: "https://github.com/pulp-platform/fpu_interco.git", rev: "b5f7a315929308823cacd81e1e4898f1eeecfc64" } # branch: astral axi: { git: "https://github.com/pulp-platform/axi.git", version: =0.39.1-beta } axi_slice: { git: "https://github.com/pulp-platform/axi_slice.git", version: 1.1.4 } # deprecated, replaced by axi_cut (in axi repo) timer_unit: { git: "https://github.com/pulp-platform/timer_unit.git", version: 1.0.2 } common_cells: { git: 
"https://github.com/pulp-platform/common_cells.git", version: 1.21.0 } tech_cells_generic: { git: "https://github.com/pulp-platform/tech_cells_generic.git", version: 0.2.3 } - riscv: { git: "git@github.com:AlSaqr-platform/riscv_nn.git", rev: a1dcae35edae6092ddbf92c424690cb903b678d5 } # branch: yt/hmr + riscv: { git: "git@github.com:AlSaqr-platform/riscv_nn.git", rev: astral-v1.0 } cv32e40p: { git: "https://github.com/pulp-platform/cv32e40p.git", rev: e863f576699815b38cc9d80dbdede8ed5efd5991 } # `michaero/safety-island-clic` branch ibex: { git: "https://github.com/pulp-platform/ibex.git", rev: "pulpissimo-v6.1.2" } scm: { git: "https://github.com/pulp-platform/scm.git", rev: 74426dee36f28ae1c02f7635cf844a0156145320 } # branch: yt/bump-clkgating diff --git a/rtl/core_region.sv b/rtl/core_region.sv index 485030de..057c5ad8 100644 --- a/rtl/core_region.sv +++ b/rtl/core_region.sv @@ -238,7 +238,7 @@ import rapid_recovery_pkg::*; .PULP_CLUSTER ( 1 ), .FPU ( FPU ), .N_EXT_PERF_COUNTERS ( N_EXT_PERF_COUNTERS_ACTUAL ), - .Zfinx ( 0 ), + .Zfinx ( FPU ), .WAPUTYPE ( WAPUTYPE ), .DM_HaltAddress ( DEBUG_START_ADDR + 16'h0800 ) ) RI5CY_CORE ( diff --git a/rtl/pulp_cluster.sv b/rtl/pulp_cluster.sv index f3786487..e0f9ed98 100644 --- a/rtl/pulp_cluster.sv +++ b/rtl/pulp_cluster.sv @@ -424,21 +424,6 @@ hci_core_intf #( // cores -> event unit ctrl XBAR_PERIPH_BUS s_core_euctrl_bus[NB_CORES-1:0](); -// apu-interconnect -// handshake signals -logic [NB_CORES-1:0] fpu_master_req; -logic [NB_CORES-1:0] fpu_master_gnt; -// request channel -logic [NB_CORES-1:0][APU_NARGS_CPU-1:0][31:0] fpu_master_operands; -logic [NB_CORES-1:0][APU_WOP_CPU-1:0] fpu_master_op; -logic [NB_CORES-1:0][WAPUTYPE-1:0] fpu_master_type; -logic [NB_CORES-1:0][APU_NDSFLAGS_CPU-1:0] fpu_master_in_flags; -// response channel -logic [NB_CORES-1:0] fpu_master_rready; -logic [NB_CORES-1:0] fpu_master_valid; -logic [NB_CORES-1:0][31:0] fpu_master_result; -logic [NB_CORES-1:0][APU_NUSFLAGS_CPU-1:0] fpu_master_out_flags; - 
//----------------------------------------------------------------------// // Interfaces between ICache - L0 - Icache_Interco and Icache_ctrl_unit // // // @@ -956,16 +941,16 @@ generate .pc_backup_o ( backup_bus[i].pc_backup ), .csr_backup_o ( backup_bus[i].csr_backup ), //apu interface - .apu_master_req_o ( fpu_master_req [i] ), - .apu_master_gnt_i ( fpu_master_gnt [i] ), - .apu_master_type_o ( fpu_master_type [i] ), - .apu_master_operands_o ( fpu_master_operands [i] ), - .apu_master_op_o ( fpu_master_op [i] ), - .apu_master_flags_o ( fpu_master_in_flags [i] ), - .apu_master_valid_i ( fpu_master_valid [i] ), - .apu_master_ready_o ( fpu_master_rready [i] ), - .apu_master_result_i ( fpu_master_result [i] ), - .apu_master_flags_i ( fpu_master_out_flags[i] ) + .apu_master_req_o ( ), + .apu_master_gnt_i ( '0 ), + .apu_master_type_o ( ), + .apu_master_operands_o ( ), + .apu_master_op_o ( ), + .apu_master_flags_o ( ), + .apu_master_valid_i ( '0 ), + .apu_master_ready_o ( ), + .apu_master_result_i ( '0 ), + .apu_master_flags_i ( '0 ) ); assign dbg_core_halted[i] = core2hmr[i].debug_halted; @@ -1113,80 +1098,6 @@ hmr_unit #( .core_bus_outputs_i ( '0 ) ); -//**************************************************** -//**** Shared FPU cluster - Shared execution units *** -//**************************************************** -if (CLUST_FPU) begin: gen_fpu_subsystem - // request channel - logic [NB_CORES-1:0][2:0][31:0] s_apu__operands; - logic [NB_CORES-1:0][5:0] s_apu__op; - logic [NB_CORES-1:0][2:0] s_apu__type; - logic [NB_CORES-1:0][14:0] s_apu__flags; - // response channel - logic [NB_CORES-1:0][4:0] s_apu__rflags; - - for(genvar k=0; k< NB_CORES; k++) begin - assign s_apu__operands[k][2:0] = fpu_master_operands[k][2:0]; - assign s_apu__op[k][5:0] = fpu_master_op[k][5:0]; - assign s_apu__type[k][2:0] = fpu_master_type[k][2:0]; - assign s_apu__flags[k][14:0] = fpu_master_in_flags[k][14:0]; - assign fpu_master_out_flags[k][4:0] = s_apu__rflags[k][4:0]; - end - - 
shared_fpu_cluster #( - .NB_CORES ( NB_CORES ), - .NB_APUS ( 1 ), - .NB_FPNEW ( NumFpus ), - .FP_TYPE_WIDTH ( 3 ), - - .NB_CORE_ARGS ( 3 ), - .CORE_DATA_WIDTH ( 32 ), - .CORE_OPCODE_WIDTH ( 6 ), - .CORE_DSFLAGS_CPU ( 15 ), - .CORE_USFLAGS_CPU ( 5 ), - - .NB_APU_ARGS ( 2 ), - .APU_OPCODE_WIDTH ( 6 ), - .APU_DSFLAGS_CPU ( 15 ), - .APU_USFLAGS_CPU ( 5 ), - - .NB_FPNEW_ARGS ( 3 ), //= 3, - .FPNEW_OPCODE_WIDTH ( 6 ), //= 6, - .FPNEW_DSFLAGS_CPU ( 15 ), //= 15, - .FPNEW_USFLAGS_CPU ( 5 ), //= 5, - - .APUTYPE_ID ( 1 ), - .FPNEWTYPE_ID ( 0 ), - - .C_FPNEW_FMTBITS (fpnew_pkg::FP_FORMAT_BITS ), - .C_FPNEW_IFMTBITS (fpnew_pkg::INT_FORMAT_BITS ), - .C_ROUND_BITS (3 ), - .C_FPNEW_OPBITS (fpnew_pkg::OP_BITS ), - .USE_FPU_OPT_ALLOC ("FALSE"), - .USE_FPNEW_OPT_ALLOC ("TRUE"), - .FPNEW_INTECO_TYPE ("SINGLE_INTERCO") - ) i_shared_fpu_cluster ( - .clk ( clk_i ), - .rst_n ( rst_ni ), - .test_mode_i ( test_mode_i ), - .core_slave_req_i ( fpu_master_req ), - .core_slave_gnt_o ( fpu_master_gnt ), - .core_slave_type_i ( s_apu__type ), - .core_slave_operands_i ( s_apu__operands ), - .core_slave_op_i ( s_apu__op ), - .core_slave_flags_i ( s_apu__flags ), - .core_slave_rready_i ( fpu_master_rready ), - .core_slave_rvalid_o ( fpu_master_valid ), - .core_slave_rdata_o ( fpu_master_result ), - .core_slave_rflags_o ( s_apu__rflags ) - ); -end else begin: gen_no_shared_fpu - assign fpu_master_gnt = '0; - assign fpu_master_valid = '0; - assign fpu_master_result = '0; - assign fpu_master_out_flags = '0; -end - //************************************************************** //**** HW Processing Engines / Cluster-Coupled Accelerators **** //************************************************************** From 388261e1a8fb248e9dd25cf3dd63917cb14b6e2e Mon Sep 17 00:00:00 2001 From: Yvan Tortorella Date: Sat, 27 Jan 2024 00:15:55 +0100 Subject: [PATCH 7/7] Update regression-tests fetch branch. 
--- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3a01df22..039e1c7e 100644 --- a/Makefile +++ b/Makefile @@ -81,7 +81,7 @@ pulp-runtime: ## Clone regression tests for bare-metal verification regression-tests: - git clone git@github.com:pulp-platform/regression_tests.git -b yt/carfield $@ + git clone git@github.com:pulp-platform/regression_tests.git -b astral $@ ######################## # Build and simulation #