From 942269b4292d0ab35e3b82f98746578035df8ccc Mon Sep 17 00:00:00 2001 From: "msc23h24 Diyou Shen (dishen)" Date: Fri, 17 Nov 2023 16:24:54 +0100 Subject: [PATCH] WIP: Upgrade Snitch to support RVV and FP. --- Bender.lock | 15 +++ Bender.yml | 5 +- Makefile | 2 +- hardware/deps/snitch/Bender.yml | 1 + hardware/deps/snitch/src/snitch_md.sv | 37 +++--- hardware/src/mempool_cc.sv | 168 +++++++++++++++++++------- 6 files changed, 165 insertions(+), 63 deletions(-) diff --git a/Bender.lock b/Bender.lock index ac78066b8..c505479c7 100644 --- a/Bender.lock +++ b/Bender.lock @@ -29,6 +29,21 @@ packages: source: Git: "https://github.com/pulp-platform/common_verification.git" dependencies: [] + fpnew: + revision: null + version: null + source: + Path: hardware/deps/fpnew + dependencies: + - common_cells + - fpu_div_sqrt_mvp + fpu_div_sqrt_mvp: + revision: null + version: null + source: + Path: hardware/deps/fpu_div_sqrt_mvp + dependencies: + - common_cells idma: revision: ~ version: ~ diff --git a/Bender.yml b/Bender.yml index 74fbca3b6..b67fd6e93 100644 --- a/Bender.yml +++ b/Bender.yml @@ -14,9 +14,8 @@ dependencies: reqrsp_interface: { path: "hardware/deps/reqrsp_interface" } snitch: { path: "hardware/deps/snitch" } tech_cells_generic: { git: "https://github.com/pulp-platform/tech_cells_generic.git", version: 0.2.5 } - spatz: { git: "git@iis-git.ee.ethz.ch:spatz/spatz.git", rev: fdfe8cf } - FPnew: { git: "git@github.com:openhwgroup/cvfpu.git", rev: pulp-v0.1.3 } - + # spatz: { git: "git@iis-git.ee.ethz.ch:spatz/spatz.git", rev: 14d06547c3ac3e20a19880a02cdb43f79213c8c7 } + FPnew: { git: "https://github.com/pulp-platform/cvfpu.git", rev: pulp-v0.1.3 } workspace: checkout_dir: "./hardware/deps" diff --git a/Makefile b/Makefile index 473dd8896..8bf01a62a 100644 --- a/Makefile +++ b/Makefile @@ -53,7 +53,7 @@ else endif ifeq ($(spatz), 1) - OPCODES := "opcodes-rvv opcodes-smallfloat" + OPCODES := "opcodes-frep_CUSTOM opcodes-rvv opcodes-smallfloat" endif # Default target diff 
--git a/hardware/deps/snitch/Bender.yml b/hardware/deps/snitch/Bender.yml index 24427e910..8a3d0752b 100644 --- a/hardware/deps/snitch/Bender.yml +++ b/hardware/deps/snitch/Bender.yml @@ -22,6 +22,7 @@ sources: - src/snitch_icache/snitch_icache_pkg.sv # rest of RTL - src/snitch.sv + - src/snitch_md.sv - src/snitch_regfile_ff.sv # - src/snitch_regfile_latch.sv - src/snitch_lsu.sv diff --git a/hardware/deps/snitch/src/snitch_md.sv b/hardware/deps/snitch/src/snitch_md.sv index 0b746f1e4..4b0e6ae96 100644 --- a/hardware/deps/snitch/src/snitch_md.sv +++ b/hardware/deps/snitch/src/snitch_md.sv @@ -12,7 +12,7 @@ // `SNITCH_ENABLE_PERF Enables mcycle, minstret performance counters (read only) // `SNITCH_ENABLE_STALL_COUNTER Enables stall_ins, stall_raw, stall_lsu performance counters (read only) -module snitch +module snitch_md import snitch_pkg::meta_id_t; #( parameter logic [31:0] BootAddr = 32'h0000_1000, @@ -36,7 +36,7 @@ module snitch parameter bit XDivSqrt = 0, parameter int RegNrWritePorts = 2, // Implement one or two write ports into the register file - parameter type acc_rsp_t = logic, + parameter type acc_issue_rsp_t = logic, // Dependant parameters. localparam bit FP_EN = RVF || RVD // Enable FP in general ) ( @@ -75,7 +75,7 @@ module snitch /// AXI-like handshaking. /// Same IDs need to be handled in-order. output logic [31:0] acc_qaddr_o, - output logic [5:0] acc_qid_o, + output logic [4:0] acc_qid_o, // what should be the id width, 5 or 6? 
output logic [31:0] acc_qdata_op_o, output logic [31:0] acc_qdata_arga_o, output logic [31:0] acc_qdata_argb_o, @@ -83,13 +83,13 @@ module snitch output logic acc_qvalid_o, input logic acc_qready_i, input logic [31:0] acc_pdata_i, - input logic [5:0] acc_pid_i, + input logic [4:0] acc_pid_i, input logic acc_perror_i, input logic acc_pvalid_i, output logic acc_pready_o, - input acc_rsp_t acc_qdata_rsp_i, + input acc_issue_rsp_t acc_qdata_rsp_i, input logic [1:0] acc_mem_finished_i, - input logic [1:0] acc_mem_str_finished_i + input logic [1:0] acc_mem_str_finished_i, /// TCDM Data Interface /// Write transactions do not return data on the `P Channel` /// Transactions need to be handled strictly in-order. @@ -190,6 +190,7 @@ module snitch logic [RegWidth-1:0] lsu_rd; logic [31:0] lsu_qaddr; + logic retire_load; // retire a load instruction logic retire_p; // retire from post-increment instructions logic retire_i; // retire the rest of the base instruction set @@ -1915,7 +1916,7 @@ module snitch riscv_instr::FNMADD_S: begin if (FP_EN && RVF && (!(inst_data_i inside {riscv_instr::FDIV_S, riscv_instr::FSQRT_S}) || XDivSqrt)) begin write_rd = 1'b0; - x_issue_valid_o = valid_instr; + acc_qvalid_o = valid_instr; end else begin illegal_inst = 1'b1; end @@ -1969,11 +1970,11 @@ module snitch riscv_instr::FMIN_H, riscv_instr::FMAX_H: begin if (FP_EN && XF16 && fcsr_q.fmode.dst == 1'b0 && - (!(inst_data_i inside {FDIV_H, FSQRT_H}) || XDivSqrt)) begin + (!(inst_data_i inside {riscv_instr::FDIV_H, riscv_instr::FSQRT_H}) || XDivSqrt)) begin write_rd = 1'b0; acc_qvalid_o = valid_instr; end else if (FP_EN && XF16ALT && fcsr_q.fmode.dst == 1'b1 && - (!(inst_data_i inside {FDIV_H, FSQRT_H}) || XDivSqrt)) begin + (!(inst_data_i inside {riscv_instr::FDIV_H, riscv_instr::FSQRT_H}) || XDivSqrt)) begin write_rd = 1'b0; acc_qvalid_o = valid_instr; end else begin @@ -2040,7 +2041,7 @@ module snitch riscv_instr::FMSUB_B, riscv_instr::FNMSUB_B, riscv_instr::FNMADD_B: begin - if (FP_EN && && 
XF8 && fcsr_q.fmode.dst == 1'b0 && (!(inst_data_i inside {riscv_instr::FDIV_B, riscv_instr::FSQRT_B}) || XDivSqrt)) begin + if (FP_EN && XF8 && fcsr_q.fmode.dst == 1'b0 && (!(inst_data_i inside {riscv_instr::FDIV_B, riscv_instr::FSQRT_B}) || XDivSqrt)) begin write_rd = 1'b0; acc_qvalid_o = valid_instr; end else begin @@ -2229,7 +2230,7 @@ module snitch opa_select = Reg; opb_select = IImmediate; write_rd = 1'b0; - acc_qvalid_o = valid_instr & trans_ready; + acc_qvalid_o = valid_instr; ls_size = Word; is_fp_load = 1'b1; end else begin @@ -2241,7 +2242,7 @@ module snitch opa_select = Reg; opb_select = SFImmediate; write_rd = 1'b0; - acc_qvalid_o = valid_instr & trans_ready; + acc_qvalid_o = valid_instr; ls_size = Word; is_fp_store = 1'b1; end else begin @@ -2254,7 +2255,7 @@ module snitch opa_select = Reg; opb_select = IImmediate; write_rd = 1'b0; - acc_qvalid_o = valid_instr & trans_ready; + acc_qvalid_o = valid_instr; ls_size = Double; is_fp_load = 1'b1; end else begin @@ -2266,7 +2267,7 @@ module snitch opa_select = Reg; opb_select = SFImmediate; write_rd = 1'b0; - acc_qvalid_o = valid_instr & trans_ready; + acc_qvalid_o = valid_instr; ls_size = Double; is_fp_store = 1'b1; end else begin @@ -2279,7 +2280,7 @@ module snitch opa_select = Reg; opb_select = IImmediate; write_rd = 1'b0; - acc_qvalid_o = valid_instr & trans_ready; + acc_qvalid_o = valid_instr; ls_size = HalfWord; is_fp_load = 1'b1; end else begin @@ -2291,7 +2292,7 @@ module snitch opa_select = Reg; opb_select = SFImmediate; write_rd = 1'b0; - acc_qvalid_o = valid_instr & trans_ready; + acc_qvalid_o = valid_instr; ls_size = HalfWord; is_fp_store = 1'b1; end else begin @@ -2304,7 +2305,7 @@ module snitch opa_select = Reg; opb_select = IImmediate; write_rd = 1'b0; - acc_qvalid_o = valid_instr & trans_ready; + acc_qvalid_o = valid_instr; ls_size = Byte; is_fp_load = 1'b1; end else begin @@ -2316,7 +2317,7 @@ module snitch opa_select = Reg; opb_select = SFImmediate; write_rd = 1'b0; - acc_qvalid_o = 
valid_instr & trans_ready; + acc_qvalid_o = valid_instr; ls_size = Byte; is_fp_store = 1'b1; end else begin diff --git a/hardware/src/mempool_cc.sv b/hardware/src/mempool_cc.sv index ef3daff45..17b957e4b 100644 --- a/hardware/src/mempool_cc.sv +++ b/hardware/src/mempool_cc.sv @@ -55,50 +55,136 @@ module mempool_cc logic acc_req_d_valid, acc_req_d_ready, acc_resp_d_valid, acc_resp_d_ready; logic acc_req_q_valid, acc_req_q_ready, acc_resp_q_valid, acc_resp_q_ready; - // Snitch Integer Core - snitch #( - .BootAddr ( BootAddr ), - .MTVEC ( MTVEC ), - .RVE ( RVE ), - .RVM ( RVM ) + // // Snitch Integer Core + // snitch #( + // .BootAddr ( BootAddr ), + // .MTVEC ( MTVEC ), + // .RVE ( RVE ), + // .RVM ( RVM ) + // ) i_snitch ( + // .clk_i , + // .rst_i , + // .hart_id_i , + // .inst_addr_o , + // .inst_data_i , + // .inst_valid_o , + // .inst_ready_i , + // .acc_qaddr_o ( acc_req_d.addr ), + // .acc_qid_o ( acc_req_d.id ), + // .acc_qdata_op_o ( acc_req_d.data_op ), + // .acc_qdata_arga_o ( acc_req_d.data_arga ), + // .acc_qdata_argb_o ( acc_req_d.data_argb ), + // .acc_qdata_argc_o ( acc_req_d.data_argc ), + // .acc_qvalid_o ( acc_req_d_valid ), + // .acc_qready_i ( acc_req_d_ready ), + // .acc_pdata_i ( acc_resp_q.data ), + // .acc_pid_i ( acc_resp_q.id ), + // .acc_perror_i ( acc_resp_q.error ), + // .acc_pvalid_i ( acc_resp_q_valid ), + // .acc_pready_o ( acc_resp_q_ready ), + // .data_qaddr_o ( data_req_d.addr ), + // .data_qwrite_o ( data_req_d.write ), + // .data_qamo_o ( data_req_d.amo ), + // .data_qdata_o ( data_req_d.data ), + // .data_qstrb_o ( data_req_d.strb ), + // .data_qid_o ( data_req_d.id ), + // .data_qvalid_o ( data_req_d_valid ), + // .data_qready_i ( data_req_d_ready ), + // .data_pdata_i ( data_resp_q.data ), + // .data_perror_i ( data_resp_q.error ), + // .data_pid_i ( data_resp_q.id ), + // .data_pvalid_i ( data_resp_q_valid ), + // .data_pready_o ( data_resp_q_ready ), + // .wake_up_sync_i ( wake_up_sync_i ), + // .core_events_o ( 
core_events_o ) + // ); + + typedef struct packed { + logic accept; + logic writeback; + logic loadstore; + logic exception; + logic isfloat; + } acc_issue_rsp_t; + + fpnew_pkg::roundmode_e fpu_rnd_mode; + fpnew_pkg::fmt_mode_t fpu_fmt_mode; + fpnew_pkg::status_t fpu_status; + acc_issue_rsp_t acc_req_rsp; + // Spatz Memory consistency signals + logic [1:0] spatz_mem_finished; + logic [1:0] spatz_mem_str_finished; + + + snitch_md #( + .BootAddr ( BootAddr ), + .MTVEC ( MTVEC ), + .RVE ( RVE ), + .RVM ( RVM ), + .XFVEC ( 0 ), + .XFDOTP ( 0 ), + .XFAUX ( 0 ), + .RVF ( 0 ), + .RVD ( 0 ), + .XF16 ( 0 ), + .XF16ALT ( 0 ), + .XF8 ( 0 ), + .XF8ALT ( 0 ), + .acc_issue_rsp_t ( acc_issue_rsp_t ) ) i_snitch ( - .clk_i , - .rst_i , - .hart_id_i , - .inst_addr_o , - .inst_data_i , - .inst_valid_o , - .inst_ready_i , - .acc_qaddr_o ( acc_req_d.addr ), - .acc_qid_o ( acc_req_d.id ), - .acc_qdata_op_o ( acc_req_d.data_op ), - .acc_qdata_arga_o ( acc_req_d.data_arga ), - .acc_qdata_argb_o ( acc_req_d.data_argb ), - .acc_qdata_argc_o ( acc_req_d.data_argc ), - .acc_qvalid_o ( acc_req_d_valid ), - .acc_qready_i ( acc_req_d_ready ), - .acc_pdata_i ( acc_resp_q.data ), - .acc_pid_i ( acc_resp_q.id ), - .acc_perror_i ( acc_resp_q.error ), - .acc_pvalid_i ( acc_resp_q_valid ), - .acc_pready_o ( acc_resp_q_ready ), - .data_qaddr_o ( data_req_d.addr ), - .data_qwrite_o ( data_req_d.write ), - .data_qamo_o ( data_req_d.amo ), - .data_qdata_o ( data_req_d.data ), - .data_qstrb_o ( data_req_d.strb ), - .data_qid_o ( data_req_d.id ), - .data_qvalid_o ( data_req_d_valid ), - .data_qready_i ( data_req_d_ready ), - .data_pdata_i ( data_resp_q.data ), - .data_perror_i ( data_resp_q.error ), - .data_pid_i ( data_resp_q.id ), - .data_pvalid_i ( data_resp_q_valid ), - .data_pready_o ( data_resp_q_ready ), - .wake_up_sync_i ( wake_up_sync_i ), - .core_events_o ( core_events_o ) + .clk_i ( clk_i ), // checked + .rst_i ( rst_i ), // checked + .hart_id_i ( hart_id_i ), // checked + .inst_addr_o ( 
inst_addr_o ), // checked + .inst_data_i ( inst_data_i ), // checked + .inst_valid_o ( inst_valid_o ), // checked + .inst_ready_i ( inst_ready_i ), // checked + .acc_qaddr_o ( acc_req_d.addr ), // checked + .acc_qid_o ( acc_req_d.id ), // checked + .acc_qdata_op_o ( acc_req_d.data_op ), // checked, 32 bits, HW + .acc_qdata_arga_o ( acc_req_d.data_arga ), // checked, 32 bits, HW + .acc_qdata_argb_o ( acc_req_d.data_argb ), // checked, 32 bits, HW + .acc_qdata_argc_o ( acc_req_d.data_argc ), // checked, 32 bits, HW + .acc_qvalid_o ( acc_req_d_valid ), // checked + .acc_qready_i ( acc_req_d_ready ), // checked + .acc_pdata_i ( acc_resp_q.data ), // checked, 32 bits, HW + .acc_pid_i ( acc_resp_q.id ), // checked, 4:0 + .acc_perror_i ( acc_resp_q.error ), // checked + .acc_pvalid_i ( acc_resp_q_valid ), // checked + .acc_pready_o ( acc_resp_q_ready ), // checked + .acc_qdata_rsp_i ( acc_req_rsp ), // used by spatz decoder + .acc_mem_finished_i ( spatz_mem_finished ), // used by spatz mem + .acc_mem_str_finished_i ( spatz_mem_str_finished ), // used by spatz mem + .data_qaddr_o ( data_req_d.addr ), // checked, 32 bits, HW + .data_qwrite_o ( data_req_d.write ), // checked + .data_qamo_o ( data_req_d.amo ), // checked + .data_qdata_o ( data_req_d.data ), // checked, 32 bits, HW + .data_qstrb_o ( data_req_d.strb ), // checked + .data_qid_o ( data_req_d.id ), // MetaIdWidth + .data_qvalid_o ( data_req_d_valid ), // checked + .data_qready_i ( data_req_d_ready ), // checked + .data_pdata_i ( data_resp_q.data ), // checked, 32 bits, HW + .data_perror_i ( data_resp_q.error ), // checked + .data_pid_i ( data_resp_q.id ), // MetaIdWidth + .data_pvalid_i ( data_resp_q_valid ), // checked + .data_pready_o ( data_resp_q_ready ), // checked + .wake_up_sync_i ( wake_up_sync_i ), // checked + .fpu_fmt_mode_o ( fpu_fmt_mode ), // checked + .fpu_rnd_mode_o ( fpu_rnd_mode ), // used by spatz, FPU + .fpu_status_i ( fpu_status ), // used by spatz, FPU + .core_events_o ( core_events_o ) // 
checked ); + assign spatz_mem_finished = '0; + assign spatz_mem_str_finished = '0; + assign fpu_status = '0; + assign acc_req_rsp.accept = 1'b1; + assign acc_req_rsp.writeback = 1'b1; + assign acc_req_rsp.loadstore = 1'b0; + assign acc_req_rsp.exception = 1'b0; + assign acc_req_rsp.isfloat = 1'b0; + + // Cut off-loading request path spill_register #( .T ( snitch_pkg::acc_req_t ),