diff --git a/hardware/src/mempool_cluster.sv b/hardware/src/mempool_cluster.sv
index deb8d6af7..91c8a2f52 100644
--- a/hardware/src/mempool_cluster.sv
+++ b/hardware/src/mempool_cluster.sv
@@ -125,6 +125,21 @@ module mempool_cluster
     .meta_i (dma_meta_q )
   );
 
+  for (genvar g = 0; unsigned'(g) < NumGroups; g++) begin: gen_dma_req_group_register
+    spill_register #(
+      .T(dma_req_t)
+    ) i_dma_req_group_register (
+      .clk_i  (clk_i                   ),
+      .rst_ni (rst_ni                  ),
+      .data_i (dma_req_group[g]        ),
+      .valid_i(dma_req_group_valid[g]  ),
+      .ready_o(dma_req_group_ready[g]  ),
+      .data_o (dma_req_group_q[g]      ),
+      .valid_o(dma_req_group_q_valid[g]),
+      .ready_i(dma_req_group_q_ready[g])
+    );
+  end : gen_dma_req_group_register
+
 `ifdef TERAPOOL
   /*********************
    *  TeraPool Section  *
@@ -282,15 +297,15 @@ module mempool_cluster
         .wake_up_i       (wake_up_q[g*NumCoresPerGroup +: NumCoresPerGroup]             ),
         .ro_cache_ctrl_i (ro_cache_ctrl_q[g]                                            ),
         // DMA request
-        .dma_req_i       (dma_req[g]                                                    ),
-        .dma_req_valid_i (dma_req_valid[g]                                              ),
-        .dma_req_ready_o (dma_req_ready[g]                                              ),
+        .dma_req_i       (dma_req_group_q[g]                                            ),
+        .dma_req_valid_i (dma_req_group_q_valid[g]                                      ),
+        .dma_req_ready_o (dma_req_group_q_ready[g]                                      ),
         // DMA status
-        .dma_meta_o_backend_idle_   (dma_meta[g][1]                                     ),
-        .dma_meta_o_trans_complete_ (dma_meta[g][0]                                     ),
+        .dma_meta_o_backend_idle_   (dma_meta[g][1]                                     ),
+        .dma_meta_o_trans_complete_ (dma_meta[g][0]                                     ),
         // AXI interface
-        .axi_mst_req_o   (axi_mst_req[g*NumAXIMastersPerGroup +: NumAXIMastersPerGroup] ),
-        .axi_mst_resp_i  (axi_mst_resp[g*NumAXIMastersPerGroup +: NumAXIMastersPerGroup])
+        .axi_mst_req_o   (axi_mst_req[g*NumAXIMastersPerGroup +: NumAXIMastersPerGroup] ),
+        .axi_mst_resp_i  (axi_mst_resp[g*NumAXIMastersPerGroup +: NumAXIMastersPerGroup] )
       );
     end else if ((PostLayoutGr == 0) & PostLayoutSg & (g == 0)) begin: gen_rtl_group_postly_sg
       mempool_group #(
@@ -323,9 +338,9 @@ module mempool_cluster
        .wake_up_i       (wake_up_q[g*NumCoresPerGroup +: NumCoresPerGroup]),
        .ro_cache_ctrl_i (ro_cache_ctrl_q[g]                               ),
        // DMA request
-       .dma_req_i       (dma_req[g]                                       ),
-       .dma_req_valid_i (dma_req_valid[g]                                 ),
-       .dma_req_ready_o (dma_req_ready[g]                                 ),
+       .dma_req_i       (dma_req_group_q[g]                               ),
+       .dma_req_valid_i (dma_req_group_q_valid[g]                         ),
+       .dma_req_ready_o (dma_req_group_q_ready[g]                         ),
        // DMA status
        .dma_meta_o      (dma_meta[g]                                      ),
        // AXI interface
@@ -361,9 +376,9 @@ module mempool_cluster
        .wake_up_i       (wake_up_q[g*NumCoresPerGroup +: NumCoresPerGroup]),
        .ro_cache_ctrl_i (ro_cache_ctrl_q[g]                               ),
        // DMA request
-       .dma_req_i       (dma_req[g]                                       ),
-       .dma_req_valid_i (dma_req_valid[g]                                 ),
-       .dma_req_ready_o (dma_req_ready[g]                                 ),
+       .dma_req_i       (dma_req_group_q[g]                               ),
+       .dma_req_valid_i (dma_req_group_q_valid[g]                         ),
+       .dma_req_ready_o (dma_req_group_q_ready[g]                         ),
        // DMA status
        .dma_meta_o      (dma_meta[g]                                      ),
        // AXI interface
@@ -444,9 +459,9 @@ module mempool_cluster
        .wake_up_i       (wake_up_q[g*NumCoresPerGroup +: NumCoresPerGroup]),
        .ro_cache_ctrl_i (ro_cache_ctrl_q[g]                               ),
        // DMA request
-       .dma_req_i       (dma_req[g]                                       ),
-       .dma_req_valid_i (dma_req_valid[g]                                 ),
-       .dma_req_ready_o (dma_req_ready[g]                                 ),
+       .dma_req_i       (dma_req_group_q[g]                               ),
+       .dma_req_valid_i (dma_req_group_q_valid[g]                         ),
+       .dma_req_ready_o (dma_req_group_q_ready[g]                         ),
        // DMA status
        .dma_meta_o      (dma_meta[g]                                      ),
        // AXI interface
diff --git a/hardware/src/mempool_tile.sv b/hardware/src/mempool_tile.sv
index f9521623e..a8f93705c 100644
--- a/hardware/src/mempool_tile.sv
+++ b/hardware/src/mempool_tile.sv
@@ -549,41 +549,25 @@ module mempool_tile
    *  Remote Interconnects  *
    ****************************/
 
-<<<<<<< HEAD
-  tcdm_master_req_t  [NumCoresPerTile-1:0] remote_req_interco;
-  logic              [NumCoresPerTile-1:0] remote_req_interco_valid;
-  logic              [NumCoresPerTile-1:0] remote_req_interco_ready;
-  tcdm_master_resp_t [NumCoresPerTile-1:0] remote_resp_interco;
-  logic              [NumCoresPerTile-1:0] remote_resp_interco_valid;
-  logic              [NumCoresPerTile-1:0] remote_resp_interco_ready;
-=======
   tcdm_master_req_t  [NumCoresPerTile*NumDataPortsPerCore-1:0] remote_req_interco;
   logic              [NumCoresPerTile*NumDataPortsPerCore-1:0] remote_req_interco_valid;
   logic              [NumCoresPerTile*NumDataPortsPerCore-1:0] remote_req_interco_ready;
-  group_id_t         [NumCoresPerTile*NumDataPortsPerCore-1:0] remote_req_interco_tgt_sel;
   tcdm_master_resp_t [NumCoresPerTile*NumDataPortsPerCore-1:0] remote_resp_interco;
   logic              [NumCoresPerTile*NumDataPortsPerCore-1:0] remote_resp_interco_valid;
   logic              [NumCoresPerTile*NumDataPortsPerCore-1:0] remote_resp_interco_ready;
->>>>>>> e17227cc... WIP:[MemPool-Spatz]
 
 `ifdef TERAPOOL
-  tile_remote_sel_t [NumCoresPerTile-1:0] remote_req_interco_tgt_sel;
-  group_id_t        [NumCoresPerTile-1:0] remote_req_interco_tgt_g_sel_tmp;
-  sgroup_group_id_t [NumCoresPerTile-1:0] remote_req_interco_tgt_sg_sel_tmp;
+  tile_remote_sel_t [NumCoresPerTile*NumDataPortsPerCore-1:0] remote_req_interco_tgt_sel;
+  group_id_t        [NumCoresPerTile*NumDataPortsPerCore-1:0] remote_req_interco_tgt_g_sel_tmp;
+  sgroup_group_id_t [NumCoresPerTile*NumDataPortsPerCore-1:0] remote_req_interco_tgt_sg_sel_tmp;
 `else
-  group_id_t        [NumCoresPerTile-1:0] remote_req_interco_tgt_sel;
+  group_id_t        [NumCoresPerTile*NumDataPortsPerCore-1:0] remote_req_interco_tgt_sel;
 `endif
 
   stream_xbar #(
-<<<<<<< HEAD
-    .NumInp   (NumCoresPerTile                  ),
-    .NumOut   (NumGroups+NumSubGroupsPerGroup-1 ),
-    .payload_t(tcdm_master_req_t                )
-=======
     .NumInp   (NumCoresPerTile*NumDataPortsPerCore ),
-    .NumOut   (NumGroups                           ),
+    .NumOut   (NumGroups+NumSubGroupsPerGroup-1    ),
     .payload_t(tcdm_master_req_t                   )
->>>>>>> e17227cc... WIP:[MemPool-Spatz]
   ) i_remote_req_interco (
     .clk_i  (clk_i ),
     .rst_ni (rst_ni),
@@ -603,15 +587,9 @@ module mempool_tile
   );
 
   stream_xbar #(
-<<<<<<< HEAD
-    .NumInp   (NumGroups+NumSubGroupsPerGroup-1 ),
-    .NumOut   (NumCoresPerTile                  ),
-    .payload_t(tcdm_master_resp_t               )
-=======
-    .NumInp   (NumGroups                            ),
+    .NumInp   (NumGroups+NumSubGroupsPerGroup-1     ),
     .NumOut   (NumCoresPerTile*NumDataPortsPerCore  ),
     .payload_t(tcdm_master_resp_t                   )
->>>>>>> e17227cc... WIP:[MemPool-Spatz]
   ) i_remote_resp_interco (
     .clk_i  (clk_i ),
     .rst_ni (rst_ni),
@@ -641,33 +619,18 @@ module mempool_tile
   logic             [NumCoresPerTile*NumDataPortsPerCore-1:0] local_resp_interco_ready;
   tcdm_slave_resp_t [NumCoresPerTile*NumDataPortsPerCore-1:0] local_resp_interco_payload;
 
-<<<<<<< HEAD
-  logic [NumCoresPerTile+NumGroups+NumSubGroupsPerGroup-1-1:0][idx_width(NumBanksPerTile)-1:0] local_req_interco_tgt_sel;
-  for (genvar j = 0; unsigned'(j) < NumCoresPerTile; j++) begin: gen_local_req_interco_tgt_sel_local
+  logic [NumCoresPerTile*NumDataPortsPerCore+NumGroups+NumSubGroupsPerGroup-1-1:0][idx_width(NumBanksPerTile)-1:0] local_req_interco_tgt_sel;
+  for (genvar j = 0; unsigned'(j) < NumCoresPerTile*NumDataPortsPerCore; j++) begin: gen_local_req_interco_tgt_sel_local
     assign local_req_interco_tgt_sel[j] = local_req_interco_payload[j].tgt_addr[idx_width(NumBanksPerTile)-1:0];
   end: gen_local_req_interco_tgt_sel_local
   for (genvar j = 0; unsigned'(j) < NumGroups+NumSubGroupsPerGroup-1; j++) begin: gen_local_req_interco_tgt_sel_remote
-    assign local_req_interco_tgt_sel[j + NumCoresPerTile] = postreg_tcdm_slave_req[j].tgt_addr[idx_width(NumBanksPerTile)-1:0];
+    assign local_req_interco_tgt_sel[j + NumCoresPerTile*NumDataPortsPerCore] = postreg_tcdm_slave_req[j].tgt_addr[idx_width(NumBanksPerTile)-1:0];
   end: gen_local_req_interco_tgt_sel_remote
 
   stream_xbar #(
-    .NumInp   (NumCoresPerTile+NumGroups+NumSubGroupsPerGroup-1),
+    .NumInp   (NumCoresPerTile*NumDataPortsPerCore+NumGroups+NumSubGroupsPerGroup-1),
     .NumOut   (NumBanksPerTile  ),
     .payload_t(tcdm_slave_req_t )
-=======
-  logic [NumCoresPerTile*NumDataPortsPerCore+NumGroups-1:0][idx_width(NumBanksPerTile)-1:0] local_req_interco_tgt_sel;
-  for (genvar j = 0; unsigned'(j) < NumCoresPerTile*NumDataPortsPerCore; j++) begin: gen_local_req_interco_tgt_sel_local
-    assign local_req_interco_tgt_sel[j] = local_req_interco_payload[j].tgt_addr[idx_width(NumBanksPerTile)-1:0];
-  end: gen_local_req_interco_tgt_sel_local
-  for (genvar j = 0; unsigned'(j) < NumGroups; j++) begin: gen_local_req_interco_tgt_sel_remote
-    assign local_req_interco_tgt_sel[j + NumCoresPerTile*NumDataPortsPerCore] = postreg_tcdm_slave_req[j].tgt_addr[idx_width(NumBanksPerTile)-1:0];
-  end: gen_local_req_interco_tgt_sel_remote
-
-  stream_xbar #(
-    .NumInp   (NumCoresPerTile*NumDataPortsPerCore+NumGroups),
-    .NumOut   (NumBanksPerTile  ),
-    .payload_t(tcdm_slave_req_t )
->>>>>>> e17227cc... WIP:[MemPool-Spatz]
   ) i_local_req_interco (
     .clk_i  (clk_i ),
     .rst_ni (rst_ni),
@@ -687,15 +650,9 @@ module mempool_tile
   );
 
   stream_xbar #(
-<<<<<<< HEAD
-    .NumInp   (NumBanksPerTile   ),
-    .NumOut   (NumCoresPerTile+NumGroups+NumSubGroupsPerGroup-1),
-    .payload_t(tcdm_slave_resp_t )
-=======
-    .NumInp   (NumBanksPerTile   ),
-    .NumOut   (NumCoresPerTile*NumDataPortsPerCore+NumGroups),
-    .payload_t(tcdm_slave_resp_t )
->>>>>>> e17227cc... WIP:[MemPool-Spatz]
+    .NumInp   (NumBanksPerTile   ),
+    .NumOut   (NumCoresPerTile*NumDataPortsPerCore+NumGroups+NumSubGroupsPerGroup-1),
+    .payload_t(tcdm_slave_resp_t )
   ) i_local_resp_interco (
     .clk_i  (clk_i ),
     .rst_ni (rst_ni),
@@ -750,38 +707,32 @@ module mempool_tile
     }
   };
 
+  for (genvar c = 0; c < NumCoresPerTile; c++) begin: gen_core_mux
-<<<<<<< HEAD
-    `ifdef TERAPOOL
-    // Remove tile index from local_req_interco_addr_int, since it will not be used for routing.
-    addr_t local_req_interco_addr_int;
-    assign local_req_interco_payload[c].tgt_addr =
-=======
     for (genvar p = 0; p < NumDataPortsPerCore; p++) begin: gen_core_port_mux
       localparam int unsigned idx = NumDataPortsPerCore*c + p;
+`ifdef TERAPOOL
      // Remove tile index from local_req_interco_addr_int, since it will not be used for routing.
      addr_t local_req_interco_addr_int;
      assign local_req_interco_payload[idx].tgt_addr =
->>>>>>> e17227cc... WIP:[MemPool-Spatz]
        tcdm_addr_t'({local_req_interco_addr_int[ByteOffset + idx_width(NumBanksPerTile) + $clog2(NumTiles) +: TCDMAddrMemWidth], // Bank address
                      local_req_interco_addr_int[ByteOffset +: idx_width(NumBanksPerTile)]}); // Bank
 
      // Switch tile and bank indexes for correct upper level routing, and remove the group index
      addr_t prescramble_tcdm_req_tgt_addr;
      if (NumTilesPerGroup == 1) begin : gen_remote_req_interco_tgt_addr
-<<<<<<< HEAD
-        assign remote_req_interco[c].tgt_addr =
+        assign remote_req_interco[idx].tgt_addr =
          tcdm_addr_t'({prescramble_tcdm_req_tgt_addr[ByteOffset + idx_width(NumBanksPerTile) + $clog2(NumGroups) +: TCDMAddrMemWidth], // Bank address
                        prescramble_tcdm_req_tgt_addr[ByteOffset +: idx_width(NumBanksPerTile)]}); // Tile
      end else begin : gen_remote_req_interco_tgt_addr
        always_comb begin
-          if (remote_req_interco_tgt_g_sel_tmp[c] == 'b0) begin
-            remote_req_interco[c].tgt_addr =
+          if (remote_req_interco_tgt_g_sel_tmp[idx] == 'b0) begin
+            remote_req_interco[idx].tgt_addr =
              tcdm_addr_t'({prescramble_tcdm_req_tgt_addr[ByteOffset + idx_width(NumBanksPerTile) + $clog2(NumTilesPerGroup) + $clog2(NumGroups) +: TCDMAddrMemWidth], // Bank address
                            prescramble_tcdm_req_tgt_addr[ByteOffset +: idx_width(NumBanksPerTile)], // Bank
                            prescramble_tcdm_req_tgt_addr[ByteOffset + idx_width(NumBanksPerTile) +: $clog2(NumTilesPerSubGroup)]}); // Tile
          end else begin
-            remote_req_interco[c].tgt_addr =
+            remote_req_interco[idx].tgt_addr =
              tcdm_addr_t'({prescramble_tcdm_req_tgt_addr[ByteOffset + idx_width(NumBanksPerTile) + $clog2(NumTilesPerGroup) + $clog2(NumGroups) +: TCDMAddrMemWidth], // Bank address
                            prescramble_tcdm_req_tgt_addr[ByteOffset +: idx_width(NumBanksPerTile)], // Bank
                            prescramble_tcdm_req_tgt_addr[ByteOffset + idx_width(NumBanksPerTile) +: $clog2(NumTilesPerGroup)]}); // Tile
@@ -792,173 +743,54 @@ module mempool_tile
      // Remote selection signal
      if (NumGroups == 1) begin : gen_remote_req_interco_tgt_sel
        if (NumSubGroupsPerGroup == 1) begin : gen_const_sel
-          assign remote_req_interco_tgt_sel[c] = 1'b0;
+          assign remote_req_interco_tgt_sel[idx] = 1'b0;
        end else begin : gen_const_sel
-          assign remote_req_interco_tgt_sel[c] = (prescramble_tcdm_req_tgt_addr[ByteOffset + $clog2(NumBanksPerTile) + $clog2(NumTilesPerSubGroup) +: $clog2(NumSubGroupsPerGroup)]) ^ sub_group_id;
+          assign remote_req_interco_tgt_sel[idx] = (prescramble_tcdm_req_tgt_addr[ByteOffset + $clog2(NumBanksPerTile) + $clog2(NumTilesPerSubGroup) +: $clog2(NumSubGroupsPerGroup)]) ^ sub_group_id;
        end
      end else begin : gen_remote_req_interco_tgt_sel
        // Output port depends on both the target and initiator group and sub-group
        if (NumSubGroupsPerGroup == 1) begin : gen_remote_group_sel
-          assign remote_req_interco_tgt_sel[c] = (prescramble_tcdm_req_tgt_addr[ByteOffset + $clog2(NumBanksPerTile) + $clog2(NumTilesPerGroup) +: $clog2(NumGroups)]) ^ group_id;
+          assign remote_req_interco_tgt_sel[idx] = (prescramble_tcdm_req_tgt_addr[ByteOffset + $clog2(NumBanksPerTile) + $clog2(NumTilesPerGroup) +: $clog2(NumGroups)]) ^ group_id;
        end else begin : gen_remote_group_sel
-          assign remote_req_interco_tgt_g_sel_tmp[c] = (prescramble_tcdm_req_tgt_addr[ByteOffset + $clog2(NumBanksPerTile) + $clog2(NumTilesPerGroup) +: $clog2(NumGroups)]) ^ group_id;
-          assign remote_req_interco_tgt_sg_sel_tmp[c] = (prescramble_tcdm_req_tgt_addr[ByteOffset + $clog2(NumBanksPerTile) + $clog2(NumTilesPerSubGroup) +: $clog2(NumSubGroupsPerGroup)]) ^ sub_group_id;
+          assign remote_req_interco_tgt_g_sel_tmp[idx] = (prescramble_tcdm_req_tgt_addr[ByteOffset + $clog2(NumBanksPerTile) + $clog2(NumTilesPerGroup) +: $clog2(NumGroups)]) ^ group_id;
+          assign remote_req_interco_tgt_sg_sel_tmp[idx] = (prescramble_tcdm_req_tgt_addr[ByteOffset + $clog2(NumBanksPerTile) + $clog2(NumTilesPerSubGroup) +: $clog2(NumSubGroupsPerGroup)]) ^ sub_group_id;
          always_comb begin : gen_remote_sub_group_sel
-            if (remote_req_interco_tgt_g_sel_tmp[c] == 'b0) begin: gen_local_group_sel
-              remote_req_interco_tgt_sel[c] = remote_req_interco_tgt_sg_sel_tmp[c];
+            if (remote_req_interco_tgt_g_sel_tmp[idx] == 'b0) begin: gen_local_group_sel
+              remote_req_interco_tgt_sel[idx] = remote_req_interco_tgt_sg_sel_tmp[idx];
            end else begin: gen_remote_group_sel
-              remote_req_interco_tgt_sel[c] = remote_req_interco_tgt_g_sel_tmp[c] + {(idx_width(NumSubGroupsPerGroup)){1'b1}};
+              remote_req_interco_tgt_sel[idx] = remote_req_interco_tgt_g_sel_tmp[idx] + {(idx_width(NumSubGroupsPerGroup)){1'b1}};
            end
          end
        end
      end
-  `else
+`else
      // Remove tile index from local_req_interco_addr_int, since it will not be used for routing.
      addr_t local_req_interco_addr_int;
-      assign local_req_interco_payload[c].tgt_addr =
+      assign local_req_interco_payload[idx].tgt_addr =
        tcdm_addr_t'({local_req_interco_addr_int[ByteOffset + idx_width(NumBanksPerTile) + $clog2(NumTiles) +: TCDMAddrMemWidth], // Bank address
                      local_req_interco_addr_int[ByteOffset +: idx_width(NumBanksPerTile)]}); // Bank
 
      // Switch tile and bank indexes for correct upper level routing, and remove the group index
      addr_t prescramble_tcdm_req_tgt_addr;
      if (NumTilesPerGroup == 1) begin : gen_remote_req_interco_tgt_addr
-        assign remote_req_interco[c].tgt_addr =
-          tcdm_addr_t'({prescramble_tcdm_req_tgt_addr[ByteOffset + idx_width(NumBanksPerTile) + $clog2(NumGroups) +: TCDMAddrMemWidth], // Bank address
-                        prescramble_tcdm_req_tgt_addr[ByteOffset +: idx_width(NumBanksPerTile)]}); // Tile
-      end else begin : gen_remote_req_interco_tgt_addr
-        assign remote_req_interco[c].tgt_addr =
-=======
        assign remote_req_interco[idx].tgt_addr =
          tcdm_addr_t'({prescramble_tcdm_req_tgt_addr[ByteOffset + idx_width(NumBanksPerTile) + $clog2(NumGroups) +: TCDMAddrMemWidth], // Bank address
                        prescramble_tcdm_req_tgt_addr[ByteOffset +: idx_width(NumBanksPerTile)]}); // Tile
      end else begin : gen_remote_req_interco_tgt_addr
        assign remote_req_interco[idx].tgt_addr =
->>>>>>> e17227cc... WIP:[MemPool-Spatz]
          tcdm_addr_t'({prescramble_tcdm_req_tgt_addr[ByteOffset + idx_width(NumBanksPerTile) + $clog2(NumTilesPerGroup) + $clog2(NumGroups) +: TCDMAddrMemWidth], // Bank address
                        prescramble_tcdm_req_tgt_addr[ByteOffset +: idx_width(NumBanksPerTile)], // Bank
                        prescramble_tcdm_req_tgt_addr[ByteOffset + idx_width(NumBanksPerTile) +: $clog2(NumTilesPerGroup)]}); // Tile
      end
 
      if (NumGroups == 1) begin : gen_remote_req_interco_tgt_sel
-<<<<<<< HEAD
-        assign remote_req_interco_tgt_sel[c] = 1'b0;
-      end else begin : gen_remote_req_interco_tgt_sel
-        // Output port depends on both the target and initiator group
-        assign remote_req_interco_tgt_sel[c] = (prescramble_tcdm_req_tgt_addr[ByteOffset + $clog2(NumBanksPerTile) + $clog2(NumTilesPerGroup) +: $clog2(NumGroups)]) ^ group_id;
-      end
-      `endif
-
-      // We don't care about these
-      assign local_req_interco_payload[c].wdata.core_id = '0;
-      assign local_req_interco_payload[c].ini_addr = '0;
-      assign soc_data_q[c].id = '0;
-
-      // Constant value
-      assign remote_req_interco[c].wdata.core_id = c[idx_width(NumCoresPerTile)-1:0];
-
-      // Scramble address before entering TCDM shim for sequential+interleaved memory map
-      addr_t snitch_data_qaddr_scrambled;
-      address_scrambler #(
-        .AddrWidth         (AddrWidth        ),
-        .ByteOffset        (ByteOffset       ),
-        .NumTiles          (NumTiles         ),
-        .NumBanksPerTile   (NumBanksPerTile  ),
-        .Bypass            (0                ),
-        .SeqMemSizePerTile (SeqMemSizePerTile)
-      ) i_address_scrambler (
-        .address_i (snitch_data_qaddr[c]       ),
-        .address_o (snitch_data_qaddr_scrambled)
-      );
-
-      if (!TrafficGeneration) begin: gen_tcdm_shim
-        tcdm_shim #(
-          .AddrWidth           (AddrWidth                         ),
-          .DataWidth           (DataWidth                         ),
-          .MaxOutStandingTrans (snitch_pkg::NumIntOutstandingLoads),
-          .NrTCDM              (2                                 ),
-          .NrSoC               (1                                 ),
-          .NumRules            (3                                 )
-        ) i_tcdm_shim (
-          .clk_i              (clk_i ),
-          .rst_ni             (rst_ni),
-          // to TCDM --> FF Connection to outside of tile
-          .tcdm_req_valid_o   ({local_req_interco_valid[c], remote_req_interco_valid[c]}                          ),
-          .tcdm_req_tgt_addr_o({local_req_interco_addr_int, prescramble_tcdm_req_tgt_addr}                        ),
-          .tcdm_req_wen_o     ({local_req_interco_payload[c].wen, remote_req_interco[c].wen}                      ),
-          .tcdm_req_wdata_o   ({local_req_interco_payload[c].wdata.data, remote_req_interco[c].wdata.data}        ),
-          .tcdm_req_amo_o     ({local_req_interco_payload[c].wdata.amo, remote_req_interco[c].wdata.amo}          ),
-          .tcdm_req_id_o      ({local_req_interco_payload[c].wdata.meta_id, remote_req_interco[c].wdata.meta_id}  ),
-          .tcdm_req_be_o      ({local_req_interco_payload[c].be, remote_req_interco[c].be}                        ),
-          .tcdm_req_ready_i   ({local_req_interco_ready[c], remote_req_interco_ready[c]}                          ),
-          .tcdm_resp_valid_i  ({local_resp_interco_valid[c], remote_resp_interco_valid[c]}                        ),
-          .tcdm_resp_ready_o  ({local_resp_interco_ready[c], remote_resp_interco_ready[c]}                        ),
-          .tcdm_resp_rdata_i  ({local_resp_interco_payload[c].rdata.data, remote_resp_interco[c].rdata.data}      ),
-          .tcdm_resp_id_i     ({local_resp_interco_payload[c].rdata.meta_id, remote_resp_interco[c].rdata.meta_id}),
-          // to SoC
-          .soc_qaddr_o        (soc_data_q[c].addr  ),
-          .soc_qwrite_o       (soc_data_q[c].write ),
-          .soc_qamo_o         (soc_data_q[c].amo   ),
-          .soc_qdata_o        (soc_data_q[c].data  ),
-          .soc_qstrb_o        (soc_data_q[c].strb  ),
-          .soc_qvalid_o       (soc_data_qvalid[c]  ),
-          .soc_qready_i       (soc_data_qready[c]  ),
-          .soc_pdata_i        (soc_data_p[c].data  ),
-          .soc_pwrite_i       (soc_data_p[c].write ),
-          .soc_perror_i       (soc_data_p[c].error ),
-          .soc_pvalid_i       (soc_data_pvalid[c]  ),
-          .soc_pready_o       (soc_data_pready[c]  ),
-          // from core
-          .data_qaddr_i       (snitch_data_qaddr_scrambled),
-          .data_qwrite_i      (snitch_data_qwrite[c]      ),
-          .data_qamo_i        (snitch_data_qamo[c]        ),
-          .data_qdata_i       (snitch_data_qdata[c]       ),
-          .data_qstrb_i       (snitch_data_qstrb[c]       ),
-          .data_qid_i         (snitch_data_qid[c]         ),
-          .data_qvalid_i      (snitch_data_qvalid[c]      ),
-          .data_qready_o      (snitch_data_qready[c]      ),
-          .data_pdata_o       (snitch_data_pdata[c]       ),
-          .data_perror_o      (snitch_data_perror[c]      ),
-          .data_pid_o         (snitch_data_pid[c]         ),
-          .data_pvalid_o      (snitch_data_pvalid[c]      ),
-          .data_pready_i      (snitch_data_pready[c]      ),
-          .address_map_i      (mask_map                   )
-        );
-      end else begin: gen_traffic_generator
-        traffic_generator #(
-          .NumRules           (3                                 ),
-          .TCDMBaseAddr       (TCDMBaseAddr                      ),
-          .MaxOutStandingReads(snitch_pkg::NumIntOutstandingLoads)
-        ) i_traffic_gen (
-          .clk_i              (clk_i                                                         ),
-          .rst_ni             (rst_ni                                                        ),
-          .core_id_i          ({tile_id_i, c[idx_width(NumCoresPerTile)-1:0]}                ),
-          // Address map
-          .address_map_i      (mask_map                                                      ),
-          // To TCDM
-          .tcdm_req_valid_o   ({local_req_interco_valid[c], remote_req_interco_valid[c]}     ),
-          .tcdm_req_tgt_addr_o({local_req_interco_addr_int, prescramble_tcdm_req_tgt_addr}   ),
-          .tcdm_req_wen_o     ({local_req_interco_payload[c].wen, remote_req_interco[c].wen} ),
-          .tcdm_req_wdata_o   ({local_req_interco_payload[c].wdata.data,
-                                remote_req_interco[c].wdata.data}),
-          .tcdm_req_amo_o({local_req_interco_payload[c].wdata.amo,
-                           remote_req_interco[c].wdata.amo}),
-          .tcdm_req_id_o({local_req_interco_payload[c]
-                          .wdata.meta_id, remote_req_interco[c].wdata.meta_id}),
-          .tcdm_req_be_o    ({local_req_interco_payload[c].be, remote_req_interco[c].be}),
-          .tcdm_req_ready_i ({local_req_interco_ready[c], remote_req_interco_ready[c]} ),
-          .tcdm_resp_valid_i({local_resp_interco_valid[c], remote_resp_interco_valid[c]}),
-          .tcdm_resp_ready_o({local_resp_interco_ready[c], remote_resp_interco_ready[c]}),
-          .tcdm_resp_rdata_i({local_resp_interco_payload[c].rdata.data,
-                              remote_resp_interco[c].rdata.data} ),
-          .tcdm_resp_id_i ({local_resp_interco_payload[c].rdata.meta_id,
-                            remote_resp_interco[c].rdata.meta_id})
-=======
        assign remote_req_interco_tgt_sel[idx] = 1'b0;
      end else begin : gen_remote_req_interco_tgt_sel
        // Output port depends on both the target and initiator group
        assign remote_req_interco_tgt_sel[idx] = (prescramble_tcdm_req_tgt_addr[ByteOffset + $clog2(NumBanksPerTile) + $clog2(NumTilesPerGroup) +: $clog2(NumGroups)]) ^ group_id;
      end
+`endif
 
-      // We don't care about these
+      // We don't care about these
      assign local_req_interco_payload[idx].wdata.core_id = idx;
      assign local_req_interco_payload[idx].ini_addr = '0;
      assign soc_data_q[idx].id = '0;
@@ -979,7 +811,6 @@ module mempool_tile
      ) i_address_scrambler (
        .address_i (snitch_data_qaddr[c][p]    ),
        .address_o (snitch_data_qaddr_scrambled)
->>>>>>> e17227cc... WIP:[MemPool-Spatz]
      );
 
      if (!TrafficGeneration) begin: gen_tcdm_shim
@@ -991,11 +822,11 @@ module mempool_tile
          .NrSoC               (1                                 ),
          .NumRules            (3                                 )
        ) i_tcdm_shim (
-          .clk_i              (clk_i ),
-          .rst_ni             (rst_ni),
+          .clk_i              (clk_i ),
+          .rst_ni             (rst_ni),
          // to TCDM --> FF Connection to outside of tile
          .tcdm_req_valid_o   ({local_req_interco_valid[idx], remote_req_interco_valid[idx]}                         ),
-          .tcdm_req_tgt_addr_o({local_req_interco_addr_int, prescramble_tcdm_req_tgt_addr}                          ),
+          .tcdm_req_tgt_addr_o({local_req_interco_addr_int, prescramble_tcdm_req_tgt_addr}                          ),
          .tcdm_req_wen_o     ({local_req_interco_payload[idx].wen, remote_req_interco[idx].wen}                     ),
          .tcdm_req_wdata_o   ({local_req_interco_payload[idx].wdata.data, remote_req_interco[idx].wdata.data}       ),
          .tcdm_req_amo_o     ({local_req_interco_payload[idx].wdata.amo, remote_req_interco[idx].wdata.amo}         ),
@@ -1020,20 +851,20 @@ module mempool_tile
          .soc_pvalid_i       (soc_data_pvalid[idx]  ),
          .soc_pready_o       (soc_data_pready[idx]  ),
          // from core
-          .data_qaddr_i       (snitch_data_qaddr_scrambled),
-          .data_qwrite_i      (snitch_data_qwrite[c][p]   ),
-          .data_qamo_i        (snitch_data_qamo[c][p]     ),
-          .data_qdata_i       (snitch_data_qdata[c][p]    ),
-          .data_qstrb_i       (snitch_data_qstrb[c][p]    ),
-          .data_qid_i         (snitch_data_qid[c][p]      ),
-          .data_qvalid_i      (snitch_data_qvalid[c][p]   ),
-          .data_qready_o      (snitch_data_qready[c][p]   ),
-          .data_pdata_o       (snitch_data_pdata[c][p]    ),
-          .data_perror_o      (snitch_data_perror[c][p]   ),
-          .data_pid_o         (snitch_data_pid[c][p]      ),
-          .data_pvalid_o      (snitch_data_pvalid[c][p]   ),
-          .data_pready_i      (snitch_data_pready[c][p]   ),
-          .address_map_i      (mask_map                   )
+          .data_qaddr_i       (snitch_data_qaddr_scrambled),
+          .data_qwrite_i      (snitch_data_qwrite[c][p]   ),
+          .data_qamo_i        (snitch_data_qamo[c][p]     ),
+          .data_qdata_i       (snitch_data_qdata[c][p]    ),
+          .data_qstrb_i       (snitch_data_qstrb[c][p]    ),
+          .data_qid_i         (snitch_data_qid[c][p]      ),
+          .data_qvalid_i      (snitch_data_qvalid[c][p]   ),
+          .data_qready_o      (snitch_data_qready[c][p]   ),
+          .data_pdata_o       (snitch_data_pdata[c][p]    ),
+          .data_perror_o      (snitch_data_perror[c][p]   ),
+          .data_pid_o         (snitch_data_pid[c][p]      ),
+          .data_pvalid_o      (snitch_data_pvalid[c][p]   ),
+          .data_pready_i      (snitch_data_pready[c][p]   ),
+          .address_map_i      (mask_map                   )
        );
      end else begin: gen_traffic_generator
        traffic_generator #(
diff --git a/hardware/tb/mempool_tb.sv b/hardware/tb/mempool_tb.sv
index 3261ff3dc..ad4e4f2a7 100644
--- a/hardware/tb/mempool_tb.sv
+++ b/hardware/tb/mempool_tb.sv
@@ -350,6 +350,7 @@ module mempool_tb;
   **************************************/
 `ifndef TARGET_SYNTHESIS
 `ifndef TARGET_VERILATOR
+`ifndef POSTLAYOUT
 
   // Cores
   logic [NumCores-1:0] instruction_handshake, lsu_request, lsu_handshake;
@@ -357,29 +358,68 @@ module mempool_tb;
   assign snitch_utilization = $countones(instruction_handshake);
   assign lsu_utilization = $countones(lsu_handshake);
   assign lsu_pressure = $countones(lsu_request);
-  for (genvar g = 0; g < NumGroups; g++) begin
-    for (genvar t = 0; t < NumTilesPerGroup; t++) begin
-      for (genvar c = 0; c < NumCoresPerTile; c++) begin
-        logic valid_instr, stall;
-        logic lsu_valid, lsu_ready;
-        // Snitch
-        assign valid_instr = dut.i_mempool_cluster.gen_groups[g].i_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch.valid_instr;
-        assign stall = dut.i_mempool_cluster.gen_groups[g].i_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch.stall;
-        assign instruction_handshake[g*NumTilesPerGroup*NumCoresPerTile+t*NumCoresPerTile+c] = valid_instr & !stall;
-        // Interconnect
-        assign lsu_valid = dut.i_mempool_cluster.gen_groups[g].i_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch.data_qvalid_o;
-        assign lsu_ready = dut.i_mempool_cluster.gen_groups[g].i_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch.data_qready_i;
-        assign lsu_request[g*NumTilesPerGroup*NumCoresPerTile+t*NumCoresPerTile+c] = lsu_valid & !lsu_ready;
-        assign lsu_handshake[g*NumTilesPerGroup*NumCoresPerTile+t*NumCoresPerTile+c] = lsu_valid & lsu_ready;
+  `ifdef TERAPOOL
+  for (genvar g = 0; g < NumGroups; g++) begin
+    for (genvar sg = 0; sg < NumSubGroupsPerGroup; sg++) begin
+      for (genvar t = 0; t < NumTilesPerSubGroup; t++) begin
+        for (genvar c = 0; c < NumCoresPerTile; c++) begin
+          logic valid_instr, stall;
+          logic lsu_valid, lsu_ready;
+          // Snitch
+          assign valid_instr = dut.i_mempool_cluster.gen_groups[g].gen_rtl_group.i_group.gen_sub_groups[sg].gen_rtl_sg.i_sub_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch.valid_instr;
+          assign stall = dut.i_mempool_cluster.gen_groups[g].gen_rtl_group.i_group.gen_sub_groups[sg].gen_rtl_sg.i_sub_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch.stall;
+          assign instruction_handshake[g*NumSubGroupsPerGroup*NumTilesPerSubGroup*NumCoresPerTile + sg*NumTilesPerSubGroup*NumCoresPerTile + t*NumCoresPerTile + c] = valid_instr & !stall;
+          // Interconnect
+          assign lsu_valid = dut.i_mempool_cluster.gen_groups[g].gen_rtl_group.i_group.gen_sub_groups[sg].gen_rtl_sg.i_sub_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch.data_qvalid_o;
+          assign lsu_ready = dut.i_mempool_cluster.gen_groups[g].gen_rtl_group.i_group.gen_sub_groups[sg].gen_rtl_sg.i_sub_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch.data_qready_i;
+          assign lsu_request[g*NumSubGroupsPerGroup*NumTilesPerSubGroup*NumCoresPerTile + sg*NumTilesPerSubGroup*NumCoresPerTile + t*NumCoresPerTile + c] = lsu_valid & !lsu_ready;
+          assign lsu_handshake[g*NumSubGroupsPerGroup*NumTilesPerSubGroup*NumCoresPerTile + sg*NumTilesPerSubGroup*NumCoresPerTile + t*NumCoresPerTile + c] = lsu_valid & lsu_ready;
+        end
+      end
     end
   end
-  end
+  `else
+  for (genvar g = 0; g < NumGroups; g++) begin
+    for (genvar t = 0; t < NumTilesPerGroup; t++) begin
+      for (genvar c = 0; c < NumCoresPerTile; c++) begin
+        logic valid_instr, stall;
+        logic lsu_valid, lsu_ready;
+        // Snitch
+        assign valid_instr = dut.i_mempool_cluster.gen_groups[g].i_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch.valid_instr;
+        assign stall = dut.i_mempool_cluster.gen_groups[g].i_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch.stall;
+        assign instruction_handshake[g*NumTilesPerGroup*NumCoresPerTile+t*NumCoresPerTile+c] = valid_instr & !stall;
+        // Interconnect
+        assign lsu_valid = dut.i_mempool_cluster.gen_groups[g].i_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch.data_qvalid_o;
+        assign lsu_ready = dut.i_mempool_cluster.gen_groups[g].i_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch.data_qready_i;
+        assign lsu_request[g*NumTilesPerGroup*NumCoresPerTile+t*NumCoresPerTile+c] = lsu_valid & !lsu_ready;
+        assign lsu_handshake[g*NumTilesPerGroup*NumCoresPerTile+t*NumCoresPerTile+c] = lsu_valid & lsu_ready;
+      end
+    end
+  end
+  `endif
+
   // DSPU
   if (snitch_pkg::XPULPIMG) begin: gen_utilization
     logic [NumCores-1:0] dspu_handshake, dspu_mac;
     int unsigned dspu_utilization, mac_utilization;
     assign dspu_utilization = $countones(dspu_handshake);
     assign mac_utilization = $countones(dspu_mac);
+    `ifdef TERAPOOL
+    for (genvar g = 0; g < NumGroups; g++) begin
+      for (genvar sg = 0; sg < NumSubGroupsPerGroup; sg++) begin
+        for (genvar t = 0; t < NumTilesPerSubGroup; t++) begin
+          for (genvar c = 0; c < NumCoresPerTile; c++) begin
+            logic dsp_valid, dsp_ready, mac;
+            assign dsp_valid = dut.i_mempool_cluster.gen_groups[g].gen_rtl_group.i_group.gen_sub_groups[sg].gen_rtl_sg.i_sub_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch_ipu.gen_xpulpimg.i_dspu.in_valid_i;
+            assign dsp_ready = dut.i_mempool_cluster.gen_groups[g].gen_rtl_group.i_group.gen_sub_groups[sg].gen_rtl_sg.i_sub_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch_ipu.gen_xpulpimg.i_dspu.in_ready_o;
+            assign mac = dut.i_mempool_cluster.gen_groups[g].gen_rtl_group.i_group.gen_sub_groups[sg].gen_rtl_sg.i_sub_group.gen_tiles[t].i_tile.gen_cores[c].gen_mempool_cc.riscv_core.i_snitch_ipu.gen_xpulpimg.i_dspu.operator_i ==? riscv_instr::P_MAC;
+            assign dspu_handshake[g*NumSubGroupsPerGroup*NumTilesPerSubGroup*NumCoresPerTile + sg*NumTilesPerSubGroup*NumCoresPerTile + t*NumCoresPerTile + c] = dsp_valid & dsp_ready;
+            assign dspu_mac[g*NumSubGroupsPerGroup*NumTilesPerSubGroup*NumCoresPerTile + sg*NumTilesPerSubGroup*NumCoresPerTile + t*NumCoresPerTile + c] = dsp_valid & dsp_ready & mac;
+          end
+        end
+      end
+    end
+    `else
     for (genvar g = 0; g < NumGroups; g++) begin
       for (genvar t = 0; t < NumTilesPerGroup; t++) begin
        for (genvar c = 0; c < NumCoresPerTile; c++) begin
@@ -392,7 +432,9 @@ module mempool_tb;
        end
      end
    end
+    `endif
  end
 
+  // AXI
  logic [NumGroups*NumAXIMastersPerGroup-1:0] w_valid, w_ready, r_ready, r_valid;
  int unsigned axi_w_utilization, axi_r_utilization;
@@ -407,5 +449,6 @@ module mempool_tb;
 
 `endif
 `endif
+`endif
 
 endmodule : mempool_tb
diff --git a/software/apps/sp-axpy/main.c b/software/apps/sp-axpy/main.c
index d8897b1d8..af1845e3e 100644
--- a/software/apps/sp-axpy/main.c
+++ b/software/apps/sp-axpy/main.c
@@ -159,7 +159,7 @@ int main() {
     int error = verify_matrix(y, r, esize, cid);
     printf("Errors: %d\n", error);
     if (error != 0)
-      return error
+      return error;
   }
 
   // Wait for core 0 to finish displaying results