diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 21b7a01d..a086edc7 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -55,6 +55,9 @@ tests: PARAM1: [-GN=1, -GN=2, -GN=3, -GN=4, -GN=8, -GN=16] - TOPLEVEL: fifo_tb PARAM1: [-GDEPTH=1, -GDEPTH=13, -GDEPTH=32 -GFALL_THROUGH=1] + - TOPLEVEL: mem_multibank_pwrgate_tb + PARAM1: [-gNumLogicBanks=1, -gNumLogicBanks=2, -gNumLogicBanks=4, -gNumLogicBanks=8] + PARAM2: [-gLatency=0, -gLatency=1, -gLatency=2] # - TOPLEVEL: [cdc_2phase_tb, cdc_2phase_clearable_tb] # PARAM1: -GUNTIL=1000000 # - TOPLEVEL: cdc_fifo_tb diff --git a/Bender.yml b/Bender.yml index f5a5a21b..6e065cdf 100644 --- a/Bender.yml +++ b/Bender.yml @@ -46,6 +46,7 @@ sources: - src/lfsr_8bit.sv - src/lossy_valid_to_stream.sv - src/mv_filter.sv + - src/mem_multibank_pwrgate.sv - src/onehot_to_bin.sv - src/plru_tree.sv - src/passthrough_stream_fifo.sv @@ -129,6 +130,7 @@ sources: - test/fifo_tb.sv - test/graycode_tb.sv - test/id_queue_tb.sv + - test/mem_multibank_pwrgate_tb.sv - test/passthrough_stream_fifo_tb.sv - test/popcount_tb.sv - test/rr_arb_tree_tb.sv diff --git a/README.md b/README.md index 6c65d6a5..d846894e 100644 --- a/README.md +++ b/README.md @@ -107,6 +107,7 @@ Please note that cells with status *deprecated* are not to be used for new desig | `popcount` | Combinatorial popcount (hamming weight) | active | | | `mem_to_banks_detailed` | Split memory access over multiple parallel banks with detailed response signals | active | | | `mem_to_banks` | Split memory access over multiple parallel banks | active | | +| `mem_multibank_pwrgate` | Power-aware wrapper for memory bank with bank retention and power-off capabilities | active | | ### Data Structures diff --git a/common_cells.core b/common_cells.core index 7be80e26..c5a8be71 100644 --- a/common_cells.core +++ b/common_cells.core @@ -30,6 +30,7 @@ filesets: - src/lfsr_8bit.sv - src/multiaddr_decode.sv - src/mv_filter.sv + - src/mem_multibank_pwrgate.sv - src/onehot_to_bin.sv - src/plru_tree.sv - 
src/passthrough_stream_fifo.sv diff --git a/lint/common_cells.style.waiver b/lint/common_cells.style.waiver index 570a6656..6773e4f1 100644 --- a/lint/common_cells.style.waiver +++ b/lint/common_cells.style.waiver @@ -4,6 +4,8 @@ waive --rule=typedef-structs-unions --line=29 --location="src/ecc_encode.sv" # That is a known issue with string parameter in Synopsys DC waive --rule=explicit-parameter-storage-type --line=19 --location="src/stream_arbiter.sv" waive --rule=explicit-parameter-storage-type --line=19 --location="src/stream_arbiter_flushable.sv" +waive --rule=explicit-parameter-storage-type --line=29 --location="src/mem_multibank_pwrgate.sv" +waive --rule=explicit-parameter-storage-type --line=31 --location="src/mem_multibank_pwrgate.sv" waive --rule=always-ff-non-blocking --line=290 --location="src/clk_int_div.sv" waive --rule=always-ff-non-blocking --line=293 --location="src/clk_int_div.sv" waive --rule=always-ff-non-blocking --line=302 --location="src/clk_int_div.sv" diff --git a/src/mem_multibank_pwrgate.sv b/src/mem_multibank_pwrgate.sv new file mode 100644 index 00000000..55ff5e7c --- /dev/null +++ b/src/mem_multibank_pwrgate.sv @@ -0,0 +1,198 @@ +// Copyright 2024 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Lorenzo Leone + +// ## Description: +// A wrapper for `tc_sram_impl` that instantiates logic banks with retention mode +// or power-off capability. +// This module can be used for power-aware simulations, with control signals driven +// directly by UPF signals. +// +// ## Goal: +// In a memory with multiple banks that support power gating and retention, +// each bank’s addressing must ensure that interleaving remains intact. During retention +// or power-off states, only contiguous addresses should be switched. +// The memory should always appear as a set of contiguous addresses, with no gaps in the +// address mapping. 
+// This module is responsible for managing the correct memory addressing:
+// the upper address bits select the logic bank, the lower bits address a word inside it.
+`include "common_cells/assertions.svh"
+module mem_multibank_pwrgate #(
+  parameter int unsigned NumWords      = 32'd1024,  // Number of Words in data array
+  parameter int unsigned DataWidth     = 32'd128,   // Data signal width
+  parameter int unsigned ByteWidth     = 32'd8,     // Width of a data byte
+  parameter int unsigned NumPorts      = 32'd2,     // Number of read and write ports
+  parameter int unsigned Latency       = 32'd1,     // Latency when the read data is available
+  parameter int unsigned NumLogicBanks = 32'd1,     // Number of logic banks for power management
+  parameter              SimInit       = "none",    // Simulation initialization
+  parameter bit          PrintSimCfg   = 1'b0,      // Print configuration
+  parameter              ImplKey       = "none",    // Reference to specific implementation
+  // DEPENDENT PARAMETERS, DO NOT OVERWRITE!
+  parameter int unsigned AddrWidth = (NumWords > 32'd1) ? $clog2(NumWords) : 32'd1,
+  parameter int unsigned BeWidth   = (DataWidth + ByteWidth - 32'd1) / ByteWidth,  // ceil_div
+  parameter type         addr_t    = logic [AddrWidth-1:0],
+  parameter type         data_t    = logic [DataWidth-1:0],
+  parameter type         be_t      = logic [BeWidth-1:0]
+) (
+  input  logic                      clk_i,        // Clock
+  input  logic                      rst_ni,       // Asynchronous reset active low
+  // input ports
+  input  logic  [     NumPorts-1:0] req_i,        // request
+  input  logic  [     NumPorts-1:0] we_i,         // write enable
+  input  addr_t [     NumPorts-1:0] addr_i,       // request address
+  input  data_t [     NumPorts-1:0] wdata_i,      // write data
+  input  be_t   [     NumPorts-1:0] be_i,         // write byte enable
+  input  logic  [NumLogicBanks-1:0] deepsleep_i,  // deep sleep enable, one bit per logic bank
+  input  logic  [NumLogicBanks-1:0] powergate_i,  // power gate enable, one bit per logic bank
+  // output ports
+  output data_t [     NumPorts-1:0] rdata_o       // read data
+);
+
+  // Implementation type for the power gating and deep sleep ports
+  typedef struct packed {
+    logic deepsleep;
+    logic powergate;
+  } impl_in_t;
+
+  // Elaboration-time selection on NumLogicBanks: 0 is rejected, 1 maps to a single bank.
+  if (NumLogicBanks == 32'd0) begin : gen_no_logic_bank
+    $fatal(1, "Error: %d logic banks are not supported", NumLogicBanks);
+  end else if (NumLogicBanks == 32'd1) begin : gen_simple_sram
+    tc_sram_impl #(
+      .NumWords   (NumWords),
+      .DataWidth  (DataWidth),
+      .ByteWidth  (ByteWidth),
+      .NumPorts   (NumPorts),
+      .Latency    (Latency),
+      .SimInit    (SimInit),
+      .PrintSimCfg(PrintSimCfg),
+      .ImplKey    (ImplKey),
+      .impl_in_t  (impl_in_t),
+      .impl_out_t (impl_in_t)
+    ) i_tc_sram_impl (
+      .clk_i,
+      .rst_ni,
+      .impl_i({deepsleep_i, powergate_i}),
+      .impl_o(),
+      .req_i,
+      .we_i,
+      .addr_i,
+      .wdata_i,
+      .be_i,
+      .rdata_o
+    );
+
+  end else begin : gen_logic_bank  // NumLogicBanks > 1: one tc_sram_impl per logic bank
+    localparam int unsigned LogicBankSize = NumWords / NumLogicBanks;
+    localparam int unsigned BankSelWidth = (NumLogicBanks > 32'd1) ?
+                                           $clog2(NumLogicBanks) : 32'd1;
+    // A bank must hold exactly 2**(AddrWidth-BankSelWidth) words for the address split below.
+    if (LogicBankSize != 2 ** (AddrWidth - BankSelWidth))
+      $error("Logic Bank size is not a power of two: UNSUPPORTED!");
+
+    // Signals from/to logic banks
+    logic  [NumLogicBanks-1:0][NumPorts-1:0]                             req_cut;
+    logic  [NumLogicBanks-1:0][NumPorts-1:0]                             we_cut;
+    logic  [NumLogicBanks-1:0][NumPorts-1:0][AddrWidth-BankSelWidth-1:0] addr_cut;
+    data_t [NumLogicBanks-1:0][NumPorts-1:0]                             wdata_cut;
+    be_t   [NumLogicBanks-1:0][NumPorts-1:0]                             be_cut;
+    data_t [NumLogicBanks-1:0][NumPorts-1:0]                             rdata_cut;
+
+    // Signals to select the right bank
+    logic [NumPorts-1:0][BankSelWidth-1:0] bank_sel;
+
+    // Identify the bank from the BankSelWidth most significant bits of the address
+    for (genvar PortIdx = 0; PortIdx < NumPorts; PortIdx++) begin : gen_bank_sel
+      assign bank_sel[PortIdx] = addr_i[PortIdx][AddrWidth-1-:BankSelWidth];
+    end
+
+    // Read Data Mux Logic:
+    //
+    // If the memory has Latency != 0, the read data will arrive after a certain delay.
+    // During this time, the bank_select signal must be stored in order to
+    // correctly select the output bank after the expected latency.
+    if (Latency == 32'd0) begin : gen_no_latency
+      for (genvar PortIdx = 0; PortIdx < NumPorts; PortIdx++) begin : gen_read_mux_signals
+        assign rdata_o[PortIdx] = rdata_cut[bank_sel[PortIdx]][PortIdx];
+      end
+    end else begin : gen_read_latency
+      // Shift register holding the bank selection of the last `Latency` cycles, per port
+      logic [NumPorts-1:0][Latency-1:0][BankSelWidth-1:0] out_mux_sel_d, out_mux_sel_q;
+
+      always_comb begin
+        for (int PortIdx = 0; PortIdx < NumPorts; PortIdx++) begin : gen_read_mux_signals
+          rdata_o[PortIdx] = rdata_cut[out_mux_sel_q[PortIdx][0]][PortIdx];  // oldest entry
+          for (int shift_idx = 0; shift_idx < (Latency - 1); shift_idx++) begin : gen_shift
+            out_mux_sel_d[PortIdx][shift_idx] = out_mux_sel_q[PortIdx][shift_idx+1];
+          end
+          out_mux_sel_d[PortIdx][Latency-1] = bank_sel[PortIdx];  // loaded every cycle, no req_i
+        end
+      end
+
+      always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (!rst_ni) begin
+          out_mux_sel_q <= '0;
+        end else begin
+          out_mux_sel_q <= out_mux_sel_d;
+        end
+      end
+    end : gen_read_latency
+
+    // Write data Mux Logic
+    // The inputs of every bank that is not addressed by a port are tied to zero.
+    for (genvar BankIdx = 0; BankIdx < NumLogicBanks; BankIdx++) begin : gen_logic_bank
+      for (genvar PortIdx = 0; PortIdx < NumPorts; PortIdx++) begin : gen_port_write_logic
+        // DEMUX the input signals to the correct logic bank
+        // Assign req channel to the correct logic bank
+        assign req_cut[BankIdx][PortIdx] = req_i[PortIdx] && (bank_sel[PortIdx] == BankIdx);
+        // Assign lowest part of the address to the correct logic bank
+        assign addr_cut[BankIdx][PortIdx] = req_cut[BankIdx][PortIdx] ?
+                                            addr_i[PortIdx][AddrWidth-BankSelWidth-1:0] : '0;
+        // Assign data to the correct logic bank
+        assign wdata_cut[BankIdx][PortIdx] = req_cut[BankIdx][PortIdx] ? wdata_i[PortIdx] : '0;
+        assign we_cut[BankIdx][PortIdx]    = req_cut[BankIdx][PortIdx] ? we_i[PortIdx] : '0;
+        assign be_cut[BankIdx][PortIdx]    = req_cut[BankIdx][PortIdx] ? be_i[PortIdx] : '0;
+      end
+
+      tc_sram_impl #(
+        .NumWords   (LogicBankSize),
+        .DataWidth  (DataWidth),
+        .ByteWidth  (ByteWidth),
+        .NumPorts   (NumPorts),
+        .Latency    (Latency),
+        .SimInit    (SimInit),
+        .PrintSimCfg(PrintSimCfg),
+        .ImplKey    (ImplKey),
+        .impl_in_t  (impl_in_t),
+        .impl_out_t (impl_in_t)
+      ) i_tc_sram_impl (
+        .clk_i,
+        .rst_ni,
+        .impl_i ({deepsleep_i[BankIdx], powergate_i[BankIdx]}),
+        .impl_o (),
+        .req_i  (req_cut[BankIdx]),
+        .we_i   (we_cut[BankIdx]),
+        .addr_i (addr_cut[BankIdx]),
+        .wdata_i(wdata_cut[BankIdx]),
+        .be_i   (be_cut[BankIdx]),
+        .rdata_o(rdata_cut[BankIdx])
+      );
+    end : gen_logic_bank
+  end
+
+  // Trigger warnings when power signals (deepsleep_i and powergate_i) are not connected.
+  // Usually those signals must be linked through the UPF.
+`ifndef VERILATOR
+`ifndef SYNTHESIS
+  initial begin
+    assert (!$isunknown(deepsleep_i))
+    else $warning("deepsleep_i has some unconnected signals");
+    assert (!$isunknown(powergate_i))
+    else $warning("powergate_i has some unconnected signals");
+  end
+`endif
+`endif
+
+endmodule  //endmodule: mem_multibank_pwrgate
diff --git a/test/mem_multibank_pwrgate_tb.sv b/test/mem_multibank_pwrgate_tb.sv
new file mode 100644
index 00000000..5266e7bc
--- /dev/null
+++ b/test/mem_multibank_pwrgate_tb.sv
@@ -0,0 +1,212 @@
+// Copyright 2020 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Wolfgang Roenninger , ETH Zurich
+//
+// ## Description:
+// Test to address the multibanked powergated SRAM and check correct address handling.
+
+module mem_multibank_pwrgate_tb #(
+  parameter int unsigned NumPorts      = 32'd1,
+  parameter int unsigned Latency       = 32'd1,
+  parameter int unsigned NumWords      = 32'd1024,
+  parameter int unsigned DataWidth     = 32'd64,
+  parameter int unsigned ByteWidth     = 32'd8,
+  parameter int unsigned NoReq         = 32'd200000,
+  parameter int unsigned NumLogicBanks = 32'd1,
+  parameter string       SimInit       = "zeros",
+  parameter time         CyclTime      = 10ns,
+  parameter time         ApplTime      = 2ns,
+  parameter time         TestTime      = 8ns
+);
+
+  //-----------------------------------
+  // Clock generator
+  //-----------------------------------
+  logic clk, rst_n;
+  clk_rst_gen #(
+    .ClkPeriod   (CyclTime),
+    .RstClkCycles(5)
+  ) i_clk_gen (
+    .clk_o (clk),
+    .rst_no(rst_n)
+  );
+
+  logic [NumPorts-1:0] done;  // one flag per port, set once its stimuli are exhausted
+
+  localparam int unsigned AddrWidth = (NumWords > 32'd1) ? $clog2(NumWords) : 32'd1;
+  localparam int unsigned BeWidth   = (DataWidth + ByteWidth - 32'd1) / ByteWidth;
+
+  typedef logic [AddrWidth-1:0] addr_t;
+  typedef logic [DataWidth-1:0] data_t;
+  typedef logic [BeWidth-1:0]   be_t;
+
+  // signal declarations for each sram
+  logic  [NumPorts-1:0] req, we;
+  addr_t [NumPorts-1:0] addr;
+  data_t [NumPorts-1:0] wdata, rdata;
+  be_t   [NumPorts-1:0] be;
+
+  // golden model
+  data_t           memory      [NumWords-1:0];
+  longint unsigned failed_test;  // number of mismatching read data bits
+
+  // This process drives the requests on the port with random data.
+  for (genvar i = 0; i < NumPorts; i++) begin : gen_stimuli
+    initial begin : proc_drive_port
+      automatic logic  stim_write;
+      automatic addr_t stim_addr;
+      automatic data_t stim_data;
+      automatic be_t   stim_be;
+
+      done[i]  <= 1'b0;
+      req[i]   <= 1'b0;
+      we[i]    <= 1'b0;
+      addr[i]  <= addr_t'(0);
+      wdata[i] <= data_t'(0);
+      be[i]    <= be_t'(0);
+
+      @(posedge rst_n);
+      repeat (10) @(posedge clk);
+
+      for (int unsigned j = 0; j < NoReq; j++) begin
+        stim_write = bit'($urandom());
+        for (int unsigned k = 0; k < AddrWidth; k++) begin
+          stim_addr[k] = bit'($urandom());
+        end
+        // this statement makes sure that only valid addresses are in a request
+        while (stim_addr >= NumWords) begin
+          for (int unsigned k = 0; k < AddrWidth; k++) begin
+            stim_addr[k] = bit'($urandom());
+          end
+        end
+        for (int unsigned k = 0; k < DataWidth; k++) begin
+          stim_data[k] = bit'($urandom());
+        end
+        for (int unsigned k = 0; k < BeWidth; k++) begin
+          stim_be[k] = bit'($urandom());
+        end
+
+        req[i]   <= #ApplTime 1'b1;  // request is applied `ApplTime` after the clock edge
+        we[i]    <= #ApplTime stim_write;
+        addr[i]  <= #ApplTime stim_addr;
+        wdata[i] <= #ApplTime stim_data;
+        be[i]    <= #ApplTime stim_be;
+        @(posedge clk);
+        req[i]   <= #ApplTime 1'b0;  // request is held for exactly one cycle
+        we[i]    <= #ApplTime 1'b0;
+        addr[i]  <= #ApplTime addr_t'(0);
+        wdata[i] <= #ApplTime data_t'(0);
+        be[i]    <= #ApplTime be_t'(0);
+
+        repeat ($urandom_range(0, 5)) @(posedge clk);  // random idle gap between requests
+      end
+      done[i] <= 1'b1;
+    end
+  end
+
+  // This process controls the golden model
+  // - The memory array is initialized according to the parameter
+  // - Data is written exactly at the clock edge, if there is a write request on a port.
+  // - At `TestTime` a checker process is launched per port; for a read it lives `Latency` cycles.
+  //   This process asserts the expected read output at `TestTime` in the respective cycle.
+  initial begin : proc_golden_model
+    failed_test = 0;
+    for (int unsigned i = 0; i < NumWords; i++) begin
+      for (int unsigned j = 0; j < DataWidth; j++) begin
+        case (SimInit)
+          "zeros": memory[i][j] = 1'b0;
+          "ones":  memory[i][j] = 1'b1;
+          default: memory[i][j] = 1'bx;
+        endcase
+      end
+    end
+
+    @(posedge rst_n);
+
+    forever begin
+      @(posedge clk);
+      // writes get latched at clock in golden model array
+      for (int unsigned i = 0; i < NumPorts; i++) begin
+        if (req[i] && we[i]) begin
+          for (int unsigned j = 0; j < DataWidth; j++) begin
+            if (be[i][j/ByteWidth]) begin
+              memory[addr[i]][j] = wdata[i][j];
+            end
+          end
+        end
+      end
+
+      #TestTime;  // read checks are launched at `TestTime`
+      for (int unsigned i = 0; i < NumPorts; i++) begin
+        fork  // one process per port: a pending read must not delay the other ports' checks
+          automatic int unsigned port_idx = i;
+          check_read(port_idx, addr[port_idx]);
+        join_none
+      end
+    end
+  end
+
+  // Read test process. This task lives for a number of cycles determined by `Latency`.
+  task automatic check_read(input int unsigned port, input addr_t read_addr);
+    // only continue if there is a read request at this port
+    if (req[port] && !we[port]) begin
+      data_t exp_data = memory[read_addr];
+
+      if (Latency > 0) begin
+        repeat (Latency) @(posedge clk);
+        #TestTime;
+      end
+
+      for (int unsigned i = 0; i < DataWidth; i++) begin
+        if (!$isunknown(exp_data[i])) begin
+          assert (exp_data[i] === rdata[port][i])
+          else begin  // severity error: a mismatch must show up in the simulator's error count
+            $error("Port: %0d unexpected bit[%0h], Addr: %0h expected: %0h, measured: %0h",
+                   port, i, read_addr, exp_data[i], rdata[port][i]);
+            failed_test++;
+          end
+        end
+      end
+    end
+  endtask : check_read
+
+  // Stop the simulation at the end.
+  initial begin : proc_stop
+    @(posedge rst_n);
+    wait (&done);
+    repeat (10) @(posedge clk);
+    $info("Simulation done, errors: %0d", failed_test);
+    $stop();
+  end
+
+  mem_multibank_pwrgate #(
+    .NumWords     (NumWords),       // Number of Words in data array
+    .DataWidth    (DataWidth),      // Data signal width
+    .ByteWidth    (ByteWidth),      // Width of a data byte
+    .NumPorts     (NumPorts),       // Number of read and write ports
+    .Latency      (Latency),        // Latency when the read data is available
+    .NumLogicBanks(NumLogicBanks),  // Number of Logic Banks for power gating/retention
+    .SimInit      (SimInit),        // Simulation initialization
+    .PrintSimCfg  (1'b1)            // Print configuration
+  ) i_tc_sram_dut (
+    .clk_i      (clk),    // Clock
+    .rst_ni     (rst_n),  // Asynchronous reset active low
+    .req_i      (req),    // request
+    .we_i       (we),     // write enable
+    .addr_i     (addr),   // request address
+    .wdata_i    (wdata),  // write data
+    .be_i       (be),     // write byte enable
+    .deepsleep_i('0),     // Tied to zero to suppress Warnings
+    .powergate_i('0),     // Tied to zero to suppress Warnings
+    .rdata_o    (rdata)   // read data
+  );
+
+endmodule
diff --git a/test/simulate.sh b/test/simulate.sh
index e0544fe9..a15d578b 100755
--- a/test/simulate.sh
+++ b/test/simulate.sh
@@ -23,6 +23,21 @@ call_vsim() {
     grep "Errors: 0," vsim.log
 }
 
+for PORTS in 1 2; do
+  for LATENCY in 0 1 2; do
+    for WORDS in 16 256 512 1024; do
+      for DWIDTH in 1 42 64; do
+        for BYTEWIDTH in 1 8 9; do
+          for BANKS in 1 2 4 8; do
+            call_vsim mem_multibank_pwrgate_tb -gNumPorts=$PORTS -gLatency=$LATENCY -gNumWords=$WORDS -gDataWidth=$DWIDTH -gByteWidth=$BYTEWIDTH -gNumLogicBanks=$BANKS
+          done
+        done
+      done
+    done
+  done
+done
+
+
 #call_vsim cdc_fifo_tb # currently broken
 for tb in cdc_2phase_tb fifo_tb graycode_tb id_queue_tb popcount_tb stream_register_tb addr_decode_tb; do
     call_vsim $tb