Skip to content

Commit

Permalink
Changed s_rdma_qp_ctx to transport aes-key, compression- and dpi-bit …
Browse files Browse the repository at this point in the history
…to the roce_stack
  • Loading branch information
Maximilian committed Nov 15, 2024
1 parent e07602a commit bc1fb17
Show file tree
Hide file tree
Showing 6 changed files with 143 additions and 25 deletions.
53 changes: 51 additions & 2 deletions hw/hdl/network/rdma/rdma_mux_retrans.sv
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ logic [0:0] state_C, state_N;

logic rd_C, rd_N;
logic actv_C, actv_N;
logic [LEN_BITS-BEAT_LOG_BITS:0] cnt_C, cnt_N;
logic [LEN_BITS-BEAT_LOG_BITS:0] cnt_C, cnt_N, cnt_ddr_wr;

logic tr_done;

Expand Down Expand Up @@ -274,6 +274,22 @@ always_comb begin: DP
endcase
end

// Counting the outgoing data transmissions to the retrans buffer
always_ff @ (posedge aclk) begin

if(aresetn == 1'b0) begin
cnt_ddr_wr <= 1'b0;
end else begin
if(s_req_net.valid) begin
// Once a new command comes in, set the transmission counter to the length transmitted via the command interface
cnt_ddr_wr <= s_req_net.data.len[LEN_BITS-1:0]/64;
end else begin
// Decrement the counter with every successfull write to the retrans-memory
cnt_ddr_wr <= (axis_ddr_wr.tvalid & axis_ddr_wr.tready) ? (cnt_ddr_wr-1) : cnt_ddr_wr;
end
end
end

// Mux
always_comb begin
if(state_C == ST_MUX) begin
Expand Down Expand Up @@ -320,12 +336,45 @@ assign axis_net.tlast = actv_C ? (rd_C ? s_axis_user_rsp.tlast : s_axis_user_req

assign axis_ddr_wr.tdata = s_axis_user_req.tdata;
assign axis_ddr_wr.tkeep = s_axis_user_req.tkeep;
assign axis_ddr_wr.tlast = s_axis_user_req.tlast;
assign axis_ddr_wr.tlast = (cnt_ddr_wr == 1);

//
// DEBUG
//

/* ila_retrans inst_ila_retrans (
.clk(aclk),
.probe0(s_req_net.valid),
.probe1(s_req_net.data), // 128
.probe2(s_req_net.ready),
.probe3(s_axis_user_req.tvalid),
.probe4(s_axis_user_req.tdata), // 512
.probe5(s_axis_user_req.tkeep), // 64
.probe6(s_axis_user_req.tready),
.probe7(s_axis_user_req.tlast),
.probe8(m_axis_net.tvalid),
.probe9(m_axis_net.tdata), // 512
.probe10(m_axis_net.tkeep), // 64
.probe11(m_axis_net.tready),
.probe12(m_axis_net.tlast),
.probe13(m_req_ddr_wr.valid),
.probe14(m_req_ddr_wr.data), // 128
.probe15(m_req_ddr_wr.ready),
.probe16(m_axis_ddr.tvalid),
.probe17(m_axis_ddr.tdata), // 512
.probe18(m_axis_ddr.tkeep), // 64
.probe19(m_axis_ddr.tready),
.probe20(m_axis_ddr.tlast),
.probe21(seq_snk_valid),
.probe22(seq_snk_ready),
.probe23(rd_snk),
.probe24(actv_snk),
.probe25(cnt_C), // 26
.probe26(state_C),
.probe27(cnt_ddr_wr), // 26
.probe28(tr_done)
); */

/*
create_ip -name ila -vendor xilinx.com -library ip -version 6.2 -module_name ila_retrans
set_property -dict [list CONFIG.C_PROBE29_WIDTH {22} CONFIG.C_PROBE23_WIDTH {28} CONFIG.C_NUM_OF_PROBES {35} CONFIG.Component_Name {ila_retrans} CONFIG.C_EN_STRG_QUAL {1} CONFIG.C_PROBE34_MU_CNT {2} CONFIG.C_PROBE33_MU_CNT {2} CONFIG.C_PROBE32_MU_CNT {2} CONFIG.C_PROBE31_MU_CNT {2} CONFIG.C_PROBE30_MU_CNT {2} CONFIG.C_PROBE29_MU_CNT {2} CONFIG.C_PROBE28_MU_CNT {2} CONFIG.C_PROBE27_MU_CNT {2} CONFIG.C_PROBE26_MU_CNT {2} CONFIG.C_PROBE25_MU_CNT {2} CONFIG.C_PROBE24_MU_CNT {2} CONFIG.C_PROBE23_MU_CNT {2} CONFIG.C_PROBE22_MU_CNT {2} CONFIG.C_PROBE21_MU_CNT {2} CONFIG.C_PROBE20_MU_CNT {2} CONFIG.C_PROBE19_MU_CNT {2} CONFIG.C_PROBE18_MU_CNT {2} CONFIG.C_PROBE17_MU_CNT {2} CONFIG.C_PROBE16_MU_CNT {2} CONFIG.C_PROBE15_MU_CNT {2} CONFIG.C_PROBE14_MU_CNT {2} CONFIG.C_PROBE13_MU_CNT {2} CONFIG.C_PROBE12_MU_CNT {2} CONFIG.C_PROBE11_MU_CNT {2} CONFIG.C_PROBE10_MU_CNT {2} CONFIG.C_PROBE9_MU_CNT {2} CONFIG.C_PROBE8_MU_CNT {2} CONFIG.C_PROBE7_MU_CNT {2} CONFIG.C_PROBE6_MU_CNT {2} CONFIG.C_PROBE5_MU_CNT {2} CONFIG.C_PROBE4_MU_CNT {2} CONFIG.C_PROBE3_MU_CNT {2} CONFIG.C_PROBE2_MU_CNT {2} CONFIG.C_PROBE1_MU_CNT {2} CONFIG.C_PROBE0_MU_CNT {2} CONFIG.ALL_PROBE_SAME_MU_CNT {2}] [get_ips ila_retrans]
Expand Down
35 changes: 28 additions & 7 deletions hw/hdl/network/rdma/roce_stack.sv
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,27 @@ assign m_rdma_wr_req.valid = rdma_wr_req.valid;
assign m_rdma_wr_req.data = rdma_wr_req.data;
assign rdma_wr_req.ready = m_rdma_wr_req.ready;


//
// QP_CTX Remapping
//

metaIntf #(.STYPE(rdma_qp_ctx_old_t)) s_rdma_qp_interface_old ();

// Signals for ready and valid are assigned right away
assign s_rdma_qp_interface.ready = s_rdma_qp_interface_old.ready;
assign s_rdma_qp_interface_old.valid = s_rdma_qp_interface.valid;

// Data remapping
assign s_rdma_qp_interface_old.data.vaddr = s_rdma_qp_interface.data.vaddr;
assign s_rdma_qp_interface_old.data.r_key = s_rdma_qp_interface.data.r_key;
assign s_rdma_qp_interface_old.data.local_psn = s_rdma_qp_interface.data.local_psn;
assign s_rdma_qp_interface_old.data.remote_psn = s_rdma_qp_interface.data.remote_psn;
assign s_rdma_qp_interface_old.data.qp_num = s_rdma_qp_interface.data.qp_num;
assign s_rdma_qp_interface_old.data.new_state = s_rdma_qp_interface.data.new_state;



//
// RoCE stack
//
Expand Down Expand Up @@ -278,7 +299,7 @@ assign rdma_wr_req.ready = m_rdma_wr_req.ready;
.probe9(m_axis_tx.tlast),
.probe10(s_rdma_qp_interface.valid),
.probe11(s_rdma_qp_interface.ready),
.probe12(s_rdma_qp_interface.data), // 184
.probe12(s_rdma_qp_interface.data), // 314
.probe13(s_rdma_conn_interface.valid),
.probe14(s_rdma_conn_interface.ready),
.probe15(s_rdma_conn_interface.data), // 184
Expand Down Expand Up @@ -369,9 +390,9 @@ rocev2_ip rocev2_inst(
.s_axis_mem_read_data_TLAST(axis_rdma_rd.tlast),

// QP intf
.s_axis_qp_interface_TVALID(s_rdma_qp_interface.valid),
.s_axis_qp_interface_TREADY(s_rdma_qp_interface.ready),
.s_axis_qp_interface_TDATA(s_rdma_qp_interface.data),
.s_axis_qp_interface_TVALID(s_rdma_qp_interface_old.valid),
.s_axis_qp_interface_TREADY(s_rdma_qp_interface_old.ready),
.s_axis_qp_interface_TDATA(s_rdma_qp_interface_old.data),
.s_axis_qp_conn_interface_TVALID(s_rdma_conn_interface.valid),
.s_axis_qp_conn_interface_TREADY(s_rdma_conn_interface.ready),
.s_axis_qp_conn_interface_TDATA(s_rdma_conn_interface.data),
Expand Down Expand Up @@ -446,9 +467,9 @@ rocev2_ip rocev2_inst(
.s_axis_mem_read_data_TLAST(axis_rdma_rd.tlast),

// QP intf
.s_axis_qp_interface_V_TVALID(s_rdma_qp_interface.valid),
.s_axis_qp_interface_V_TREADY(s_rdma_qp_interface.ready),
.s_axis_qp_interface_V_TDATA(s_rdma_qp_interface.data),
.s_axis_qp_interface_V_TVALID(s_rdma_qp_interface_old.valid),
.s_axis_qp_interface_V_TREADY(s_rdma_qp_interface_old.ready),
.s_axis_qp_interface_V_TDATA(s_rdma_qp_interface_old.data),
.s_axis_qp_conn_interface_V_TVALID(s_rdma_conn_interface.valid),
.s_axis_qp_conn_interface_V_TREADY(s_rdma_conn_interface.ready),
.s_axis_qp_conn_interface_V_TDATA(s_rdma_conn_interface.data),
Expand Down
36 changes: 25 additions & 11 deletions hw/hdl/shell/cnfg_slave_avx.sv
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ localparam integer SYNC_STAT_REG = 8;
localparam integer NET_ARP_REG = 9;
// RDMA
// 49 (RW) : Write QP context
localparam integer RDMA_CTX_REG = 10;
localparam integer RDMA_CTX_REG_1 = 10;
// 50 (RW) : Write QP connection
localparam integer RDMA_CONN_REG = 11;
// TCP
Expand All @@ -364,6 +364,9 @@ localparam integer TCP_OPEN_PORT_STAT_REG = 13;
localparam integer TCP_OPEN_CONN_REG = 14;
localparam integer TCP_OPEN_CONN_STAT_REG = 15;

// Add another register for RDMA CTX to cover more than 256 bits
localparam integer RDMA_CTX_REG_2 = 16;

// 64 (RO) : Status DMA completion
localparam integer STAT_DMA_REG = 2**PID_BITS;
//
Expand Down Expand Up @@ -636,10 +639,16 @@ always_ff @(posedge aclk) begin
`endif

`ifdef EN_RDMA
RDMA_CTX_REG: // Context
RDMA_CTX_REG_1: // Context
for (int i = 0; i < AVX_DATA_BITS/8; i++) begin
if(s_axim_ctrl.wstrb[i]) begin
slv_reg[RDMA_CTX_REG_1][(i*8)+:8] <= s_axim_ctrl.wdata[(i*8)+:8];
end
end
RDMA_CTX_REG_2:
for (int i = 0; i < AVX_DATA_BITS/8; i++) begin
if(s_axim_ctrl.wstrb[i]) begin
slv_reg[RDMA_CTX_REG][(i*8)+:8] <= s_axim_ctrl.wdata[(i*8)+:8];
slv_reg[RDMA_CTX_REG_2][(i*8)+:8] <= s_axim_ctrl.wdata[(i*8)+:8];
m_rdma_qp_interface.valid <= 1'b1;
end
end
Expand Down Expand Up @@ -747,8 +756,10 @@ always_ff @(posedge aclk) begin
`endif

`ifdef EN_RDMA
[RDMA_CTX_REG:RDMA_CTX_REG]:
axi_rdata[0] <= m_rdma_qp_interface.ready;
[RDMA_CTX_REG_1:RDMA_CTX_REG_1]:
axi_rdata <= slv_reg[RDMA_CTX_REG_1];
[RDMA_CTX_REG_2:RDMA_CTX_REG_2]:
axi_rdata[0] <= m_rdma_qp_interface.ready;
[RDMA_CONN_REG:RDMA_CONN_REG]:
axi_rdata[0] <= m_rdma_conn_interface.ready;
`endif
Expand Down Expand Up @@ -1225,13 +1236,16 @@ ram_tp_nc #(
.b_data_out(b_data_out_rdma_wr)
);

// RDMA qp interface
// RDMA qp interface - add the new bits for AES-key, compression- and DPI-requirements
assign m_rdma_qp_interface.data.new_state = 0;
assign m_rdma_qp_interface.data.qp_num = slv_reg[RDMA_CTX_REG][0+:24]; // qpn
assign m_rdma_qp_interface.data.r_key = slv_reg[RDMA_CTX_REG][32+:32]; // r_key
assign m_rdma_qp_interface.data.local_psn = slv_reg[RDMA_CTX_REG][64+:24];
assign m_rdma_qp_interface.data.remote_psn = slv_reg[RDMA_CTX_REG][64+24+:24]; // psns
assign m_rdma_qp_interface.data.vaddr = slv_reg[RDMA_CTX_REG][128+:VADDR_BITS]; // vaddr
assign m_rdma_qp_interface.data.qp_num = slv_reg[RDMA_CTX_REG_1][0+:24]; // qpn
assign m_rdma_qp_interface.data.r_key = slv_reg[RDMA_CTX_REG_1][32+:32]; // r_key
assign m_rdma_qp_interface.data.local_psn = slv_reg[RDMA_CTX_REG_1][64+:24];
assign m_rdma_qp_interface.data.remote_psn = slv_reg[RDMA_CTX_REG_1][64+24+:24]; // psns
assign m_rdma_qp_interface.data.vaddr = slv_reg[RDMA_CTX_REG_1][128+:VADDR_BITS]; // vaddr
assign m_rdma_qp_interface.data.aes_key = slv_reg[RDMA_CTX_REG_2][0+:128]; // aes_key
assign m_rdma_qp_interface.data.compression_enabled = slv_reg[RDMA_CTX_REG_2][128+:1]; // compression-bit
assign m_rdma_qp_interface.data.dpi_enabled = slv_reg[RDMA_CTX_REG_2][129+:1]; // dpi-bit

// RDMA connection interface
assign m_rdma_conn_interface.data.local_qpn = slv_reg[RDMA_CONN_REG][0+:16];
Expand Down
14 changes: 14 additions & 0 deletions scripts/wr_hdl/template_gen/lynx_pkg_tmplt.txt
Original file line number Diff line number Diff line change
Expand Up @@ -536,15 +536,29 @@ package lynxTypes;
// RDMA
//

// Signal used for establishing a new QP. Send first, before rdma_qp_conn_t. Now also includes information about the Balboa-capabilities.
typedef struct packed {
logic [47:0] vaddr;
logic [31:0] r_key;
logic [23:0] local_psn;
logic [23:0] remote_psn;
logic [23:0] qp_num;
logic [31:0] new_state;
logic [127:0] aes_key;
logic compression_enabled;
logic dpi_enabled;
} rdma_qp_ctx_t;

typedef struct packed {
logic [47:0] vaddr;
logic [31:0] r_key;
logic [23:0] local_psn;
logic [23:0] remote_psn;
logic [23:0] qp_num;
logic [31:0] new_state;
} rdma_qp_ctx_old_t;

// Signal used for establishing a new connection. Send after the rdma_qp_ctx_t. Adds the network-specific information required for a QP.
typedef struct packed {
logic [15:0] remote_udp_port;
logic [127:0] remote_ip_address;
Expand Down
4 changes: 3 additions & 1 deletion sw/include/cDefs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ enum class CoyoteAlloc {
/* AVX regs */
// Control regs that get memory-mapped for controlling operations of the FPGA
// These are the ones used for AVX-systems. Why is there a difference between AVX and legacy systems?
// Added a second RDMA_CTX_REG to cover the full width (which is larger than 256 bits)
enum class CnfgAvxRegs : uint32_t {
CTRL_REG = 0,
ISR_REG = 1,
Expand All @@ -209,12 +210,13 @@ enum class CnfgAvxRegs : uint32_t {
SYNC_CTRL_REG = 7,
SYNC_STAT_REG = 8,
NET_ARP_REG = 9,
RDMA_CTX_REG = 10,
RDMA_CTX_REG_1 = 10,
RDMA_CONN_REG = 11,
TCP_OPEN_PORT_REG = 12,
TCP_OPEN_PORT_STAT_REG = 13,
TCP_OPEN_CONN_REG = 14,
TCP_OPEN_CONN_STAT_REG = 15,
RDMA_CTX_REG_2 = 16,
STAT_DMA_REG = 64
};

Expand Down
26 changes: 22 additions & 4 deletions sw/src/bThread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1018,7 +1018,7 @@ bool bThread::doArpLookup(uint32_t ip_addr) {
*/
bool bThread::writeQpContext(uint32_t port) {
// Basic idea: Get information from the previously created qp-struct and write it to configuration memory
uint64_t offs[3];
uint64_t offs[6];
if(fcnfg.en_rdma) {
// Write QP context - QPN, rkey, local and remote PSN, vaddr
offs[0] = ((static_cast<uint64_t>(qpair->local.qpn) & 0xffffff) << qpContextQpnOffs) |
Expand All @@ -1029,6 +1029,21 @@ bool bThread::writeQpContext(uint32_t port) {

offs[2] = ((static_cast<uint64_t>((uint64_t)qpair->remote.vaddr) & 0xffffffffffff) << qpContextVaddrOffs);

// Splitting up the 128 Bit AES-key to write it into two distinct 64-bit parts
uint64_t aes_key_high = static_cast<uint64_t>(qpair->local.aes_key >> 64);
uint64_t aes_key_low = static_cast<uint64_t>(qpair->local.aes_key & 0xFFFFFFFFFFFFFFFF);

offs[3] = aes_key_high;
offs[4] = aes_key_low;

// Writing the compression- and dpi-bits in the 2 LSBs of the sixth FPGA slave-register
const uint8_t bool1Offset = 0;
const uint8_t bool2Offset = 1;

offs[5] = 0;
offs[5] |= (qpair->local.compression_enabled ? 1ULL : 0ULL) << bool1Offset; // Compression-bit is stored in LSB
offs[5] |= (qpair->local.dpi_enabled ? 1ULL : 0ULL) << bool2Offset; // DPI-Bit is stored in the next bit after the LSB

# ifdef VERBOSE
std::cout << "bThread: Called writeQpContext on a RDMA-enabled design." << std::endl;
std::cout << " - bThread - offs[0] " << offs[0] << std::endl;
Expand All @@ -1039,10 +1054,13 @@ bool bThread::writeQpContext(uint32_t port) {
// Write this information obtained from the QP-struct into configuration memory / registers
#ifdef EN_AVX
if(fcnfg.en_avx) {
if(_mm256_extract_epi32(cnfg_reg_avx[static_cast<uint32_t>(CnfgAvxRegs::RDMA_CTX_REG)], 0))
cnfg_reg_avx[static_cast<uint32_t>(CnfgAvxRegs::RDMA_CTX_REG)] = _mm256_set_epi64x(0, offs[2], offs[1], offs[0]);
else
if(_mm256_extract_epi32(cnfg_reg_avx[static_cast<uint32_t>(CnfgAvxRegs::RDMA_CTX_REG)], 0)) {
// Write to the upper and the lower register to transmit the data at over 256 bits
cnfg_reg_avx[static_cast<uint32_t>(CnfgAvxRegs::RDMA_CTX_REG_1)] = _mm256_set_epi64x(0, offs[2], offs[1], offs[0]);
cnfg_reg_avx[static_cast<uint32_t](CnfgAvxRegs::RDMA_CTX_REG_2)] = _mm256_set_epi64x(0, offs[5], offs[4], offs[3]);
} else {
return false;
}
} else
#endif
{
Expand Down

0 comments on commit bc1fb17

Please sign in to comment.