Skip to content

Example: DDR3 Memory Buffer UART Loopback

Julian Kemmerer edited this page Dec 8, 2024 · 10 revisions

This is a breakdown of a UART loopback design example that uses DDR3 memory to buffer messages.

This example is from a series of examples designed for the Arty Board; see those instructions for building the project.

The PipelineC source for this design can be found here. A C test program for exercising the loopback can be found here.

diagram

Source

#include "compiler.h"
#include "wire.h"
#include "../leds/led0_3.c"
#include "../uart/uart_msg_fifos.c"
#include "../ddr3/xil_mig.c"
#include "test.h" // Constants shared with software

// Write a stream of messages from uart to DDR3, and once done,
// read those same messages back from DDR3 and stream them out over uart

// State machine that waits for incoming message over uart (async fifos)
// Then writes it to DDR memory at a specific address
// States of the uart_to_mem state machine.
// NOTE(review): RESET is listed first; presumably that makes it the
// power-on value of the static state register - confirm against the tool.
typedef enum uart_to_mem_state_t
{
  RESET, // Idle: reports ready and waits for start
  WAIT_MSG, // Waiting for a complete message from the uart rx fifo
  SER_MSG, // Writing the buffered message to DDR through mig_write_256
}uart_to_mem_state_t;
// Outputs returned by uart_to_mem
typedef struct uart_to_mem_t
{
  uint1_t ready; // Set while the state machine sits in its RESET (idle) state
  xil_app_to_mig_t to_mem; // Signals to drive into the memory controller
  uint1_t done; // Set when the DDR write of the message reports done
}uart_to_mem_t;
// Moves one uart message into DDR3.
//   start     : leaves the idle state when set
//   msg_index : selects the destination, byte address = msg_index*UART_MSG_SIZE
//   from_mem  : current outputs of the memory controller
// Returns ready while idle, the memory controller signals to drive,
// and done once the DDR write has finished.
uart_to_mem_t uart_to_mem(uint1_t start, test_count_t msg_index, xil_mig_to_app_t from_mem)
{
  // Registers kept between iterations
  static uart_to_mem_state_t state; // Where the FSM currently is
  static uart_msg_t msg; // Received message, doubles as the serializer's working buffer

  // Default outputs: memory interface idle, not ready, not done
  uart_to_mem_t result;
  result.ready = 0;
  result.done = 0;
  result.to_mem = XIL_APP_TO_MIG_T_NULL();

  if(state==RESET)
  {
    // Idle: advertise readiness and move on once asked to start
    result.ready = 1;
    if(start)
    {
      state = WAIT_MSG;
    }
  }
  else if(state==WAIT_MSG)
  {
    // Poll the uart rx fifo receiver until it reports a complete message
    uart_rx_msg_fifo_receiver_t rx = uart_rx_msg_fifo_receiver(1);
    if(rx.done)
    {
      // Capture the message and start writing it to memory
      msg = rx.msg;
      state = SER_MSG;
    }
  }
  else if(state==SER_MSG)
  {
    // Run the ddr write serializer on the buffered message
    xil_mig_addr_t dest_addr = msg_index * UART_MSG_SIZE; // TODO shifts?
    mig_write_256_t wr = mig_write_256(1, dest_addr, msg.data, from_mem);
    // The serializer hands back the updated buffer and the memory signals
    msg.data = wr.data;
    result.to_mem = wr.to_mem;
    if(wr.done)
    {
      // Write finished: report it and return to idle
      result.done = 1;
      state = RESET;
    }
  }

  return result;
}

// State machine that reads a message from a specific address in DDR memory
// and then waits for that message to go out over uart (async fifo)
// States of the mem_to_uart state machine.
// NOTE(review): RESET is listed first; presumably that makes it the
// power-on value of the static state register - confirm against the tool.
typedef enum mem_to_uart_state_t
{
  RESET, // Idle: reports ready and waits for start
  DESER_MSG, // Reading the message out of DDR through mig_read_256
  WAIT_MSG // Waiting for the uart tx fifo sender to report the message sent
}mem_to_uart_state_t;
// Outputs returned by mem_to_uart
typedef struct mem_to_uart_t
{
  uint1_t ready; // Set while the state machine sits in its RESET (idle) state
  xil_app_to_mig_t to_mem; // Signals to drive into the memory controller
  uint1_t done; // Set when the uart tx fifo sender reports the message done
}mem_to_uart_t;
// Moves one message from DDR3 out to the uart.
//   start     : leaves the idle state when set
//   msg_index : selects the source, byte address = msg_index*UART_MSG_SIZE
//   from_mem  : current outputs of the memory controller
// Returns ready while idle, the memory controller signals to drive,
// and done once the uart tx fifo sender reports the message done.
mem_to_uart_t mem_to_uart(uint1_t start, test_count_t msg_index, xil_mig_to_app_t from_mem)
{
  // Registers kept between iterations
  static mem_to_uart_state_t state; // Where the FSM currently is
  static uart_msg_t msg; // Deserializer working buffer, then the message handed to uart

  // led1 shows whether we are waiting on the uart transmit side
  WIRE_WRITE(uint1_t, led1, state==WAIT_MSG)

  // Default outputs: memory interface idle, not ready, not done
  mem_to_uart_t result;
  result.ready = 0;
  result.done = 0;
  result.to_mem = XIL_APP_TO_MIG_T_NULL();

  if(state==RESET)
  {
    // Idle: advertise readiness and move on once asked to start
    result.ready = 1;
    if(start)
    {
      state = DESER_MSG;
    }
  }
  else if(state==DESER_MSG)
  {
    // Run the ddr read deserializer into the message buffer
    xil_mig_addr_t src_addr = msg_index * UART_MSG_SIZE; // TODO shifts?
    mig_read_256_t rd = mig_read_256(1, src_addr, msg.data, from_mem);
    // The deserializer hands back the memory signals and the updated buffer
    result.to_mem = rd.to_mem;
    msg.data = rd.data;
    if(rd.done)
    {
      // Whole message is in the buffer, next send it over uart
      state = WAIT_MSG;
    }
  }
  else if(state==WAIT_MSG)
  {
    // Keep offering the message to the uart tx fifo sender
    uart_tx_msg_fifo_sender_t tx = uart_tx_msg_fifo_sender(1, msg);
    if(tx.done)
    {
      // Message handed off: report it and return to idle
      result.done = 1;
      state = RESET;
    }
  }

  return result;
}

// Uses above state machines to transfer messages to/from DDR memory
// Phases of the top-level controller in app()
typedef enum msg_ctrl_state_t
{
  WAIT_RESET, // Waiting for the DDR controller reset/calibration to finish
  UART_TO_MEM, // N messages into memory (N is NUM_MSGS_TEST)
  MEM_TO_UART // N messages out of memory (N is NUM_MSGS_TEST)
}msg_ctrl_state_t;
// Main process, clocked the same as the generated memory interface.
// Once the DDR controller is out of reset and calibrated, stores
// NUM_MSGS_TEST uart messages into memory, then reads them all back
// out over uart, and starts over.
#pragma MAIN_MHZ app xil_mig_module
void app()
{
  // Registers
  static msg_ctrl_state_t state;
  static test_count_t num_msgs;

  // Input port: sample the output wires of the memory controller
  xil_mig_to_app_t from_mem;
  WIRE_READ(xil_mig_to_app_t, from_mem, xil_mig_to_app)

  // Output port: memory controller inputs, idle unless an FSM below drives them
  xil_app_to_mig_t to_mem = XIL_APP_TO_MIG_T_NULL();

  // led0 shows whether we are in the read-back phase
  WIRE_WRITE(uint1_t, led0, state==MEM_TO_UART)

  // MEM CTRL FSM
  if(state==WAIT_RESET)
  {
    // Message counter is held at zero for as long as we wait here
    num_msgs = 0;
    // DDR is usable once its reset is released and calibration has completed
    uint1_t ddr_ready = !from_mem.ui_clk_sync_rst & from_mem.init_calib_complete;
    if(ddr_ready)
    {
      // Writes come first
      state = UART_TO_MEM;
    }
  }
  else if(state==UART_TO_MEM)
  {
    // Start the uart_to_mem fsm again for every message, indexed by num_msgs
    uart_to_mem_t wr_fsm = uart_to_mem(1, num_msgs, from_mem);
    to_mem = wr_fsm.to_mem;
    if(wr_fsm.done)
    {
      if(num_msgs>=(NUM_MSGS_TEST-1))
      {
        // That was the last write, switch over to reading back
        num_msgs = 0;
        state = MEM_TO_UART;
      }
      else
      {
        // More messages to store
        num_msgs += 1;
      }
    }
  }
  else if(state==MEM_TO_UART)
  {
    // Start the mem_to_uart fsm again for every message, indexed by num_msgs
    mem_to_uart_t rd_fsm = mem_to_uart(1, num_msgs, from_mem);
    to_mem = rd_fsm.to_mem;
    if(rd_fsm.done)
    {
      if(num_msgs>=(NUM_MSGS_TEST-1))
      {
        // That was the last read, repeat everything from reset
        state = WAIT_RESET;
      }
      else
      {
        // More messages to send back
        num_msgs += 1;
      }
    }
  }

  // Memory controller reset overrides whatever the FSM decided above
  if(from_mem.ui_clk_sync_rst)
  {
    state = WAIT_RESET;
  }

  // Drive wires into memory controller
  WIRE_WRITE(xil_app_to_mig_t, xil_app_to_mig, to_mem)
}

PipelineC Tool

Throughput Sweep (Uneventful for simple state machines...)

In more advanced examples you will want to use the auto-pipelining features of PipelineC. This stateful function example cannot be pipelined further: no additional latency can be traded for throughput. Luckily, as written it is expected to meet the UART and DDR clock timings, so no further design changes are needed.

================== Beginning Throughput Sweep ================================
Function: led0_module Target MHz: 83.33
Function: led1_module Target MHz: 83.33
Function: led2_module Target MHz: 83.33
Function: led3_module Target MHz: 83.33
Function: uart_module Target MHz: 25.0
Function: uart_rx_mac Target MHz: 25.0
Function: uart_tx_mac Target MHz: 25.0
Function: uart_rx_msg Target MHz: 25.0
Function: uart_tx_msg Target MHz: 25.0
Function: uart_rx_msg_fifo_module Target MHz: 25.0
Function: uart_tx_msg_fifo_module Target MHz: 25.0
Function: xil_mig_module Target MHz: 83.33
Function: app Target MHz: 83.33
Function: app_tieoff Target MHz: 25.0
WARNING: uart_tx_msg_fifo async fifo depth increased to minimum allowed = 16
WARNING: uart_rx_msg_fifo async fifo depth increased to minimum allowed = 16
Starting with blank sweep state...
...determining slicing information for each main function...
led0_module : 0 clocks latency, sliced coarsely...
led1_module : 0 clocks latency, sliced coarsely...
led2_module : 0 clocks latency, sliced coarsely...
led3_module : 0 clocks latency, sliced coarsely...
uart_module : 0 clocks latency, sliced coarsely...
uart_rx_mac : 0 clocks latency, sliced coarsely...
uart_tx_mac : 0 clocks latency, sliced coarsely...
uart_rx_msg : 0 clocks latency, sliced coarsely...
uart_tx_msg : 0 clocks latency, sliced coarsely...
uart_rx_msg_fifo_module : 0 clocks latency, sliced coarsely...
uart_tx_msg_fifo_module : 0 clocks latency, sliced coarsely...
xil_mig_module : 0 clocks latency, sliced coarsely...
app : 0 clocks latency, sliced coarsely...
app_tieoff : 0 clocks latency, sliced coarsely...
Running: /media/1TB/Programs/Linux/Xilinx/Vivado/2019.2/bin/vivado -journal /home/julian/pipelinec_syn_output/top/vivado.jou -log /home/julian/pipelinec_syn_output/top/vivado_2784.log -mode batch -source "/home/julian/pipelinec_syn_output/top/top_2784.tcl"
Clock Goal (MHz): 25.0 , Current MHz: 159.6169193934557 ( 6.265000000000001 ns)
Clock Goal (MHz): 83.33333333333333 , Current MHz: 129.87012987012986 ( 7.7 ns)
Found maximum pipeline latencies...
================== Writing Results of Throughput Sweep ================================
Done.

VHDL wrapper for Vivado

-- Top level file connecting board to PipelineC generated code

library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use ieee.numeric_std.all;
library UNISIM;
use UNISIM.VCOMPONENTS.ALL;

-- PipelineC packages
use work.c_structs_pkg.all;

-- Connections to the board, see xdc files, un/comment things as needed
entity board is
  port (
    -- Board clock input, put onto a global clock buffer in the architecture
    CLK100MHZ : in std_logic;
    -- Switches: converted to switches_wire in the architecture, but the
    -- connection into the PipelineC top entity is commented out there
    sw : in std_logic_vector(3 downto 0);
    -- LEDs: driven from the led0..led3 module outputs of the PipelineC top entity
    led : out std_logic_vector(3 downto 0);
    -- UART: uart_rxd_out is driven by the design's uart output,
    -- uart_txd_in feeds the design's uart input
    uart_rxd_out : out std_logic;
    uart_txd_in : in std_logic;
    -- DDR3 memory pins, all connected directly to the ddr3_0 controller instance
    ddr3_dq       : inout std_logic_vector(15 downto 0);
    ddr3_dqs_p    : inout std_logic_vector(1 downto 0);
    ddr3_dqs_n    : inout std_logic_vector(1 downto 0);
    ddr3_addr     : out   std_logic_vector(13 downto 0);
    ddr3_ba       : out   std_logic_vector(2 downto 0);
    ddr3_ras_n    : out   std_logic;
    ddr3_cas_n    : out   std_logic;
    ddr3_we_n     : out   std_logic;
    ddr3_reset_n  : out   std_logic;
    ddr3_ck_p     : out   std_logic_vector(0 downto 0); -- Uncomment to use DDR3
    ddr3_ck_n     : out   std_logic_vector(0 downto 0); -- Uncomment to use DDR3
    ddr3_cke      : out   std_logic_vector(0 downto 0);
    ddr3_cs_n     : out   std_logic_vector(0 downto 0);
    ddr3_dm       : out   std_logic_vector(1 downto 0);
    ddr3_odt      : out   std_logic_vector(0 downto 0)
  );
end board;

-- Wrapper architecture around the PipelineC generated 'top' entity:
--  * buffers the board clock and derives the general and DDR clocks from it
--  * instantiates the DDR3 memory controller (ddr3_0)
--  * converts between board pins / controller signals and the PipelineC
--    struct types from c_structs_pkg
architecture arch of board is

-- General clocks based off of the board's CLK100MHZ
signal clk_25, clk_50, clk_100, clk_200, clk_400 : std_logic;
signal clks_ready: std_logic; -- 'locked' output of the general clock generator
signal rst : std_logic; -- Asserted while the general clocks are not ready
component clks_sys_clk_100
port
 (
  -- Clock out ports
  clk_25          : out    std_logic;
  clk_50          : out    std_logic;
  clk_100         : out    std_logic;
  clk_200         : out    std_logic;
  clk_400         : out    std_logic;
  -- Status and control signals
  locked          : out    std_logic;
  -- Clock in ports
  sys_clk_100     : in     std_logic
 );
end component;

-- DDR clocks based off of the board's CLK100MHZ
signal ddr_sys_clk : std_logic; -- 166.66MHz 
signal clk_166p66 : std_logic; -- Copy of ddr_sys_clk named after its frequency
signal ddr_clks_ready: std_logic; -- 'locked' output of the DDR clock generator
signal ddr_sys_rst_n : std_logic; -- Active low version of ddr_sys_rst
signal ddr_sys_rst : std_logic; -- Asserted until both clock generators are ready
component ddr_clks_sys_clk_100
port
 (
  -- Clock out ports
  ddr_sys_clk          : out    std_logic;
  -- Status and control signals
  locked            : out    std_logic;
  -- Clock in ports
  sys_clk_100           : in     std_logic
 );
end component;

-- The board's DDR3 controller
-- app_* signals carry the controller's application interface,
-- they are un/packed to/from app_to_mig / mig_to_app in the process below
signal app_addr                  :     std_logic_vector(27 downto 0);
signal app_cmd                   :     std_logic_vector(2 downto 0);
signal app_en                    :     std_logic;
signal app_wdf_data              :     std_logic_vector(127 downto 0);
signal app_wdf_end               :     std_logic;
signal app_wdf_mask              :     std_logic_vector(15 downto 0);
signal app_wdf_wren              :     std_logic;
signal app_rd_data               :    std_logic_vector(127 downto 0);
signal app_rd_data_end           :    std_logic;
signal app_rd_data_valid         :    std_logic;
signal app_rdy                   :    std_logic;
signal app_wdf_rdy               :    std_logic;
signal app_sr_req                :     std_logic;
signal app_ref_req               :     std_logic;
signal app_zq_req                :     std_logic;
signal app_sr_active             :    std_logic;
signal app_ref_ack               :    std_logic;
signal app_zq_ack                :    std_logic;
signal ui_clk                    :    std_logic; -- 83.33MHz 
signal clk_83p33                 :    std_logic; -- Copy of ui_clk, clocks the PipelineC top entity
signal ui_clk_sync_rst           :    std_logic;
signal init_calib_complete       :    std_logic;
component ddr3_0
  port (
      ddr3_dq       : inout std_logic_vector(15 downto 0);
      ddr3_dqs_p    : inout std_logic_vector(1 downto 0);
      ddr3_dqs_n    : inout std_logic_vector(1 downto 0);
      ddr3_addr     : out   std_logic_vector(13 downto 0);
      ddr3_ba       : out   std_logic_vector(2 downto 0);
      ddr3_ras_n    : out   std_logic;
      ddr3_cas_n    : out   std_logic;
      ddr3_we_n     : out   std_logic;
      ddr3_reset_n  : out   std_logic;
      ddr3_ck_p     : out   std_logic_vector(0 downto 0);
      ddr3_ck_n     : out   std_logic_vector(0 downto 0);
      ddr3_cke      : out   std_logic_vector(0 downto 0);
	  ddr3_cs_n     : out   std_logic_vector(0 downto 0);
      ddr3_dm       : out   std_logic_vector(1 downto 0);
      ddr3_odt      : out   std_logic_vector(0 downto 0);
      app_addr                  : in    std_logic_vector(27 downto 0);
      app_cmd                   : in    std_logic_vector(2 downto 0);
      app_en                    : in    std_logic;
      app_wdf_data              : in    std_logic_vector(127 downto 0);
      app_wdf_end               : in    std_logic;
      app_wdf_mask              : in    std_logic_vector(15 downto 0);
      app_wdf_wren              : in    std_logic;
      app_rd_data               : out   std_logic_vector(127 downto 0);
      app_rd_data_end           : out   std_logic;
      app_rd_data_valid         : out   std_logic;
      app_rdy                   : out   std_logic;
      app_wdf_rdy               : out   std_logic;
      app_sr_req                : in    std_logic;
      app_ref_req               : in    std_logic;
      app_zq_req                : in    std_logic;
      app_sr_active             : out   std_logic;
      app_ref_ack               : out   std_logic;
      app_zq_ack                : out   std_logic;
      ui_clk                    : out   std_logic;
      ui_clk_sync_rst           : out   std_logic;
      init_calib_complete       : out   std_logic;
      -- System Clock Ports
      sys_clk_i                 : in    std_logic;
      -- Reference Clock Ports
      clk_ref_i                 : in    std_logic;
      sys_rst                   : in    std_logic -- ACTIVE LOW - PORT NAME IS INCORRECT
  );
end component ddr3_0;
 
-- Internal signals
-- Clocks
signal sys_clk_100 : std_logic; -- CLK100MHZ after the global clock buffer
-- Switches
-- (assigned from sw below, not connected to top_inst in this example)
signal switches_wire : unsigned(3 downto 0);
-- LEDs
signal leds_wire : unsigned(3 downto 0);
-- UART
signal uart_data_in : unsigned(0 downto 0);
signal uart_data_out : unsigned(0 downto 0);
-- DDR3
-- Struct views of the controller's application interface, types come from c_structs_pkg
signal mig_to_app : xil_mig_to_app_t;
signal app_to_mig : xil_app_to_mig_t;

begin

-- Connect board's CLK100MHZ pin to internal global clock buffer network
CLK100MHZ_bufg_inst: BUFG 
port map (
    I => CLK100MHZ, 
    O => sys_clk_100
);

-- General clocks based off of the board's CLK100MHZ
clks_sys_clk_100_inst : clks_sys_clk_100
   port map ( 
  -- Clock out ports  
   clk_25 => clk_25,
   clk_50 => clk_50,
   clk_100 => clk_100,
   clk_200 => clk_200,
   clk_400 => clk_400,
  -- Status and control signals                
   locked => clks_ready,
   -- Clock in ports
   sys_clk_100 => sys_clk_100
 );
-- Hold in reset until clocks are ready
rst <= not clks_ready;

-- DDR clocks based off of the board's CLK100MHZ 
ddr_clks_sys_clk_100_inst : ddr_clks_sys_clk_100
   port map ( 
   ddr_sys_clk => ddr_sys_clk, -- 166.66MHz 
   locked => ddr_clks_ready,
   sys_clk_100 => sys_clk_100
 );
clk_166p66 <= ddr_sys_clk;
-- Hold in reset until clocks are ready
-- (covers both the general clocks, via rst, and the DDR clocks)
ddr_sys_rst <= rst or not ddr_clks_ready;
ddr_sys_rst_n <= not ddr_sys_rst;
 
-- The board's DDR3 controller
 ddr3_0_inst : ddr3_0
     port map (
        -- Memory interface ports
        ddr3_addr                      => ddr3_addr,
        ddr3_ba                        => ddr3_ba,
        ddr3_cas_n                     => ddr3_cas_n,
        ddr3_ck_n                      => ddr3_ck_n,
        ddr3_ck_p                      => ddr3_ck_p,
        ddr3_cke                       => ddr3_cke,
        ddr3_ras_n                     => ddr3_ras_n,
        ddr3_reset_n                   => ddr3_reset_n,
        ddr3_we_n                      => ddr3_we_n,
        ddr3_dq                        => ddr3_dq,
        ddr3_dqs_n                     => ddr3_dqs_n,
        ddr3_dqs_p                     => ddr3_dqs_p,
        init_calib_complete            => init_calib_complete,
 	   ddr3_cs_n                      => ddr3_cs_n,
        ddr3_dm                        => ddr3_dm,
        ddr3_odt                       => ddr3_odt,
        -- Application interface ports
        app_addr                       => app_addr,
        app_cmd                        => app_cmd,
        app_en                         => app_en,
        app_wdf_data                   => app_wdf_data,
        app_wdf_end                    => app_wdf_end,
        app_wdf_wren                   => app_wdf_wren,
        app_rd_data                    => app_rd_data,
        app_rd_data_end                => app_rd_data_end,
        app_rd_data_valid              => app_rd_data_valid,
        app_rdy                        => app_rdy,
        app_wdf_rdy                    => app_wdf_rdy,
        app_sr_req                     => app_sr_req,
        app_ref_req                    => app_ref_req,
        app_zq_req                     => app_zq_req,
        app_sr_active                  => app_sr_active,
        app_ref_ack                    => app_ref_ack,
        app_zq_ack                     => app_zq_ack,
        ui_clk                         => ui_clk, -- 83.33MHz
        ui_clk_sync_rst                => ui_clk_sync_rst,
        app_wdf_mask                   => app_wdf_mask,
        -- System Clock Ports
        sys_clk_i                      => ddr_sys_clk, -- 166.66MHz 
        -- Reference Clock Ports
        clk_ref_i                      => clk_200, -- Ref always 200MHz
        sys_rst                        => ddr_sys_rst_n -- ACTIVE LOW - PORT NAME IS INCORRECT
     );
clk_83p33 <= ui_clk;

-- Un/pack IO struct types to/from flattened SLV board pins
-- TODO Code gen this...
-- Commented out wires as necessary
-- Purely combinational: every assignment below is a type conversion or re-grouping
process(all) begin
    -- LEDs
    led <= std_logic_vector(leds_wire);       
    -- Switches
    switches_wire <= unsigned(sw);
    -- UART
    uart_data_in(0) <= uart_txd_in;
    uart_rxd_out <= uart_data_out(0);
    -- DDR3
    -- app_to_mig fields are flattened into the controller's input signals,
    -- the controller's outputs are packed into mig_to_app fields
    app_addr <= std_logic_vector(app_to_mig.addr);
    app_cmd  <= std_logic_vector(app_to_mig.cmd);
    app_en  <= std_logic(app_to_mig.en(0));
    -- The byte loops iterate over app_wdf_mask'length (16) entries,
    -- one per byte of the 128 bit data buses
    for byte_i in 0 to app_wdf_mask'length-1 loop
		app_wdf_data(((byte_i+1)*8)-1 downto (byte_i*8)) <= std_logic_vector(app_to_mig.wdf_data(byte_i));
	end loop;
    app_wdf_end  <= std_logic(app_to_mig.wdf_end(0));
    for byte_i in 0 to app_wdf_mask'length-1 loop
		app_wdf_mask(byte_i) <= std_logic(app_to_mig.wdf_mask(byte_i)(0));
	end loop;
    app_wdf_wren <= std_logic(app_to_mig.wdf_wren(0));
    for byte_i in 0 to app_wdf_mask'length-1 loop
        mig_to_app.rd_data(byte_i) <= unsigned(app_rd_data(((byte_i+1)*8)-1 downto (byte_i*8)));
	end loop;
    mig_to_app.rd_data_end(0) <= app_rd_data_end; 
    mig_to_app.rd_data_valid(0) <= app_rd_data_valid;
    mig_to_app.rdy(0) <= app_rdy;
    mig_to_app.wdf_rdy(0) <= app_wdf_rdy;
    app_sr_req   <= std_logic(app_to_mig.sr_req(0));
    app_ref_req  <= std_logic(app_to_mig.ref_req(0));
    app_zq_req   <= std_logic(app_to_mig.zq_req(0));
    mig_to_app.sr_active(0) <= app_sr_active;
    mig_to_app.ref_ack(0) <= app_ref_ack;
    mig_to_app.zq_ack(0)  <= app_zq_ack;
    mig_to_app.ui_clk_sync_rst(0) <= ui_clk_sync_rst;
    mig_to_app.init_calib_complete(0) <= init_calib_complete;
end process;
    
-- The PipelineC generated entity
top_inst : entity work.top port map (    
    -- Main function clocks
    -- Only the 25MHz and 83.33MHz clock ports are connected in this design
    clk_25p0 => clk_25,
    --clk_50p0 => clk_50,
    clk_83p33 => clk_83p33,
    --clk_100p0 => clk_100,
    --clk_166p66 => clk_166p66,
    --clk_200p0 => clk_200,
    --clk_400p0 => clk_400,
        
    -- Each main function's inputs and outputs
    
    -- LEDs
    led0_module_return_output(0) => leds_wire(0),
    led1_module_return_output(0) => leds_wire(1),
    led2_module_return_output(0) => leds_wire(2),
    led3_module_return_output(0) => leds_wire(3),
    
    -- Switches
    --switches_module_sw => switches_wire
    
    -- UART
    uart_module_data_in => uart_data_in,
    uart_module_return_output => uart_data_out,
    
    -- DDR3
    xil_mig_module_mig_to_app => mig_to_app,
    xil_mig_module_return_output => app_to_mig
);

end arch;

Vivado Results

Resource usage: resources

Clone this wiki locally