Skip to content

Commit

Permalink
Merge branch 'valek-feat-card_tweaks' into 'devel'
Browse files Browse the repository at this point in the history
fix(tx_dma_calypte): fix assign to LBE register when creating BE vector

See merge request ndk/ndk-fpga!136
  • Loading branch information
jakubcabal committed Jan 13, 2025
2 parents fc40cfe + b70fc96 commit 826edc4
Show file tree
Hide file tree
Showing 13 changed files with 185 additions and 181 deletions.
4 changes: 3 additions & 1 deletion comp/dma/dma_calypte/comp/rx/rx_dma_calypte.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ architecture FULL of RX_DMA_CALYPTE is
constant BUFFERED_DATA_SIZE : natural := 128;
--=============================================================================================================

constant IS_INTEL_DEV : boolean := (DEVICE = "STRATIX10" or DEVICE = "AGILEX");

constant MI_SPLIT_PORTS : natural := 2;
constant MI_SPLIT_BASES : slv_array_t(MI_SPLIT_PORTS -1 downto 0)(MI_WIDTH-1 downto 0) := (
0 => x"00000000",
Expand Down Expand Up @@ -675,7 +677,7 @@ begin

META_WIDTH => 0,
LNG_WIDTH => log2(PKT_SIZE_MAX+1),
REG_BITMAP => "0000")
REG_BITMAP => tsel(IS_INTEL_DEV, "1111", "0000"))
port map (
CLK => CLK,
RESET => RESET,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -466,8 +466,6 @@ architecture FULL of TX_DMA_DEBUG_CORE is
signal comp_val_curr : std_logic_vector(TX_MFB_DATA'range);
signal comp_res_imm : std_logic_vector(TX_MFB_DATA'length/8 -1 downto 0);
signal comp_res_reg : std_logic_vector(TX_MFB_DATA'length/8 -1 downto 0);
signal comp_res_imm_vld : std_logic_vector(TX_MFB_DATA'length/8 -1 downto 0);
signal comp_res_reg_vld : std_logic_vector(TX_MFB_DATA'length/8 -1 downto 0);
signal comp_res_imm_diff : std_logic_vector(TX_MFB_DATA'length/8 -1 downto 0);
signal comp_res_reg_diff : std_logic_vector(TX_MFB_DATA'length/8 -1 downto 0);

Expand Down Expand Up @@ -1044,6 +1042,11 @@ begin
aux_sig_mfb_meta_hdr_meta <= aux_sig_mfb_meta(log2(CHANNELS) + DMA_META_WIDTH-1 downto log2(CHANNELS));
aux_sig_mfb_meta_pkt_size <= aux_sig_mfb_meta(log2(PKT_SIZE_MAX+1) + log2(CHANNELS) + DMA_META_WIDTH-1 downto log2(CHANNELS) + DMA_META_WIDTH);

-- This copies the 32-bit pattern over the whole word
patter_copy_val_g : for i in 0 to (aux_sig_mfb_data'length/32 -1) generate
pattern_copy_val(i*32 + 31 downto i*32) <= aux_sig_mfb_data(31 downto 0);
end generate;

pattern_comp_state_reg_p : process (CLK) is
begin
if (rising_edge(CLK)) then
Expand All @@ -1065,17 +1068,8 @@ begin

-- Tells which comparison results are valid and keeps their value. The reason for validation is
-- that the comparators also function in the part of a word where no packet is located.
comp_res_imm_vld <= comp_res_imm and aux_sig_mfb_item_vld;
comp_res_reg_vld <= comp_res_reg and aux_sig_mfb_item_vld;

-- Tells which bytes differ in the comparison vector
comp_res_imm_diff <= comp_res_imm_vld xor aux_sig_mfb_item_vld;
comp_res_reg_diff <= comp_res_reg_vld xor aux_sig_mfb_item_vld;

-- This copies the pattern over whole word
patter_copy_val_g : for i in 0 to (aux_sig_mfb_data'length/32 -1) generate
pattern_copy_val(i*32 + 31 downto i*32) <= aux_sig_mfb_data(31 downto 0);
end generate;
comp_res_imm_diff <= (not comp_res_imm) and aux_sig_mfb_item_vld;
comp_res_reg_diff <= (not comp_res_reg) and aux_sig_mfb_item_vld;

pattern_comp_nst_logic_p : process (all) is
variable comp_res_diff_v : std_logic;
Expand Down Expand Up @@ -1104,12 +1098,14 @@ begin

when S_COMP_MIDDLE_PKT =>

pattern_match_cntr_incr <= (not (or comp_res_reg_diff)) and aux_sig_mfb_dst_rdy;
pattern_mismatch_cntr_incr <= (or comp_res_reg_diff) and aux_sig_mfb_dst_rdy;
if (aux_sig_mfb_src_rdy = '1') then
pattern_match_cntr_incr <= (not (or comp_res_reg_diff)) and aux_sig_mfb_dst_rdy;
pattern_mismatch_cntr_incr <= (or comp_res_reg_diff) and aux_sig_mfb_dst_rdy;

comp_res_diff_v := (or comp_res_reg_diff);
if (aux_sig_mfb_eof = "1" or comp_res_diff_v = '1') then
pattern_comp_nst <= S_IDLE;
comp_res_diff_v := (or comp_res_reg_diff);
if (aux_sig_mfb_eof = "1" or comp_res_diff_v = '1') then
pattern_comp_nst <= S_IDLE;
end if;
end if;
end case;
end process;
Expand Down
2 changes: 2 additions & 0 deletions comp/dma/dma_calypte/comp/tx/Modules.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ set PACKET_DISPATCHER_BASE "$ENTITY_BASE/comp/packet_dispatcher"
set PCIE_TRANS_BUFFER_BASE "$ENTITY_BASE/comp/pcie_trans_buffer"
set MVB_FIFOX_BASE "$OFM_PATH/comp/mvb_tools/storage/fifox"
set FIFOX_MULTI_BASE "$OFM_PATH/comp/base/fifo/fifox_multi"
set MFB_PIPE_BASE "$OFM_PATH/comp/mfb_tools/flow/pipe"

lappend COMPONENTS [ list "TX_DMA_METADATA_EXTRACTOR" $META_EXTRACTOR_BASE "FULL"]
lappend COMPONENTS [ list "TX_DMA_CHAN_START_STOP_CTRL" $CHAN_START_STOP_CTRL_BASE "FULL"]
Expand All @@ -22,5 +23,6 @@ lappend COMPONENTS [ list "TX_DMA_PKT_DISPATCHER" $PACKET_DISPATCHER_BASE
lappend COMPONENTS [ list "TX_DMA_PCIE_TRANS_BUFFER" $PCIE_TRANS_BUFFER_BASE "FULL"]
lappend COMPONENTS [ list "MVB_FIFOX" $MVB_FIFOX_BASE "FULL"]
lappend COMPONENTS [ list "FIFOX_MULTI" $FIFOX_MULTI_BASE "FULL"]
lappend COMPONENTS [ list "MFB_PIPE" $MFB_PIPE_BASE "FULL"]

lappend MOD "$ENTITY_BASE/tx_dma_calypte.vhd"
Original file line number Diff line number Diff line change
Expand Up @@ -549,11 +549,16 @@ begin
end generate;
end generate;

ST_SP_DBG_CHAN <= PCIE_MFB_META(META_CHAN_NUM);
ST_SP_DBG_META(0) <= (or dma_hdr_out_of_order_chan);
ST_SP_DBG_META(1) <= (or meta_is_dma_hdr_int) and PCIE_MFB_DST_RDY;
ST_SP_DBG_META(2) <= (or dma_frame_lng_correct) and PCIE_MFB_DST_RDY;
ST_SP_DBG_META(3) <= (or dma_frame_lng_incorrect) and PCIE_MFB_DST_RDY;
-- Registers the debug outputs ST_SP_DBG_CHAN and ST_SP_DBG_META on the rising edge of CLK.
-- No reset is applied to these registers in this process.
dbg_signal_reg_p: process (CLK) is
begin
if (rising_edge(CLK)) then
-- channel number field taken from the PCIe MFB metadata
ST_SP_DBG_CHAN <= PCIE_MFB_META(META_CHAN_NUM);
-- bit 0: OR-reduction of dma_hdr_out_of_order_chan (not gated by PCIE_MFB_DST_RDY)
ST_SP_DBG_META(0) <= (or dma_hdr_out_of_order_chan);
-- bits 1 to 3: OR-reductions of the respective vectors, gated by PCIE_MFB_DST_RDY
ST_SP_DBG_META(1) <= (or meta_is_dma_hdr_int) and PCIE_MFB_DST_RDY;
ST_SP_DBG_META(2) <= (or dma_frame_lng_correct) and PCIE_MFB_DST_RDY;
ST_SP_DBG_META(3) <= (or dma_frame_lng_incorrect) and PCIE_MFB_DST_RDY;
end if;
end process;

-- One region debug (The "PCIE_MFB_SOF = "1"" is not that compatible)
pkt_statistics_g: if PCIE_MFB_REGIONS = 1 generate
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,14 @@ begin
FBE_OUT => fbe_decoded(i),
LBE_OUT => lbe_decoded(i));

pcie_mfb_meta_int(i) <= lbe_decoded(i) & fbe_decoded(i) & pcie_tr_byte_cnt(i) & (META_BE_W -1 downto 0 => '0') & chan_num_int(i) & pcie_addr_masked(i)(63 downto 2) & is_dma_hdr(i);
pcie_mfb_meta_int(i) <=
lbe_decoded(i) &
fbe_decoded(i) &
pcie_tr_byte_cnt(i) &
(META_BE_W -1 downto 0 => '0') &
chan_num_int(i) &
pcie_addr_masked(i)(63 downto 2) &
is_dma_hdr(i);
end generate;

-- Cutter is used only for Xilinx devices
Expand Down Expand Up @@ -444,7 +451,7 @@ begin
else
-- Higher takes
for i in 0 to PCIE_MFB_REGIONS - 1 loop
if (aux_mfb_sof(i) = '1' and aux_mfb_eof(i) = '0') then
if (aux_mfb_src_rdy = '1' and aux_mfb_sof(i) = '1' and aux_mfb_eof(i) = '0') then
usr_mfb_lbe_reg <= aux_mfb_meta_arr(i)(META_LBE);
end if;
end loop;
Expand All @@ -471,23 +478,23 @@ begin
mfb_aux_item_be <= (others => (others => (others => '0')));

if (aux_mfb_src_rdy = '1') then
for i in 0 to PCIE_MFB_REGIONS - 1 loop
for j in 0 to (PCIE_MFB_REGION_SIZE*PCIE_MFB_BLOCK_SIZE -1) loop
mfb_aux_item_be(i)(j) <= (others => mfb_aux_item_vld_int_arr(i)(j));
for reg_idx in 0 to PCIE_MFB_REGIONS - 1 loop
for item_idx in 0 to (PCIE_MFB_REGION_SIZE*PCIE_MFB_BLOCK_SIZE -1) loop
mfb_aux_item_be(reg_idx)(item_idx) <= (others => mfb_aux_item_vld_int_arr(reg_idx)(item_idx));
end loop;
end loop;

-- apply FBE to the BE vector
for i in 0 to PCIE_MFB_REGIONS - 1 loop
if (aux_mfb_sof(i) = '1') then
mfb_aux_item_be(i)(0) <= aux_mfb_meta_arr(i)(META_FBE);
for reg_idx in 0 to PCIE_MFB_REGIONS - 1 loop
if (aux_mfb_sof(reg_idx) = '1') then
mfb_aux_item_be(reg_idx)(0) <= aux_mfb_meta_arr(reg_idx)(META_FBE);
end if;

-- apply LBE to the BE vector
if (aux_mfb_eof(i) = '1' and aux_mfb_sof(i) = '0') then
mfb_aux_item_be(i)(to_integer(unsigned(aux_mfb_eof_pos_arr(i)))) <= usr_mfb_lbe_sel;
elsif (aux_mfb_eof(i) = '1' and aux_mfb_sof(i) = '1' and unsigned(aux_mfb_eof_pos_arr(i)) > 0) then
mfb_aux_item_be(i)(to_integer(unsigned(aux_mfb_eof_pos_arr(i)))) <= aux_mfb_meta_arr(i)(META_LBE);
if (aux_mfb_eof(reg_idx) = '1' and aux_mfb_sof(reg_idx) = '0') then
mfb_aux_item_be(reg_idx)(to_integer(unsigned(aux_mfb_eof_pos_arr(reg_idx)))) <= usr_mfb_lbe_sel;
elsif (aux_mfb_eof(reg_idx) = '1' and aux_mfb_sof(reg_idx) = '1' and unsigned(aux_mfb_eof_pos_arr(reg_idx)) > 0) then
mfb_aux_item_be(reg_idx)(to_integer(unsigned(aux_mfb_eof_pos_arr(reg_idx)))) <= aux_mfb_meta_arr(reg_idx)(META_LBE);
end if;
end loop;
end if;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,11 @@ architecture FULL of TX_DMA_PCIE_TRANS_BUFFER is
constant BRAM_REG_NUM : natural := 2;
-- Number of registers between BRAMs and barrel shifter
constant INP_REG_NUM : natural := 1;
constant IS_INTEL_DEV : boolean := (DEVICE = "STRATIX10" or DEVICE = "AGILEX");
-- a maximum depth of a BRAM block (in 1B items) depends on a vendor
constant MAX_BRAM_DEPTH : natural := tsel(IS_INTEL_DEV, 2048, 4096);
-- The number of channels that fit into one array
constant CHANS_PER_ARRAY : natural := minimum(CHANNELS, 4096/BUFFER_DEPTH);
constant CHANS_PER_ARRAY : natural := minimum(CHANNELS, MAX_BRAM_DEPTH/BUFFER_DEPTH);
-- Number of memory arrays since one array can contain multiple channels
constant MEM_ARRAYS : natural := CHANNELS/CHANS_PER_ARRAY;

Expand Down Expand Up @@ -155,6 +158,12 @@ architecture FULL of TX_DMA_PCIE_TRANS_BUFFER is

signal addr_sel : slv_array_t(MEM_ARRAYS -1 downto 0)(MFB_REGIONS - 1 downto 0);

-- =============================================================================================
-- DEBUG signals (verification or ILA)
-- =============================================================================================
signal wr_addr_collision_detected : slv_array_t(MEM_ARRAYS -1 downto 0)(MFB_BYTES -1 downto 0);
signal rdwr_collision_detected : slv_array_2d_t(MEM_ARRAYS -1 downto 0)(MFB_REGIONS -1 downto 0)(MFB_BYTES -1 downto 0);

begin
-- =============================================================================================
-- Input shift registers
Expand Down Expand Up @@ -657,6 +666,37 @@ begin
DIB => wr_data_bram_shifter_reg(BRAM_REG_NUM)(1)(wbyte*8 +7 downto wbyte*8),
DOB => rd_data_bram(mem_arr_idx)(1)(wbyte*8 +7 downto wbyte*8),
DOB_DV => open);

-- DEBUG process for simulation
-- Combinational process (sensitive to all read signals) that drives the debug flags
-- wr_addr_collision_detected and rdwr_collision_detected for the current
-- (mem_arr_idx, wbyte) pair of the enclosing generate statements. Each flag is '1' only
-- while its condition holds; nothing is latched here.
wr_addr_collision_detection_p: process (all) is
begin

-- default values: no collision reported
wr_addr_collision_detected(mem_arr_idx)(wbyte) <= '0';
rdwr_collision_detected(mem_arr_idx)(0)(wbyte) <= '0';
rdwr_collision_detected(mem_arr_idx)(1)(wbyte) <= '0';

-- dual concurrent write on the same address
-- (both ports have the byte write enable set and their addresses are equal)
if (wr_be_bram_demux_reg(BRAM_REG_NUM)(mem_arr_idx)(0)(wbyte) = '1' and
wr_be_bram_demux_reg(BRAM_REG_NUM)(mem_arr_idx)(1)(wbyte) = '1' and
rw_addr_bram_by_mux(mem_arr_idx)(0)(wbyte) = rw_addr_bram_by_mux(mem_arr_idx)(1)(wbyte)
) then
wr_addr_collision_detected(mem_arr_idx)(wbyte) <= '1';
end if;

-- concurrent read and write on port A
-- NOTE: addresses are not compared here; the flag is raised whenever the byte write
-- enable of port index 0 and rd_en_pch of index 0 are active at the same time
if (wr_be_bram_demux_reg(BRAM_REG_NUM)(mem_arr_idx)(0)(wbyte) = '1' and
rd_en_pch(mem_arr_idx)(0) = '1'
) then
rdwr_collision_detected(mem_arr_idx)(0)(wbyte) <= '1';
end if;

-- concurrent read and write on port B
-- NOTE: same check as above for index 1, again without an address comparison
if (wr_be_bram_demux_reg(BRAM_REG_NUM)(mem_arr_idx)(1)(wbyte) = '1' and
rd_en_pch(mem_arr_idx)(1) = '1'
) then
rdwr_collision_detected(mem_arr_idx)(1)(wbyte) <= '1';
end if;
end process;
end generate;
end generate;

Expand Down
65 changes: 42 additions & 23 deletions comp/dma/dma_calypte/comp/tx/tx_dma_calypte.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,9 @@ architecture FULL of TX_DMA_CALYPTE is
-- =============================================================================================
-- Constants and range definitions
-- =============================================================================================
constant PCIE_CQ_MFB_WIDTH : natural := PCIE_CQ_MFB_REGIONS*PCIE_CQ_MFB_REGION_SIZE*PCIE_CQ_MFB_BLOCK_SIZE*PCIE_CQ_MFB_ITEM_WIDTH;
constant USR_TX_MFB_WIDTH : natural := USR_TX_MFB_REGIONS*USR_TX_MFB_REGION_SIZE*USR_TX_MFB_BLOCK_SIZE*USR_TX_MFB_ITEM_WIDTH;
constant PCIE_CQ_MFB_WIDTH : natural := PCIE_CQ_MFB_REGIONS*PCIE_CQ_MFB_REGION_SIZE*PCIE_CQ_MFB_BLOCK_SIZE*PCIE_CQ_MFB_ITEM_WIDTH;
constant USR_TX_MFB_WIDTH : natural := USR_TX_MFB_REGIONS*USR_TX_MFB_REGION_SIZE*USR_TX_MFB_BLOCK_SIZE*USR_TX_MFB_ITEM_WIDTH;
constant USR_TX_MFB_META_WIDTH : natural := log2(PKT_SIZE_MAX+1) + HDR_META_WIDTH + log2(CHANNELS);

constant META_IS_DMA_HDR_W : natural := 1;
constant META_PCIE_ADDR_W : natural := 62;
Expand Down Expand Up @@ -201,8 +202,10 @@ architecture FULL of TX_DMA_CALYPTE is
signal pkt_disp_mfb_sof_pos : std_logic_vector(USR_TX_MFB_REGIONS*max(1, log2(USR_TX_MFB_REGION_SIZE)) -1 downto 0);
signal pkt_disp_mfb_eof_pos : std_logic_vector(USR_TX_MFB_REGIONS*max(1, log2(USR_TX_MFB_REGION_SIZE*USR_TX_MFB_BLOCK_SIZE)) -1 downto 0);
signal pkt_disp_mfb_src_rdy : std_logic;
signal pkt_disp_mfb_dst_rdy : std_logic;

signal enabled_chans : std_logic_vector(CHANNELS -1 downto 0);
signal usr_tx_mfb_meta_int : std_logic_vector(USR_TX_MFB_META_WIDTH -1 downto 0);

-- attribute mark_debug : string;

Expand Down Expand Up @@ -467,7 +470,7 @@ begin
ITEMS => (2**(DATA_POINTER_WIDTH-3)) * CHANNELS,
WRITE_PORTS => PCIE_CQ_MFB_REGIONS,
READ_PORTS => 1,
RAM_TYPE => "URAM",
RAM_TYPE => "AUTO",
DEVICE => DEVICE,
SAFE_READ_MODE => false)
port map (
Expand Down Expand Up @@ -521,7 +524,7 @@ begin
USR_MFB_SOF_POS => pkt_disp_mfb_sof_pos,
USR_MFB_EOF_POS => pkt_disp_mfb_eof_pos,
USR_MFB_SRC_RDY => pkt_disp_mfb_src_rdy,
USR_MFB_DST_RDY => USR_TX_MFB_DST_RDY,
USR_MFB_DST_RDY => pkt_disp_mfb_dst_rdy,

HDR_BUFF_ADDR => hdr_fifo_tx_data(62+log2(CHANNELS)+64 -1 downto log2(CHANNELS)+64),
HDR_BUFF_CHAN => hdr_fifo_tx_data(log2(CHANNELS)+64 -1 downto 64),
Expand Down Expand Up @@ -549,23 +552,39 @@ begin
UPD_HHP_DATA => upd_hhp_data,
UPD_HHP_EN => upd_hhp_en);

out_reg_p : process (CLK) is
begin
if (rising_edge(CLK)) then
if (RESET = '1') then
USR_TX_MFB_SRC_RDY <= '0';
elsif (USR_TX_MFB_DST_RDY = '1') then
USR_TX_MFB_META_HDR_META <= pkt_disp_mfb_meta_hdr_meta;
USR_TX_MFB_META_CHAN <= pkt_disp_mfb_meta_chan;
USR_TX_MFB_META_PKT_SIZE <= pkt_disp_mfb_meta_pkt_size;

USR_TX_MFB_DATA <= pkt_disp_mfb_data;
USR_TX_MFB_SOF <= pkt_disp_mfb_sof;
USR_TX_MFB_EOF <= pkt_disp_mfb_eof;
USR_TX_MFB_SOF_POS <= pkt_disp_mfb_sof_pos;
USR_TX_MFB_EOF_POS <= pkt_disp_mfb_eof_pos;
USR_TX_MFB_SRC_RDY <= pkt_disp_mfb_src_rdy;
end if;
end if;
end process;
-- Output stage: an MFB_PIPE instance placed between the pkt_disp_mfb_* signals and the
-- USR_TX_MFB_* outputs. The three metadata fields are concatenated into one META vector on
-- the RX side and split back into the separate outputs after the instance.
out_pipe_i : entity work.MFB_PIPE
generic map (
REGIONS => USR_TX_MFB_REGIONS,
REGION_SIZE => USR_TX_MFB_REGION_SIZE,
BLOCK_SIZE => USR_TX_MFB_BLOCK_SIZE,
ITEM_WIDTH => USR_TX_MFB_ITEM_WIDTH,
-- width of pkt_size & hdr_meta & chan (see the USR_TX_MFB_META_WIDTH constant)
META_WIDTH => USR_TX_MFB_META_WIDTH,

-- NOTE(review): presumably selects a real register stage with DST_RDY handling;
-- confirm against the MFB_PIPE entity documentation
FAKE_PIPE => FALSE,
USE_DST_RDY => TRUE,
PIPE_TYPE => "REG",
DEVICE => DEVICE)
port map (
CLK => CLK,
RESET => RESET,

RX_DATA => pkt_disp_mfb_data,
-- metadata packing order (MSB to LSB): packet size, header metadata, channel
RX_META => pkt_disp_mfb_meta_pkt_size & pkt_disp_mfb_meta_hdr_meta & pkt_disp_mfb_meta_chan,
RX_SOF_POS => pkt_disp_mfb_sof_pos,
RX_EOF_POS => pkt_disp_mfb_eof_pos,
RX_SOF => pkt_disp_mfb_sof,
RX_EOF => pkt_disp_mfb_eof,
RX_SRC_RDY => pkt_disp_mfb_src_rdy,
-- DST_RDY driven by the pipe back towards the source of the pkt_disp_mfb_* stream
RX_DST_RDY => pkt_disp_mfb_dst_rdy,

TX_DATA => USR_TX_MFB_DATA,
TX_META => usr_tx_mfb_meta_int,
TX_SOF_POS => USR_TX_MFB_SOF_POS,
TX_EOF_POS => USR_TX_MFB_EOF_POS,
TX_SOF => USR_TX_MFB_SOF,
TX_EOF => USR_TX_MFB_EOF,
TX_SRC_RDY => USR_TX_MFB_SRC_RDY,
TX_DST_RDY => USR_TX_MFB_DST_RDY);

-- Unpack the piped metadata vector; the order of the aggregate matches the concatenation
-- order used on RX_META above
(USR_TX_MFB_META_PKT_SIZE, USR_TX_MFB_META_HDR_META, USR_TX_MFB_META_CHAN) <= usr_tx_mfb_meta_int;
end architecture;
4 changes: 4 additions & 0 deletions comp/dma/dma_calypte/comp/tx/uvm/signals.fdo
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,10 @@ proc trans_buffer {PATH META_IS_DMA_HDR_W META_PCIE_ADDR_W META_CHAN_NUM_W META_
add_wave "-noupdate -group \"$grp_name\"" $PATH/rd_data_bram_mux
add_wave "-noupdate -group \"$grp_name\"" $PATH/rd_data_bram
add_wave "-noupdate -group \"$grp_name\"" $PATH/rd_addr_bram_by_shift

add wave -noupdate -group "$grp_name" -divider "Debug_signals"
add_wave "-noupdate -group \"$grp_name\"" $PATH/wr_addr_collision_detected
add_wave "-noupdate -group \"$grp_name\"" $PATH/rdwr_collision_detected
}

proc pkt_dispatcher {PATH} {
Expand Down
Loading

0 comments on commit 826edc4

Please sign in to comment.