From 38ee8c29919c2af2212856a0768380c9c2d454f0 Mon Sep 17 00:00:00 2001 From: Vladislav Valek Date: Mon, 7 Oct 2024 14:05:23 +0200 Subject: [PATCH 01/11] rx_dma_calypte [MAINTENANCE]: remove FIFO for buffered transactions This FIFO does disproportionately enlarge design while providing only little improvement in terms of throughput. --- .../dma_calypte/comp/rx/rx_dma_calypte.vhd | 124 +++++------------- .../dma/dma_calypte/comp/rx/uvm/tbench/dut.sv | 3 +- .../comp/rx/uvm/tbench/testbench.sv | 1 - .../comp/rx/uvm/tbench/tests/pkg.sv | 1 - comp/dma/dma_calypte/dma_calypte.vhd | 1 - 5 files changed, 32 insertions(+), 98 deletions(-) diff --git a/comp/dma/dma_calypte/comp/rx/rx_dma_calypte.vhd b/comp/dma/dma_calypte/comp/rx/rx_dma_calypte.vhd index c231e8f70..c7934d62f 100644 --- a/comp/dma/dma_calypte/comp/rx/rx_dma_calypte.vhd +++ b/comp/dma/dma_calypte/comp/rx/rx_dma_calypte.vhd @@ -54,7 +54,6 @@ entity RX_DMA_CALYPTE is -- * Maximum allowed value is 2**16 - 1 PKT_SIZE_MAX : natural := 2**16 - 1; - TRBUF_FIFO_EN : boolean := FALSE; TRBUF_REG_EN : boolean := FALSE ); @@ -169,21 +168,13 @@ architecture FULL of RX_DMA_CALYPTE is signal hdrm_pkt_disc_inc : std_logic; signal hdrm_pkt_sent_bytes : std_logic_vector((log2(PKT_SIZE_MAX+1)-1) downto 0); - signal trbuf_fifo_tx_data : std_logic_vector(MFB_REGION_SIZE_TRBUF2INS*MFB_BLOCK_SIZE_TRBUF2INS*MFB_ITEM_WIDTH_TRBUF2INS-1 downto 0); - signal trbuf_fifo_tx_sof_pos : std_logic_vector (max(1, log2(MFB_REGION_SIZE_TRBUF2INS))-1 downto 0); - signal trbuf_fifo_tx_eof_pos : std_logic_vector (max(1, log2(MFB_REGION_SIZE_TRBUF2INS*MFB_BLOCK_SIZE_TRBUF2INS))-1 downto 0); - signal trbuf_fifo_tx_sof : std_logic; - signal trbuf_fifo_tx_eof : std_logic; - signal trbuf_fifo_tx_src_rdy : std_logic; - signal trbuf_fifo_tx_dst_rdy : std_logic; - - signal trbuf_fifo_rx_data : std_logic_vector(MFB_REGION_SIZE_TRBUF2INS*MFB_BLOCK_SIZE_TRBUF2INS*MFB_ITEM_WIDTH_TRBUF2INS-1 downto 0); - signal trbuf_fifo_rx_sof_pos : std_logic_vector (max(1, 
log2(MFB_REGION_SIZE_TRBUF2INS))-1 downto 0); - signal trbuf_fifo_rx_eof_pos : std_logic_vector (max(1, log2(MFB_REGION_SIZE_TRBUF2INS*MFB_BLOCK_SIZE_TRBUF2INS))-1 downto 0); - signal trbuf_fifo_rx_sof : std_logic; - signal trbuf_fifo_rx_eof : std_logic; - signal trbuf_fifo_rx_src_rdy : std_logic; - signal trbuf_fifo_rx_dst_rdy : std_logic; + signal mfb_data_trbuf : std_logic_vector(MFB_REGION_SIZE_TRBUF2INS*MFB_BLOCK_SIZE_TRBUF2INS*MFB_ITEM_WIDTH_TRBUF2INS-1 downto 0); + signal mfb_sof_pos_trbuf : std_logic_vector (max(1, log2(MFB_REGION_SIZE_TRBUF2INS))-1 downto 0); + signal mfb_eof_pos_trbuf : std_logic_vector (max(1, log2(MFB_REGION_SIZE_TRBUF2INS*MFB_BLOCK_SIZE_TRBUF2INS))-1 downto 0); + signal mfb_sof_trbuf : std_logic; + signal mfb_eof_trbuf : std_logic; + signal mfb_src_rdy_trbuf : std_logic; + signal mfb_dst_rdy_trbuf : std_logic; -- ============================================================================================= -- Frame length checker ---> Transaction buffer @@ -258,13 +249,13 @@ architecture FULL of RX_DMA_CALYPTE is -- attribute mark_debug of hdrm_dma_hdr_src_rdy : signal is "true"; -- attribute mark_debug of hdrm_dma_hdr_dst_rdy : signal is "true"; - -- attribute mark_debug of trbuf_fifo_tx_data : signal is "true"; - -- attribute mark_debug of trbuf_fifo_tx_sof : signal is "true"; - -- attribute mark_debug of trbuf_fifo_tx_eof : signal is "true"; - -- attribute mark_debug of trbuf_fifo_tx_sof_pos : signal is "true"; - -- attribute mark_debug of trbuf_fifo_tx_eof_pos : signal is "true"; - -- attribute mark_debug of trbuf_fifo_tx_src_rdy : signal is "true"; - -- attribute mark_debug of trbuf_fifo_tx_dst_rdy : signal is "true"; + -- attribute mark_debug of mfb_data_trbuf : signal is "true"; + -- attribute mark_debug of mfb_sof_trbuf : signal is "true"; + -- attribute mark_debug of mfb_eof_trbuf : signal is "true"; + -- attribute mark_debug of mfb_sof_pos_trbuf : signal is "true"; + -- attribute mark_debug of mfb_eof_pos_trbuf : signal is 
"true"; + -- attribute mark_debug of mfb_src_rdy_trbuf : signal is "true"; + -- attribute mark_debug of mfb_dst_rdy_trbuf : signal is "true"; -- attribute mark_debug of mfb_src_rdy_inbuf : signal is "true"; -- attribute mark_debug of mfb_dst_rdy_inbuf : signal is "true"; @@ -443,11 +434,11 @@ begin CLK => CLK, RST => RESET, - RX_MFB_DATA => trbuf_fifo_tx_data, - RX_MFB_SOF => trbuf_fifo_tx_sof, - RX_MFB_EOF => trbuf_fifo_tx_eof, - RX_MFB_SRC_RDY => trbuf_fifo_tx_src_rdy, - RX_MFB_DST_RDY => trbuf_fifo_tx_dst_rdy, + RX_MFB_DATA => mfb_data_trbuf, + RX_MFB_SOF => mfb_sof_trbuf, + RX_MFB_EOF => mfb_eof_trbuf, + RX_MFB_SRC_RDY => mfb_src_rdy_trbuf, + RX_MFB_DST_RDY => mfb_dst_rdy_trbuf, TX_MFB_DATA => PCIE_UP_MFB_DATA, TX_MFB_META => PCIE_UP_MFB_META, @@ -473,66 +464,13 @@ begin HDRM_DMA_HDR_SRC_RDY => hdrm_dma_hdr_src_rdy, HDRM_DMA_HDR_DST_RDY => hdrm_dma_hdr_dst_rdy); - tr_buf_fifo_g: if (TRBUF_FIFO_EN) generate - - trbuf_fifo_i : entity work.MFB_FIFOX - generic map ( - REGIONS => 1, - REGION_SIZE => MFB_REGION_SIZE_TRBUF2INS, - BLOCK_SIZE => MFB_BLOCK_SIZE_TRBUF2INS, - ITEM_WIDTH => MFB_ITEM_WIDTH_TRBUF2INS, - META_WIDTH => 0, - FIFO_DEPTH => 32, - RAM_TYPE => "AUTO", - DEVICE => DEVICE, - ALMOST_FULL_OFFSET => 2, - ALMOST_EMPTY_OFFSET => 2) - port map ( - CLK => CLK, - RST => RESET, - - RX_DATA => trbuf_fifo_rx_data, - RX_META => (others => '0'), - RX_SOF_POS => trbuf_fifo_rx_sof_pos, - RX_EOF_POS => trbuf_fifo_rx_eof_pos, - RX_SOF(0) => trbuf_fifo_rx_sof, - RX_EOF(0) => trbuf_fifo_rx_eof, - RX_SRC_RDY => trbuf_fifo_rx_src_rdy, - RX_DST_RDY => trbuf_fifo_rx_dst_rdy, - - TX_DATA => trbuf_fifo_tx_data, - TX_META => open, - TX_SOF_POS => trbuf_fifo_tx_sof_pos, - TX_EOF_POS => trbuf_fifo_tx_eof_pos, - TX_SOF(0) => trbuf_fifo_tx_sof, - TX_EOF(0) => trbuf_fifo_tx_eof, - TX_SRC_RDY => trbuf_fifo_tx_src_rdy, - TX_DST_RDY => trbuf_fifo_tx_dst_rdy, - - FIFO_STATUS => open, - - FIFO_AFULL => open, - FIFO_AEMPTY => open); - - else generate - - trbuf_fifo_tx_data <= 
trbuf_fifo_rx_data; - trbuf_fifo_tx_sof_pos <= trbuf_fifo_rx_sof_pos; - trbuf_fifo_tx_eof_pos <= trbuf_fifo_rx_eof_pos; - trbuf_fifo_tx_sof <= trbuf_fifo_rx_sof; - trbuf_fifo_tx_eof <= trbuf_fifo_rx_eof; - trbuf_fifo_tx_src_rdy <= trbuf_fifo_rx_src_rdy; - trbuf_fifo_rx_dst_rdy <= trbuf_fifo_tx_dst_rdy; - - end generate; - tr_buff_g : if (BUFFERED_DATA_SIZE = MFB_REGION_SIZE_INBUF2TRBUF*MFB_BLOCK_SIZE_INBUF2TRBUF) generate - trbuf_fifo_rx_data <= mfb_data_inbuf; - trbuf_fifo_rx_sof <= mfb_sof_inbuf; - trbuf_fifo_rx_eof <= mfb_eof_inbuf; - trbuf_fifo_rx_src_rdy <= mfb_src_rdy_inbuf; - mfb_dst_rdy_inbuf <= trbuf_fifo_rx_dst_rdy; + mfb_data_trbuf <= mfb_data_inbuf; + mfb_sof_trbuf <= mfb_sof_inbuf; + mfb_eof_trbuf <= mfb_eof_inbuf; + mfb_src_rdy_trbuf <= mfb_src_rdy_inbuf; + mfb_dst_rdy_inbuf <= mfb_dst_rdy_trbuf; else generate @@ -555,13 +493,13 @@ begin RX_MFB_SRC_RDY => mfb_src_rdy_lng_check, RX_MFB_DST_RDY => mfb_dst_rdy_lng_check, - TX_MFB_DATA => trbuf_fifo_rx_data, - TX_MFB_SOF_POS => trbuf_fifo_rx_sof_pos, - TX_MFB_EOF_POS => trbuf_fifo_rx_eof_pos, - TX_MFB_SOF => trbuf_fifo_rx_sof, - TX_MFB_EOF => trbuf_fifo_rx_eof, - TX_MFB_SRC_RDY => trbuf_fifo_rx_src_rdy, - TX_MFB_DST_RDY => trbuf_fifo_rx_dst_rdy); + TX_MFB_DATA => mfb_data_trbuf, + TX_MFB_SOF_POS => mfb_sof_pos_trbuf, + TX_MFB_EOF_POS => mfb_eof_pos_trbuf, + TX_MFB_SOF => mfb_sof_trbuf, + TX_MFB_EOF => mfb_eof_trbuf, + TX_MFB_SRC_RDY => mfb_src_rdy_trbuf, + TX_MFB_DST_RDY => mfb_dst_rdy_trbuf); end generate; diff --git a/comp/dma/dma_calypte/comp/rx/uvm/tbench/dut.sv b/comp/dma/dma_calypte/comp/rx/uvm/tbench/dut.sv index 19ad9b1e2..36d7496ca 100644 --- a/comp/dma/dma_calypte/comp/rx/uvm/tbench/dut.sv +++ b/comp/dma/dma_calypte/comp/rx/uvm/tbench/dut.sv @@ -5,7 +5,7 @@ //-- SPDX-License-Identifier: BSD-3-Clause -module DMA_LL_DUT #(DEVICE, USER_REGIONS, USER_REGION_SIZE, USER_BLOCK_SIZE, USER_ITEM_WIDTH, PCIE_UP_REGIONS, PCIE_UP_REGION_SIZE, PCIE_UP_BLOCK_SIZE, PCIE_UP_ITEM_WIDTH, CHANNELS, PKT_SIZE_MAX, 
SW_ADDR_WIDTH, POINTER_WIDTH, CNTRS_WIDTH, OPT_BUFF, TRBUF_REG_EN) +module DMA_LL_DUT #(DEVICE, USER_REGIONS, USER_REGION_SIZE, USER_BLOCK_SIZE, USER_ITEM_WIDTH, PCIE_UP_REGIONS, PCIE_UP_REGION_SIZE, PCIE_UP_BLOCK_SIZE, PCIE_UP_ITEM_WIDTH, CHANNELS, PKT_SIZE_MAX, SW_ADDR_WIDTH, POINTER_WIDTH, CNTRS_WIDTH, TRBUF_REG_EN) ( input logic CLK, input logic RST, @@ -47,7 +47,6 @@ module DMA_LL_DUT #(DEVICE, USER_REGIONS, USER_REGION_SIZE, USER_BLOCK_SIZE, USE .SW_ADDR_WIDTH (SW_ADDR_WIDTH), .CNTRS_WIDTH (CNTRS_WIDTH), .PKT_SIZE_MAX (PKT_SIZE_MAX), - .TRBUF_FIFO_EN (OPT_BUFF), .TRBUF_REG_EN (TRBUF_REG_EN) ) VHDL_DUT_U ( .CLK (CLK), diff --git a/comp/dma/dma_calypte/comp/rx/uvm/tbench/testbench.sv b/comp/dma/dma_calypte/comp/rx/uvm/tbench/testbench.sv index c4e539584..8a21ad992 100644 --- a/comp/dma/dma_calypte/comp/rx/uvm/tbench/testbench.sv +++ b/comp/dma/dma_calypte/comp/rx/uvm/tbench/testbench.sv @@ -80,7 +80,6 @@ module testbench; .SW_ADDR_WIDTH (test::SW_ADDR_WIDTH), .POINTER_WIDTH (test::POINTER_WIDTH), .CNTRS_WIDTH (test::CNTRS_WIDTH), - .OPT_BUFF (test::OPT_BUFF), .TRBUF_REG_EN (test::TRBUF_REG_EN) ) DUT_U ( diff --git a/comp/dma/dma_calypte/comp/rx/uvm/tbench/tests/pkg.sv b/comp/dma/dma_calypte/comp/rx/uvm/tbench/tests/pkg.sv index 1bfcd90c3..e4c068b8d 100644 --- a/comp/dma/dma_calypte/comp/rx/uvm/tbench/tests/pkg.sv +++ b/comp/dma/dma_calypte/comp/rx/uvm/tbench/tests/pkg.sv @@ -30,7 +30,6 @@ package test; parameter SW_ADDR_WIDTH = 64; parameter CNTRS_WIDTH = 64; parameter PKT_SIZE_MAX = 2**12; - parameter OPT_BUFF = 1'b0; parameter TRBUF_REG_EN = 1'b1; parameter DEVICE = "ULTRASCALE"; diff --git a/comp/dma/dma_calypte/dma_calypte.vhd b/comp/dma/dma_calypte/dma_calypte.vhd index 4ce156855..01b5dae10 100644 --- a/comp/dma/dma_calypte/dma_calypte.vhd +++ b/comp/dma/dma_calypte/dma_calypte.vhd @@ -266,7 +266,6 @@ begin CNTRS_WIDTH => DSP_CNT_WIDTH, HDR_META_WIDTH => HDR_META_WIDTH, PKT_SIZE_MAX => USR_RX_PKT_SIZE_MAX, - TRBUF_FIFO_EN => FALSE, TRBUF_REG_EN => 
TRBUF_REG_EN) port map ( From c540b6fb5b88c9a3a5cff95c267a246511f33ea0 Mon Sep 17 00:00:00 2001 From: Vladislav Valek Date: Mon, 7 Oct 2024 14:31:45 +0200 Subject: [PATCH 02/11] dma_test_core [MAINTENANCE]: comment mark_debug attributes --- .../comp/tx_debug_core/debug_core.vhd | 42 +++++++++---------- .../comp/test_core/dma_test_core.vhd | 38 ++++++++--------- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/comp/dma/dma_calypte/comp/test_core/comp/tx_debug_core/debug_core.vhd b/comp/dma/dma_calypte/comp/test_core/comp/tx_debug_core/debug_core.vhd index e7c7b0596..d709e892e 100644 --- a/comp/dma/dma_calypte/comp/test_core/comp/tx_debug_core/debug_core.vhd +++ b/comp/dma/dma_calypte/comp/test_core/comp/tx_debug_core/debug_core.vhd @@ -509,7 +509,7 @@ architecture FULL of TX_DMA_DEBUG_CORE is -- ============================================================================================= -- Debug signals -- ============================================================================================= - attribute mark_debug : string; + -- attribute mark_debug : string; signal aux_sig_mfb_meta_chan_int : std_logic_vector(log2(CHANNELS) -1 downto 0); signal aux_sig_mfb_meta_pkt_size : std_logic_vector(log2(PKT_SIZE_MAX+1) -1 downto 0); @@ -518,26 +518,26 @@ architecture FULL of TX_DMA_DEBUG_CORE is signal tx_mfb_meta_chan_int : std_logic_vector(log2(CHANNELS) -1 downto 0); signal tx_mfb_meta_pkt_size : std_logic_vector(log2(PKT_SIZE_MAX+1) -1 downto 0); signal tx_mfb_meta_hdr_meta : std_logic_vector(DMA_META_WIDTH -1 downto 0); - attribute mark_debug of tx_mfb_meta_chan_int : signal is "true"; - attribute mark_debug of tx_mfb_meta_pkt_size : signal is "true"; - attribute mark_debug of tx_mfb_meta_hdr_meta : signal is "true"; - attribute mark_debug of TX_MFB_DATA : signal is "true"; - attribute mark_debug of TX_MFB_META : signal is "true"; - attribute mark_debug of TX_MFB_SOF : signal is "true"; - attribute mark_debug of TX_MFB_EOF : signal is "true"; - 
attribute mark_debug of TX_MFB_SOF_POS : signal is "true"; - attribute mark_debug of TX_MFB_EOF_POS : signal is "true"; - attribute mark_debug of TX_MFB_SRC_RDY : signal is "true"; - attribute mark_debug of TX_MFB_DST_RDY : signal is "true"; - - attribute mark_debug of pattern_comp_pst : signal is "true"; - attribute mark_debug of pattern_match_cntr_incr : signal is "true"; - attribute mark_debug of pattern_mismatch_cntr_incr : signal is "true"; - attribute mark_debug of pattern_copy_val : signal is "true"; - attribute mark_debug of meta_pattern_comp_pst : signal is "true"; - attribute mark_debug of meta_pattern_match_cntr_incr : signal is "true"; - attribute mark_debug of meta_pattern_mismatch_cntr_incr : signal is "true"; - attribute mark_debug of meta_pattern_copy_val : signal is "true"; + -- attribute mark_debug of tx_mfb_meta_chan_int : signal is "true"; + -- attribute mark_debug of tx_mfb_meta_pkt_size : signal is "true"; + -- attribute mark_debug of tx_mfb_meta_hdr_meta : signal is "true"; + -- attribute mark_debug of TX_MFB_DATA : signal is "true"; + -- attribute mark_debug of TX_MFB_META : signal is "true"; + -- attribute mark_debug of TX_MFB_SOF : signal is "true"; + -- attribute mark_debug of TX_MFB_EOF : signal is "true"; + -- attribute mark_debug of TX_MFB_SOF_POS : signal is "true"; + -- attribute mark_debug of TX_MFB_EOF_POS : signal is "true"; + -- attribute mark_debug of TX_MFB_SRC_RDY : signal is "true"; + -- attribute mark_debug of TX_MFB_DST_RDY : signal is "true"; + + -- attribute mark_debug of pattern_comp_pst : signal is "true"; + -- attribute mark_debug of pattern_match_cntr_incr : signal is "true"; + -- attribute mark_debug of pattern_mismatch_cntr_incr : signal is "true"; + -- attribute mark_debug of pattern_copy_val : signal is "true"; + -- attribute mark_debug of meta_pattern_comp_pst : signal is "true"; + -- attribute mark_debug of meta_pattern_match_cntr_incr : signal is "true"; + -- attribute mark_debug of 
meta_pattern_mismatch_cntr_incr : signal is "true"; + -- attribute mark_debug of meta_pattern_copy_val : signal is "true"; begin tx_mfb_meta_chan_int <= TX_MFB_META(log2(CHANNELS) -1 downto 0); diff --git a/comp/dma/dma_calypte/comp/test_core/dma_test_core.vhd b/comp/dma/dma_calypte/comp/test_core/dma_test_core.vhd index a867f9875..39b78c29f 100644 --- a/comp/dma/dma_calypte/comp/test_core/dma_test_core.vhd +++ b/comp/dma/dma_calypte/comp/test_core/dma_test_core.vhd @@ -248,25 +248,25 @@ architecture FULL of DMA_TEST_CORE is -- ============================================================================================= -- Debug probes -- ============================================================================================= - attribute mark_debug : string; - - attribute mark_debug of data_logger_rst : signal is "true"; - attribute mark_debug of tst_gen_mux_sel : signal is "true"; - attribute mark_debug of meas_fsm_pst : signal is "true"; - attribute mark_debug of pkt_cnt_pst : signal is "true"; - attribute mark_debug of test_finished : signal is "true"; - - attribute mark_debug of mfb_gen_ctrl_pkt_cnt_clr : signal is "true"; - attribute mark_debug of mfb_gen_ctrl_length : signal is "true"; - attribute mark_debug of mfb_gen_ctrl_chan_val : signal is "true"; - attribute mark_debug of mfb_gen_ctrl_chan_inc : signal is "true"; - attribute mark_debug of mfb_gen_ctrl_en : signal is "true"; - attribute mark_debug of mfb_gen_ctrl_pkt_cnt : signal is "true"; - - attribute mark_debug of lat_meas_val : signal is "true"; - attribute mark_debug of lat_meas_val_vld : signal is "true"; - attribute mark_debug of lat_meas_fifo_full : signal is "true"; - attribute mark_debug of lat_meas_fifo_items : signal is "true"; + -- attribute mark_debug : string; + + -- attribute mark_debug of data_logger_rst : signal is "true"; + -- attribute mark_debug of tst_gen_mux_sel : signal is "true"; + -- attribute mark_debug of meas_fsm_pst : signal is "true"; + -- attribute mark_debug of 
pkt_cnt_pst : signal is "true"; + -- attribute mark_debug of test_finished : signal is "true"; + + -- attribute mark_debug of mfb_gen_ctrl_pkt_cnt_clr : signal is "true"; + -- attribute mark_debug of mfb_gen_ctrl_length : signal is "true"; + -- attribute mark_debug of mfb_gen_ctrl_chan_val : signal is "true"; + -- attribute mark_debug of mfb_gen_ctrl_chan_inc : signal is "true"; + -- attribute mark_debug of mfb_gen_ctrl_en : signal is "true"; + -- attribute mark_debug of mfb_gen_ctrl_pkt_cnt : signal is "true"; + + -- attribute mark_debug of lat_meas_val : signal is "true"; + -- attribute mark_debug of lat_meas_val_vld : signal is "true"; + -- attribute mark_debug of lat_meas_fifo_full : signal is "true"; + -- attribute mark_debug of lat_meas_fifo_items : signal is "true"; begin mi_async_i : entity work.MI_ASYNC generic map( From a1067a19fa7d400bbc4b6a26916e150b6a9b809f Mon Sep 17 00:00:00 2001 From: Vladislav Valek Date: Wed, 9 Oct 2024 10:18:21 +0200 Subject: [PATCH 03/11] dts_templates.tcl [FEATURE]: create templates for DTS generation This is to avoid tedious repetition of append TCL macros for string generation. The main function dts_create_node encapsulates the general structure of a Device Tree node. Multiple properties can be added with the _appendprop_ procedures inside the node's body. 
--- build/DevTree.tcl | 1 + build/scripts/dts/dts_templates.tcl | 78 +++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 build/scripts/dts/dts_templates.tcl diff --git a/build/DevTree.tcl b/build/DevTree.tcl index 41b5fbd68..1f3da840e 100644 --- a/build/DevTree.tcl +++ b/build/DevTree.tcl @@ -1,5 +1,6 @@ source [file join [file dirname [info script]] "scripts" "dts" "packed_item.tcl"] source [file join [file dirname [info script]] "scripts" "dts" "ndp_header.tcl"] +source [file join [file dirname [info script]] "scripts" "dts" "dts_templates.tcl"] # ---------------------------------------------------------------------- diff --git a/build/scripts/dts/dts_templates.tcl b/build/scripts/dts/dts_templates.tcl new file mode 100644 index 000000000..a01e76533 --- /dev/null +++ b/build/scripts/dts/dts_templates.tcl @@ -0,0 +1,78 @@ +# dts_templates.tcl: templates for various node types within the DeviceTree +# Copyright (C) 2024 CESNET z. s. p. o. +# Author(s): Vladislav Valek +# +# SPDX-License-Identifier: BSD-3-Clause + +# Adds a string property to a Device Tree node +# 1. DTS - a reference to Device Tree string +# 2. name - name of a string property +# 3. value - value of a string property +proc dts_appendprop_string {DTS name value} { + upvar 1 $DTS dts + append dts "$name = \"$value\";\n" +} + +# Adds an integer property to a Device Tree node +# 1. DTS - a reference to Device Tree string +# 2. name - name of an integer property +# 3. value - value of an integer property +proc dts_appendprop_int {DTS name value} { + upvar 1 $DTS dts + append dts "$name = <$value>;\n" +} + +# Adds register as a "reg" property to a Device Tree node +# 1. DTS - a reference to Device Tree string +# 2. addr - base address of a register +# 3. 
size - size of a register +proc dts_appendprop_reg {DTS addr size} { + upvar 1 $DTS dts + append dts "reg = <$addr $size>;\n" +} + +# Adds cells that specify processing of address and size values within reg properties of a DT node +# 1. DTS - a reference to Device Tree string +# 2. addr - specifies how many cells within the reg property represent the BASE ADDRESS of a register +# 3. size - specifies how many cells within the reg property represent the SIZE of a register +# NOTE: A cell within a reg property is a value of type uint32. +proc dts_add_cells {DTS {addr 1} {size 1}} { + upvar 1 $DTS dts + dts_appendprop_int dts "#address-cells" $addr + dts_appendprop_int dts "#size-cells" $size +} + +# Adds the minimal set of properties (compatible string and a register address) +# 1. DTS - a reference to DTS +# 2. base_addr - base address in the MI address space +# 3. size - size of the register space in the MI address space +# 4. compatible - compatible string +proc dts_appendprop_comp_node {DTS base_addr size compatible} { + upvar 1 $DTS dts + + dts_appendprop_string dts "compatible" "$compatible" + dts_appendprop_reg dts $base_addr $size +} + +# This creates a node within a DTS +# 1. DTS - a reference to Device Tree string +# 2. alias - (also called label) provides an alternative name used for cross-referencing within a +# Device Tree +# 3. name - a name of a node +# 4. 
body - a set of procedures that add properties to a node (see example within the documentation +# of a Build System) +proc dts_create_labeled_node {DTS alias name body} { + upvar 1 $DTS dts + + if {$alias ne ""} { + append dts "$alias: " + } + append dts "$name {\n" + uplevel 1 $body + append dts "};\n" +} + +# Wrapper over dts_create_labeled_node that creates a node without a label +proc dts_create_node {DTS name body} { + uplevel 1 [list dts_create_labeled_node $DTS "" $name $body] +} From f0aa3457a6ca27b4b028df558940407609e03588 Mon Sep 17 00:00:00 2001 From: Vladislav Valek Date: Wed, 9 Oct 2024 10:23:59 +0200 Subject: [PATCH 04/11] dma_calypte - DevTree.tcl [MAINTENANCE]: rewrite DTS generator procedures for template usage This reflects the introduction of DTS templates for DT generation. --- comp/dma/dma_calypte/DevTree.tcl | 42 +++++++++++++++---------------- core/comp/dma/dma_mod/DevTree.tcl | 4 +-- core/top/DevTree.tcl | 4 +-- 3 files changed, 24 insertions(+), 26 deletions(-) diff --git a/comp/dma/dma_calypte/DevTree.tcl b/comp/dma/dma_calypte/DevTree.tcl index 5150e4da9..99526d891 100644 --- a/comp/dma/dma_calypte/DevTree.tcl +++ b/comp/dma/dma_calypte/DevTree.tcl @@ -3,34 +3,32 @@ # 2. id - channel ID # 3. base - base address of channel # 4. pcie - index(es) of PCIe endpoint(s) which DMA controller uses. 
-proc dts_dma_calypte_ctrl {dir id base pcie} { - set ret "" - append ret "dma_ctrl_calypte" "_$dir$id {" - append ret "compatible = \"cesnet,dma_ctrl_calypte" "_" $dir "\";" - append ret "reg = <$base 0x80>;" - append ret "version = <0x00010000>;" - append ret "pcie = <$pcie>;" - if { $dir == "tx" } { - append ret "data_buff = <&dma_calypte_tx_data_buff$id>;" - append ret "hdr_buff = <&dma_calypte_tx_hdr_buff$id>;" +proc dts_dma_calypte_ctrl {DTS dir id base pcie} { + upvar 1 $DTS dts + + dts_create_node dts "dma_ctrl_calypte_$dir$id" { + dts_appendprop_comp_node dts $base 0x80 "cesnet,dma_ctrl_calypte_$dir" + dts_appendprop_int dts "version" 0x10000 + dts_appendprop_int dts "pcie" $pcie + if { $dir == "tx" } { + append dts "data_buff = <&dma_calypte_tx_data_buff$id>;" + append dts "hdr_buff = <&dma_calypte_tx_hdr_buff$id>;" + } + append dts "params = <&dma_params_$dir$pcie>;" } - append ret "params = <&dma_params_$dir$pcie>;" - append ret "};" - return $ret } # generates Device Tree entries for data buffers in DMA Calypte # 1. type - content of the buffer (header or data) # 2. id - channel ID -# 3. base - base address for the first buffer +# 3. base - base address for the buffer # 4. size - size of the buffer # 5. pcie - index(es) of PCIe endpoint(s) which DMA controller uses. 
-proc dts_dma_calypte_tx_buffer {type id base size pcie} { - set ret "" - append ret "dma_calypte_tx_${type}_buff${id}: dma_calypte_tx_${type}_buff${id} {" - append ret "compatible = \"cesnet,dma_calypte_tx_${type}_buff\";" - append ret "reg = <$base $size>;" - append ret "pcie = <$pcie>;" - append ret "};" - return $ret +proc dts_dma_calypte_tx_buffer {DTS type id base size pcie} { + upvar 1 $DTS dts + + dts_create_labeled_node dts "dma_calypte_tx_${type}_buff${id}" "dma_calypte_tx_${type}_buff${id}" { + dts_appendprop_comp_node dts $base $size "cesnet,dma_calypte_tx_${type}_buff" + dts_appendprop_int dts "pcie" $pcie + } } diff --git a/core/comp/dma/dma_mod/DevTree.tcl b/core/comp/dma/dma_mod/DevTree.tcl index 67692e91e..49c5b0060 100644 --- a/core/comp/dma/dma_mod/DevTree.tcl +++ b/core/comp/dma/dma_mod/DevTree.tcl @@ -40,7 +40,7 @@ proc dts_dmamod_open {base type rxn txn pcie rx_frame_size_max tx_frame_size_max } } elseif {$type == 4} { set var_base [expr $base + $i * 0x80] - append ret [dts_dma_calypte_ctrl "rx" $i $var_base $pcie] + dts_dma_calypte_ctrl ret "rx" $i $var_base $pcie } } @@ -51,7 +51,7 @@ proc dts_dmamod_open {base type rxn txn pcie rx_frame_size_max tx_frame_size_max append ret [dts_dma_medusa_ctrl "ndp" $type "tx" $i $var_base $pcie "dma_params_tx$pcie"] } elseif {$type == 4} { set var_base [expr $base + $i * 0x80 + $offset] - append ret [dts_dma_calypte_ctrl "tx" $i $var_base $pcie] + dts_dma_calypte_ctrl ret "tx" $i $var_base $pcie } } diff --git a/core/top/DevTree.tcl b/core/top/DevTree.tcl index a9bda9d12..8591ce439 100644 --- a/core/top/DevTree.tcl +++ b/core/top/DevTree.tcl @@ -201,7 +201,7 @@ proc dts_build_netcope {} { for {set i 0} {$i < $CHAN_PER_EP} {incr i} { set var_buff_base [expr $TX_DATA_BUFF_BASE + $i * $TX_BUFF_SIZE_HEX] - append ret [dts_dma_calypte_tx_buffer "data" $i $var_buff_base $TX_BUFF_SIZE_HEX "0"] + dts_dma_calypte_tx_buffer ret "data" $i $var_buff_base $TX_BUFF_SIZE_HEX "0" } set TX_HDR_BUFF_BASE [expr 
$TX_DATA_BUFF_BASE + $CHAN_PER_EP*$TX_BUFF_SIZE] @@ -209,7 +209,7 @@ proc dts_build_netcope {} { for {set i 0} {$i < $CHAN_PER_EP} {incr i} { set var_buff_base [expr $TX_HDR_BUFF_BASE + $i * $TX_BUFF_SIZE_HEX] - append ret [dts_dma_calypte_tx_buffer "hdr" $i $var_buff_base $TX_BUFF_SIZE_HEX "0"] + dts_dma_calypte_tx_buffer ret "hdr" $i $var_buff_base $TX_BUFF_SIZE_HEX "0" } append ret "};" } From 1db0471c8e0129a9ef56d17b5db1dafd3f3639a3 Mon Sep 17 00:00:00 2001 From: Vladislav Valek Date: Thu, 17 Oct 2024 22:06:57 +0200 Subject: [PATCH 05/11] dma_calypte_test_core - DevTree.tcl [FEATURE]: create DT node for the component This was created in order to simplify the control over the test core that would otherwise require a detailed knowledge of its internal registers and, therefore, an activated mi_debug access in the kernel driver. --- .../dma_calypte/comp/test_core/DevTree.tcl | 39 +++++++++++++++++++ .../dma_calypte/comp/test_core/Modules.tcl | 1 + core/top/DevTree.tcl | 4 +- 3 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 comp/dma/dma_calypte/comp/test_core/DevTree.tcl diff --git a/comp/dma/dma_calypte/comp/test_core/DevTree.tcl b/comp/dma/dma_calypte/comp/test_core/DevTree.tcl new file mode 100644 index 000000000..fcdf3eb20 --- /dev/null +++ b/comp/dma/dma_calypte/comp/test_core/DevTree.tcl @@ -0,0 +1,39 @@ +# DevTree.tcl: generate nodes for the test core +# Copyright (C) 2024 CESNET z. s. p. o. 
+# Author(s): Vladislav Valek +# +# SPDX-License-Identifier: BSD-3-Clause + +proc dts_calypte_test_core {DTS base_addr {dbg_en False}} { + upvar 1 $DTS dts + + set LOOPBACK_BASE_ADDR [expr $base_addr + 0x0] + set TX_DBG_CORE_BASE_ADDR [expr $base_addr + 0x10000] + set LATENCY_METER_BASE_ADDR [expr $base_addr + 0x20000] + set RESET_FSM_BASE_ADDR [expr $base_addr + 0x30000] + + dts_create_node dts "dma_calypte_test_core0" { + + dts_create_node dts "mfb_loopback0" { + dts_appendprop_comp_node dts $LOOPBACK_BASE_ADDR 8 "cesnet,mfb_loopback" + } + + if ($dbg_en) { + dts_create_node dts "dma_calypte_debug_core0" { + dts_appendprop_comp_node dts $TX_DBG_CORE_BASE_ADDR 0x1600 "cesnet,dma_calypte_debug_core" + + dts_create_node dts "mfb_generator0" { + dts_appendprop_comp_node dts [expr $TX_DBG_CORE_BASE_ADDR+0x8000] 0x40 "cesnet,mfb_generator" + } + } + + dts_create_node dts "dma_calypte_latency_meter0" { + dts_appendprop_comp_node dts $LATENCY_METER_BASE_ADDR 0x30 "cesnet,dma_calypte_latency_meter" + } + } + + dts_create_node dts "dma_calypte_reset_fsm0" { + dts_appendprop_comp_node dts $RESET_FSM_BASE_ADDR 0x4 "cesnet,dma_calypte_reset_fsm" + } + } +} diff --git a/comp/dma/dma_calypte/comp/test_core/Modules.tcl b/comp/dma/dma_calypte/comp/test_core/Modules.tcl index ded8b6532..7c4911765 100644 --- a/comp/dma/dma_calypte/comp/test_core/Modules.tcl +++ b/comp/dma/dma_calypte/comp/test_core/Modules.tcl @@ -24,3 +24,4 @@ lappend COMPONENTS [ list "LATENCY_METER" $LATENCY_METER_BASE "FUL lappend COMPONENTS [ list "MFB_GENERATOR" $MFB_GENERATOR_BASE "FULL" ] lappend MOD "$ENTITY_BASE/dma_test_core.vhd" +lappend MOD "$ENTITY_BASE/DevTree.tcl" diff --git a/core/top/DevTree.tcl b/core/top/DevTree.tcl index 8591ce439..c96adb3fe 100644 --- a/core/top/DevTree.tcl +++ b/core/top/DevTree.tcl @@ -89,8 +89,8 @@ proc dts_build_netcope {} { } global DMA_DEBUG_ENABLE - if {$DMA_TYPE == 4 && $DMA_DEBUG_ENABLE} { - append ret [data_logger "0x1320000" 0 "dma_calypte_latency_meter"] + if 
{$DMA_TYPE == 4} { + dts_calypte_test_core ret "0x1300000" $DMA_DEBUG_ENABLE } # Network module From 8c655d4caa77f17b624cb02d4ef90ca3a5db0b10 Mon Sep 17 00:00:00 2001 From: Vladislav Valek Date: Wed, 9 Oct 2024 10:28:57 +0200 Subject: [PATCH 06/11] dma_test_core.vhd [MAINTENANCE]: remove RX DMA Debug core The DEBUG_CORE has been generated solely for TX DMA and has been used one time (exceptionally) for the RX DMA but has not been used since then. --- .../comp/test_core/dma_test_core.vhd | 98 +++---------------- .../wrapper/dma_calypte_wrapper_arch.vhd | 2 - 2 files changed, 14 insertions(+), 86 deletions(-) diff --git a/comp/dma/dma_calypte/comp/test_core/dma_test_core.vhd b/comp/dma/dma_calypte/comp/test_core/dma_test_core.vhd index 39b78c29f..6179288c2 100644 --- a/comp/dma/dma_calypte/comp/test_core/dma_test_core.vhd +++ b/comp/dma/dma_calypte/comp/test_core/dma_test_core.vhd @@ -33,7 +33,6 @@ entity DMA_TEST_CORE is MFB_LOOPBACK_EN : boolean := TRUE; LATENCY_METER_EN : boolean := TRUE; TX_DMA_DBG_CORE_EN : boolean := TRUE; - RX_DMA_DBG_CORE_EN : boolean := TRUE; ST_SP_DBG_SIGNAL_W : natural := 2; -- Width of MI bus @@ -117,15 +116,14 @@ end entity; architecture FULL of DMA_TEST_CORE is - constant MI_SPLIT_PORTS : natural := 5; + constant MI_SPLIT_PORTS : natural := 4; constant MI_SPLIT_BASES : slv_array_t(MI_SPLIT_PORTS-1 downto 0)(MI_WIDTH-1 downto 0) := ( 0 => X"00000000", -- MFB Loopback 1 => X"00010000", -- TX DMA Debug Core 2 => X"00020000", -- Latency meter - 3 => X"00030000", -- Reset FSM - 4 => X"00040000" -- RX DMA Debug Core + 3 => X"00030000" -- Reset FSM ); - constant MI_SPLIT_ADDR_MASK : std_logic_vector(MI_WIDTH -1 downto 0) := X"00070000"; + constant MI_SPLIT_ADDR_MASK : std_logic_vector(MI_WIDTH -1 downto 0) := X"00030000"; -- MI Asynchronous crossing signal mi_dwr_sync : std_logic_vector(MI_WIDTH -1 downto 0); @@ -690,85 +688,17 @@ begin rx_mfb_dst_rdy_lbk <= rx_mfb_dst_rdy_gen_mux; end generate; - rx_dma_debug_core_g: if (RX_DMA_DBG_CORE_EN) 
generate - rx_debug_core_i : entity work.TX_DMA_DEBUG_CORE - generic map ( - DEVICE => DEVICE, - - MFB_REGIONS => MFB_REGIONS, - MFB_REGION_SIZE => MFB_REGION_SIZE, - MFB_BLOCK_SIZE => MFB_BLOCK_SIZE, - MFB_ITEM_WIDTH => MFB_ITEM_WIDTH, - - DMA_META_WIDTH => HDR_META_WIDTH, - PKT_SIZE_MAX => USR_RX_PKT_SIZE_MAX, - CHANNELS => RX_CHANNELS, - - DBG_CNTRS_WIDTH => 64, - ST_SP_DBG_SIGNAL_W => ST_SP_DBG_SIGNAL_W, - MI_WIDTH => MI_WIDTH, - MI_SAME_CLK => TRUE) - port map ( - CLK => CLK, - RESET => RESET, - - ST_SP_DBG_CHAN => (others => '0'), - ST_SP_DBG_META => (others => '0'), - - RX_MFB_META_PKT_SIZE => rx_mfb_meta_pkt_size_gen_mux, - RX_MFB_META_HDR_META => rx_mfb_meta_hdr_meta_gen_mux, - RX_MFB_META_CHAN => rx_mfb_meta_chan_gen_mux, - - RX_MFB_DATA => rx_mfb_data_gen_mux, - RX_MFB_SOF_POS => rx_mfb_sof_pos_gen_mux, - RX_MFB_EOF_POS => rx_mfb_eof_pos_gen_mux, - RX_MFB_SOF => rx_mfb_sof_gen_mux, - RX_MFB_EOF => rx_mfb_eof_gen_mux, - RX_MFB_SRC_RDY => rx_mfb_src_rdy_gen_mux, - RX_MFB_DST_RDY => rx_mfb_dst_rdy_gen_mux, - - TX_MFB_DATA => RX_MFB_DATA_OUT, - TX_MFB_META => rx_mfb_meta_dbg, - TX_MFB_SOF_POS => RX_MFB_SOF_POS_OUT, - TX_MFB_EOF_POS => RX_MFB_EOF_POS_OUT, - TX_MFB_SOF => RX_MFB_SOF_OUT, - TX_MFB_EOF => RX_MFB_EOF_OUT, - TX_MFB_SRC_RDY => RX_MFB_SRC_RDY_OUT, - TX_MFB_DST_RDY => RX_MFB_DST_RDY_OUT, - - MI_CLK => MI_CLK, - MI_RESET => MI_RESET, - - MI_ADDR => mi_addr_split(4), - MI_DWR => mi_dwr_split(4), - MI_BE => mi_be_split(4), - MI_RD => mi_rd_split(4), - MI_WR => mi_wr_split(4), - MI_DRD => mi_drd_split(4), - MI_ARDY => mi_ardy_split(4), - MI_DRDY => mi_drdy_split(4) - ); - - RX_MFB_META_PKT_SIZE_OUT <= rx_mfb_meta_dbg(log2(USR_RX_PKT_SIZE_MAX+1) + HDR_META_WIDTH + log2(RX_CHANNELS) -1 downto HDR_META_WIDTH + log2(RX_CHANNELS)); - RX_MFB_META_HDR_META_OUT <= rx_mfb_meta_dbg(HDR_META_WIDTH + log2(RX_CHANNELS) -1 downto log2(RX_CHANNELS)); - RX_MFB_META_CHAN_OUT <= rx_mfb_meta_dbg(log2(RX_CHANNELS) -1 downto 0); - else generate - mi_drd_split(4) <= 
X"DEADBEAD"; - mi_ardy_split(4) <= mi_rd_split(4) or mi_wr_split(4); - mi_drdy_split(4) <= mi_rd_split(4); - - RX_MFB_META_PKT_SIZE_OUT <= rx_mfb_meta_pkt_size_gen_mux; - RX_MFB_META_HDR_META_OUT <= rx_mfb_meta_hdr_meta_gen_mux; - RX_MFB_META_CHAN_OUT <= rx_mfb_meta_chan_gen_mux; - - RX_MFB_DATA_OUT <= rx_mfb_data_gen_mux; - RX_MFB_SOF_OUT <= rx_mfb_sof_gen_mux; - RX_MFB_EOF_OUT <= rx_mfb_eof_gen_mux; - RX_MFB_SOF_POS_OUT <= rx_mfb_sof_pos_gen_mux; - RX_MFB_EOF_POS_OUT <= rx_mfb_eof_pos_gen_mux; - RX_MFB_SRC_RDY_OUT <= rx_mfb_src_rdy_gen_mux; - rx_mfb_dst_rdy_gen_mux <= RX_MFB_DST_RDY_OUT; - end generate; + RX_MFB_META_PKT_SIZE_OUT <= rx_mfb_meta_pkt_size_gen_mux; + RX_MFB_META_HDR_META_OUT <= rx_mfb_meta_hdr_meta_gen_mux; + RX_MFB_META_CHAN_OUT <= rx_mfb_meta_chan_gen_mux; + + RX_MFB_DATA_OUT <= rx_mfb_data_gen_mux; + RX_MFB_SOF_OUT <= rx_mfb_sof_gen_mux; + RX_MFB_EOF_OUT <= rx_mfb_eof_gen_mux; + RX_MFB_SOF_POS_OUT <= rx_mfb_sof_pos_gen_mux; + RX_MFB_EOF_POS_OUT <= rx_mfb_eof_pos_gen_mux; + RX_MFB_SRC_RDY_OUT <= rx_mfb_src_rdy_gen_mux; + rx_mfb_dst_rdy_gen_mux <= RX_MFB_DST_RDY_OUT; -- ============================================================================================= -- Resetting FSM diff --git a/core/comp/dma/dma_mod/wrapper/dma_calypte_wrapper_arch.vhd b/core/comp/dma/dma_mod/wrapper/dma_calypte_wrapper_arch.vhd index 727f5ecd0..51fb43a4b 100644 --- a/core/comp/dma/dma_mod/wrapper/dma_calypte_wrapper_arch.vhd +++ b/core/comp/dma/dma_mod/wrapper/dma_calypte_wrapper_arch.vhd @@ -25,7 +25,6 @@ architecture CALYPTE of DMA_WRAPPER is constant MFB_LOOPBACK_EN : boolean := TRUE; constant LATENCY_METER_EN : boolean := DBG_CNTR_EN; constant TX_DMA_DBG_CORE_EN : boolean := DBG_CNTR_EN; - constant RX_DMA_DBG_CORE_EN : boolean := FALSE; constant ST_SP_DBG_META_WIDTH : natural := 4; @@ -599,7 +598,6 @@ begin MFB_LOOPBACK_EN => MFB_LOOPBACK_EN, LATENCY_METER_EN => LATENCY_METER_EN, TX_DMA_DBG_CORE_EN => TX_DMA_DBG_CORE_EN, - RX_DMA_DBG_CORE_EN => 
RX_DMA_DBG_CORE_EN, ST_SP_DBG_SIGNAL_W => ST_SP_DBG_META_WIDTH, MI_WIDTH => MI_WIDTH From 5f8d10d9b00da59b51121c5e29230d7aa6eab435 Mon Sep 17 00:00:00 2001 From: Vladislav Valek Date: Wed, 9 Oct 2024 10:39:40 +0200 Subject: [PATCH 07/11] RX DMA Calypte [FEATURE]: add performance counters for blocking behaviour The performance counters are measuring the amount of empty clock periods when some data are ready and are waiting for a response/accept by the receiver. Six counters have been implemented: 1) The relative amount of idle clock periods where the PCIe IP is not able to receive data that are ready on the RX DMA controller's output, i.e. the DST_RDY on the MFB bus between the RX DMA and the PCIe IP is deasserted. 2) The relative amount of idling clock periods before receiving a valid address on which the DMA Header can be written. 3) The relative amount of idling clock periods before receiving a valid address on which the DATA can be written. 4) The sum of requests for the PCIE address for the DATA transaction. 5) The sum of requests for the PCIE address for the DMA HEADER transaction. 6) The sum of clock periods where data are being transmitted on the MFB bus towards the PCIe IP Additionally, two histograms have been created where each bin is a counter for a channel within the controller. These count the amount of buffer fills where either DATA buffer or DMA HDR buffer gets filled and the controller is not able to write data anymore. This is counted separately from the waiting for the available PCIE address.
--- comp/dma/dma_calypte/DevTree.tcl | 11 ++ comp/dma/dma_calypte/comp/rx/Modules.tcl | 19 +- .../rx_dma_calypte_hdr_manager.vhd | 36 +++- .../rx_dma_calypte_sw_manager.vhd | 52 ++++- .../dma_calypte/comp/rx/rx_dma_calypte.vhd | 185 ++++++++++++++++-- comp/dma/dma_calypte/comp/rx/sw/perf_cntrs.py | 120 ++++++++++++ comp/dma/dma_calypte/comp/rx/uvm/signals.fdo | 19 +- .../dma/dma_calypte/comp/rx/uvm/tbench/dut.sv | 5 +- .../comp/rx/uvm/tbench/testbench.sv | 3 +- .../comp/rx/uvm/tbench/tests/pkg.sv | 1 + comp/dma/dma_calypte/dma_calypte.vhd | 5 +- core/comp/dma/dma_mod/DevTree.tcl | 4 + .../wrapper/dma_calypte_wrapper_arch.vhd | 1 + 13 files changed, 433 insertions(+), 28 deletions(-) create mode 100755 comp/dma/dma_calypte/comp/rx/sw/perf_cntrs.py diff --git a/comp/dma/dma_calypte/DevTree.tcl b/comp/dma/dma_calypte/DevTree.tcl index 99526d891..d6338ddba 100644 --- a/comp/dma/dma_calypte/DevTree.tcl +++ b/comp/dma/dma_calypte/DevTree.tcl @@ -32,3 +32,14 @@ proc dts_dma_calypte_tx_buffer {DTS type id base size pcie} { dts_appendprop_int dts "pcie" $pcie } } + +# Adds a node to the Device Tree for performance counters within DMA Calypte +# 1. DTS - reference to DeviceTree string +# 2. 
Base - base address of the registers in the MI address space +proc dts_dma_perf_cntrs {DTS base} { + upvar 1 $DTS dts + + dts_create_node dts "dma_calypte_rx_perf_cntrs0" { + dts_appendprop_comp_node dts $base 0x30 "cesnet,dma_calypte_rx_perf_cntrs" + } +} diff --git a/comp/dma/dma_calypte/comp/rx/Modules.tcl b/comp/dma/dma_calypte/comp/rx/Modules.tcl index 739ab6e4e..90e81e722 100644 --- a/comp/dma/dma_calypte/comp/rx/Modules.tcl +++ b/comp/dma/dma_calypte/comp/rx/Modules.tcl @@ -8,13 +8,15 @@ lappend PACKAGES "$OFM_PATH/comp/base/pkg/math_pack.vhd" lappend PACKAGES "$OFM_PATH/comp/base/pkg/type_pack.vhd" lappend PACKAGES "$OFM_PATH/comp/base/pkg/pcie_meta_pack.vhd" -set HDR_INSERTOR_BASE "$ENTITY_BASE/comp/hdr_insertor" -set HDR_MANAGER_BASE "$ENTITY_BASE/comp/hdr_manager" -set TRANS_BUFFER_BASE "$ENTITY_BASE/comp/trans_buffer" -set INPUT_BUFFER_BASE "$ENTITY_BASE/comp/input_buffer" -set SW_MANAGER_BASE "$ENTITY_BASE/comp/software_manager" -set MFB_FIFOX_BASE "$OFM_PATH/comp/mfb_tools/storage/fifox" -set MFB_FRAME_LNG_CHECK_BASE "$OFM_PATH/comp/mfb_tools/logic/frame_lng_check" +set HDR_INSERTOR_BASE "$ENTITY_BASE/comp/hdr_insertor" +set HDR_MANAGER_BASE "$ENTITY_BASE/comp/hdr_manager" +set TRANS_BUFFER_BASE "$ENTITY_BASE/comp/trans_buffer" +set INPUT_BUFFER_BASE "$ENTITY_BASE/comp/input_buffer" +set SW_MANAGER_BASE "$ENTITY_BASE/comp/software_manager" +set MFB_FIFOX_BASE "$OFM_PATH/comp/mfb_tools/storage/fifox" +set MFB_FRAME_LNG_CHECK_BASE "$OFM_PATH/comp/mfb_tools/logic/frame_lng_check" +set DATA_LOGGER_BASE "$OFM_PATH/comp/debug/data_logger" +set MI_SPLITTER_PLUS_GEN_BASE "$OFM_PATH/comp/mi_tools/splitter_plus_gen" lappend COMPONENTS \ [ list "RX_DMA_CALYPTE_HDR_INSERTOR" $HDR_INSERTOR_BASE "FULL"] \ @@ -24,6 +26,7 @@ lappend COMPONENTS \ [ list "RX_DMA_CALYPTE_SW_MANAGER" $SW_MANAGER_BASE "FULL"] \ [ list "MFB_FIFOX" $MFB_FIFOX_BASE "FULL"] \ [ list "MFB_FRAME_LNG_CHECK" $MFB_FRAME_LNG_CHECK_BASE "FULL"] \ - + [ list "DATA_LOGGER" $DATA_LOGGER_BASE "FULL"] \ 
+ [ list "MI_SPLITTER_PLUS_GEN" $MI_SPLITTER_PLUS_GEN_BASE "FULL"] lappend MOD "$ENTITY_BASE/rx_dma_calypte.vhd" diff --git a/comp/dma/dma_calypte/comp/rx/comp/hdr_manager/rx_dma_calypte_hdr_manager.vhd b/comp/dma/dma_calypte/comp/rx/comp/hdr_manager/rx_dma_calypte_hdr_manager.vhd index fc6fe3c32..865aab17c 100644 --- a/comp/dma/dma_calypte/comp/rx/comp/hdr_manager/rx_dma_calypte_hdr_manager.vhd +++ b/comp/dma/dma_calypte/comp/rx/comp/hdr_manager/rx_dma_calypte_hdr_manager.vhd @@ -137,7 +137,15 @@ entity RX_DMA_CALYPTE_HDR_MANAGER is PKT_CNTR_CHAN : out std_logic_vector(log2(CHANNELS) -1 downto 0); PKT_CNTR_SENT_INC : out std_logic; PKT_CNTR_DISC_INC : out std_logic; - PKT_CNTR_PKT_SIZE : out std_logic_vector(log2(PKT_MTU+1) -1 downto 0) + PKT_CNTR_PKT_SIZE : out std_logic_vector(log2(PKT_MTU+1) -1 downto 0); + + -- ========================================================================================= + -- Performance counter outputs + -- ========================================================================================= + DATA_ADDR_REQ_CNTR_INC : out std_logic; + DMA_HDR_ADDR_REQ_CNTR_INC : out std_logic; + DATA_ADDR_STALL_INC : out std_logic; + DMA_HDR_ADDR_STALL_INC : out std_logic ); end entity; @@ -311,6 +319,12 @@ architecture FULL of RX_DMA_CALYPTE_HDR_MANAGER is signal ptr_fifo_rd : std_logic; signal ptr_fifo_empty : std_logic; + -- ============================================================================================= + -- Perfofmance counter logic + -- ============================================================================================= + signal data_addr_next_reg : std_logic; + signal dma_hdr_addr_next_reg : std_logic; + -- ============================================================================================= -- Debug signals and probes (either for verification or ILA/SignalTap) -- ============================================================================================= @@ -1025,4 +1039,24 @@ begin -- signal is valid and 
set to 1. DMA_HDR_SRC_RDY <= ((not hdr_meta_fifo_empty) and (not ptr_fifo_empty) and (not pkt_size_fifo_empty) and (not discard_fifo_empty) and (not discard_fifo_do(0))) or ((not discard_fifo_empty) and discard_fifo_do(0)); + + -- ============================================================================================= + -- Performance counter logic + -- ============================================================================================= + addr_next_reg_p: process (CLK) is + begin + if (rising_edge(CLK)) then + data_addr_next_reg <= data_addr_next; + dma_hdr_addr_next_reg <= dma_hdr_addr_next; + end if; + end process; + + -- the response for an address request comes usually one clock period delayed. If that is not a + -- case, the stalling occurs. + DATA_ADDR_STALL_INC <= data_addr_next_reg and (not data_pcie_addr_vld); + DMA_HDR_ADDR_STALL_INC <= dma_hdr_addr_next_reg and (not dma_hdr_pcie_addr_vld); + + -- Counters of requests for PCIe addresses (should be equal to the overall amount of packets) + DATA_ADDR_REQ_CNTR_INC <= data_pcie_addr_vld; + DMA_HDR_ADDR_REQ_CNTR_INC <= dma_hdr_pcie_addr_vld; end architecture; diff --git a/comp/dma/dma_calypte/comp/rx/comp/software_manager/rx_dma_calypte_sw_manager.vhd b/comp/dma/dma_calypte/comp/rx/comp/software_manager/rx_dma_calypte_sw_manager.vhd index 58c2db237..e174e58cc 100644 --- a/comp/dma/dma_calypte/comp/rx/comp/software_manager/rx_dma_calypte_sw_manager.vhd +++ b/comp/dma/dma_calypte/comp/rx/comp/software_manager/rx_dma_calypte_sw_manager.vhd @@ -118,7 +118,13 @@ port ( DPM_RD_CHAN : in std_logic_vector(log2(CHANNELS)-1 downto 0); DPM_RD_DATA : out std_logic_vector(POINTER_WIDTH-1 downto 0); HPM_RD_CHAN : in std_logic_vector(log2(CHANNELS)-1 downto 0); - HPM_RD_DATA : out std_logic_vector(POINTER_WIDTH-1 downto 0) + HPM_RD_DATA : out std_logic_vector(POINTER_WIDTH-1 downto 0); + + -- Performance counters + DATA_BUFF_FULL_CHAN : out std_logic_vector(log2(CHANNELS) -1 downto 0); + 
DATA_BUFF_FULL_CNTR_INCR : out std_logic; + DMA_HDR_BUFF_FULL_CHAN : out std_logic_vector(log2(CHANNELS) -1 downto 0); + DMA_HDR_BUFF_FULL_CNTR_INCR : out std_logic ); end entity; @@ -350,8 +356,8 @@ architecture FULL of RX_DMA_CALYPTE_SW_MANAGER is RSV_3 => 1 + 0, R_SDP => 1 + 2, -- Channel Stop indication (comparator) + Header manager R_SHP => 1 + 2, -- Channel Stop indication (comparator) + Header manager - R_HDP => 1 + 1, -- Comparator - R_HHP => 1 + 1, -- Comparator + R_HDP => 1 + 2, -- Comparator + Perf. counters + R_HHP => 1 + 2, -- Comparator + Perf. counters RSV_8 => 1 + 0, RSV_9 => 1 + 0, RSV_10 => 1 + 0, @@ -583,6 +589,16 @@ architecture FULL of RX_DMA_CALYPTE_SW_MANAGER is signal stop_hhp_ok_reg : std_logic; -- ===================================================================== + -- ============================================================================================= + -- Performance counter connections + -- ============================================================================================= + signal buff_full_shp_compare : std_logic_vector(POINTER_WIDTH -1 downto 0); + signal buff_full_sdp_compare : std_logic_vector(POINTER_WIDTH -1 downto 0); + signal buff_full_hhp_compare : std_logic_vector(POINTER_WIDTH -1 downto 0); + signal buff_full_hdp_compare : std_logic_vector(POINTER_WIDTH -1 downto 0); + + signal buff_full_hhp_compare_reg : std_logic_vector(POINTER_WIDTH -1 downto 0); + signal buff_full_hdp_compare_reg : std_logic_vector(POINTER_WIDTH -1 downto 0); -- attribute mark_debug : string; -- attribute mark_debug of active_chan_reg : signal is "true"; -- attribute mark_debug of start_pending_reg_chan : signal is "true"; @@ -1247,4 +1263,34 @@ begin stop_hhp_ok_reg <= '1' when (comp_hpp_res = "00") else '0'; -- ===================================================================== + -- ============================================================================================= + -- Connections to performance counters + -- 
============================================================================================= + + reg_addrb(R_HHP)(2) <= SHP_RD_CHAN; + reg_addrb(R_HDP)(2) <= SDP_RD_CHAN; + + inc_reg_p: process (CLK) is + begin + if (rising_edge(CLK)) then + DMA_HDR_BUFF_FULL_CHAN <= SHP_RD_CHAN; + DATA_BUFF_FULL_CHAN <= SDP_RD_CHAN; + buff_full_shp_compare <= reg_dob_opt(R_SHP)(2)(POINTER_WIDTH-1 downto 0); + buff_full_sdp_compare <= reg_dob_opt(R_SDP)(2)(POINTER_WIDTH-1 downto 0); + buff_full_hhp_compare <= reg_dob_opt(R_HHP)(2)(POINTER_WIDTH-1 downto 0); + buff_full_hdp_compare <= reg_dob_opt(R_HDP)(2)(POINTER_WIDTH-1 downto 0); + buff_full_hhp_compare_reg <= buff_full_hhp_compare; + buff_full_hdp_compare_reg <= buff_full_hdp_compare; + end if; + end process; + + DATA_BUFF_FULL_CNTR_INCR <= '1' when + (unsigned(buff_full_sdp_compare) -1) = unsigned(buff_full_hdp_compare) + and (buff_full_hdp_compare /= buff_full_hdp_compare_reg) + else '0'; + + DMA_HDR_BUFF_FULL_CNTR_INCR <= '1' when + (unsigned(buff_full_shp_compare) -1) = unsigned(buff_full_hhp_compare) + and (buff_full_hhp_compare /= buff_full_hhp_compare_reg) + else '0'; end architecture; diff --git a/comp/dma/dma_calypte/comp/rx/rx_dma_calypte.vhd b/comp/dma/dma_calypte/comp/rx/rx_dma_calypte.vhd index c7934d62f..8eaa05202 100644 --- a/comp/dma/dma_calypte/comp/rx/rx_dma_calypte.vhd +++ b/comp/dma/dma_calypte/comp/rx/rx_dma_calypte.vhd @@ -54,7 +54,10 @@ entity RX_DMA_CALYPTE is -- * Maximum allowed value is 2**16 - 1 PKT_SIZE_MAX : natural := 2**16 - 1; - TRBUF_REG_EN : boolean := FALSE + -- Enables a register in the transaction buffer that improves throughput (but increases latency). + TRBUF_REG_EN : boolean := FALSE; + -- Enables performance counters in the design for metrics. 
+ PERF_CNTR_EN : boolean := FALSE ); port ( @@ -124,6 +127,22 @@ architecture FULL of RX_DMA_CALYPTE is constant BUFFERED_DATA_SIZE : natural := 128; --============================================================================================================= + constant MI_SPLIT_PORTS : natural := 2; + constant MI_SPLIT_BASES : slv_array_t(MI_SPLIT_PORTS -1 downto 0)(MI_WIDTH-1 downto 0) := ( + 0 => x"00000000", + 1 => x"00003000"); + + constant MI_SPLIT_ADDR_MASK : std_logic_vector(MI_WIDTH-1 downto 0) := x"00003000"; + + signal mi_split_dwr : slv_array_t(MI_SPLIT_PORTS -1 downto 0)(MI_WIDTH -1 downto 0); + signal mi_split_addr : slv_array_t(MI_SPLIT_PORTS -1 downto 0)(MI_WIDTH -1 downto 0); + signal mi_split_be : slv_array_t(MI_SPLIT_PORTS -1 downto 0)(MI_WIDTH/8 -1 downto 0); + signal mi_split_rd : std_logic_vector(MI_SPLIT_PORTS -1 downto 0); + signal mi_split_wr : std_logic_vector(MI_SPLIT_PORTS -1 downto 0); + signal mi_split_drd : slv_array_t(MI_SPLIT_PORTS -1 downto 0)(MI_WIDTH -1 downto 0); + signal mi_split_ardy : std_logic_vector(MI_SPLIT_PORTS -1 downto 0); + signal mi_split_drdy : std_logic_vector(MI_SPLIT_PORTS -1 downto 0); + signal start_req_chan : std_logic_vector((log2(CHANNELS)-1) downto 0); signal start_req_vld : std_logic; signal start_req_done : std_logic; @@ -208,6 +227,27 @@ architecture FULL of RX_DMA_CALYPTE is signal data_path_dst_rdy : std_logic; signal hdr_log_dst_rdy : std_logic; + -- ============================================================================================= + -- Performance counters' increment signals + -- ============================================================================================= + constant PERF_CNTR_NUM : positive := 6; + constant PERF_CNTR_WIDTH : positive := 64; + + signal perf_cntr_diff_packed : slv_array_t(PERF_CNTR_NUM -1 downto 0)(PERF_CNTR_WIDTH -1 downto 0); + signal perf_cntr_incr_packed : std_logic_vector(PERF_CNTR_NUM -1 downto 0); + + signal data_addr_req_cntr_incr : std_logic; + 
signal dma_hdr_addr_req_cntr_incr : std_logic; + signal data_addr_stall_incr : std_logic; + signal dma_hdr_addr_stall_incr : std_logic; + signal pcie_mfb_stall_incr : std_logic; + signal pcie_mfb_beats_incr : std_logic; + + signal data_buff_full_chan : std_logic_vector(log2(CHANNELS) -1 downto 0); + signal data_buff_full_cntr_incr : std_logic; + signal dma_hdr_buff_full_chan : std_logic_vector(log2(CHANNELS) -1 downto 0); + signal dma_hdr_buff_full_cntr_incr : std_logic; + --============================================================================================== -- Debug signals for the RX DMA --============================================================================================== @@ -279,6 +319,118 @@ begin report "RX_LL_DMA: The design is not set for such PCIe MFB configuration, the valid are: MFB#(1,1,8,32), MFB#(2,1,8,32)." severity FAILURE; + perf_cntr_g: if (PERF_CNTR_EN) generate + + mi_splitter_i : entity work.MI_SPLITTER_PLUS_GEN + generic map ( + ADDR_WIDTH => MI_WIDTH, + DATA_WIDTH => MI_WIDTH, + META_WIDTH => 0, + PORTS => MI_SPLIT_PORTS, + PIPE_OUT => (others => FALSE), + + ADDR_BASES => MI_SPLIT_PORTS, + ADDR_BASE => MI_SPLIT_BASES, + ADDR_MASK => MI_SPLIT_ADDR_MASK, + + DEVICE => DEVICE) + port map ( + CLK => CLK, + RESET => RESET, + + RX_DWR => MI_DWR, + RX_MWR => (others => '0'), + RX_ADDR => MI_ADDR, + RX_BE => MI_BE, + RX_RD => MI_RD, + RX_WR => MI_WR, + RX_ARDY => MI_ARDY, + RX_DRD => MI_DRD, + RX_DRDY => MI_DRDY, + + TX_DWR => mi_split_dwr, + TX_MWR => open, + TX_ADDR => mi_split_addr, + TX_BE => mi_split_be, + TX_RD => mi_split_rd, + TX_WR => mi_split_wr, + TX_ARDY => mi_split_ardy, + TX_DRD => mi_split_drd, + TX_DRDY => mi_split_drdy); + + perf_counters_p: entity work.DATA_LOGGER + generic map ( + MI_DATA_WIDTH => MI_WIDTH, + MI_ADDR_WIDTH => MI_WIDTH, + + CNTER_CNT => PERF_CNTR_NUM, + VALUE_CNT => 2, + + CTRLO_WIDTH => 0, + CTRLI_WIDTH => 0, + + CNTER_WIDTH => PERF_CNTR_WIDTH, + VALUE_WIDTH => (others => log2(CHANNELS)), + + 
MIN_EN => (others => FALSE), + MAX_EN => (others => FALSE), + SUM_EN => (others => TRUE), + HIST_EN => (others => TRUE), + + SUM_EXTRA_WIDTH => (others => 16), + HIST_BOX_CNT => (others => CHANNELS), + HIST_BOX_WIDTH => (others => PERF_CNTR_WIDTH), + CTRLO_DEFAULT => (others => '0')) + port map ( + CLK => CLK, + RST => RESET, + + RST_DONE => open, + SW_RST => open, + + CTRLO => open, + CTRLI => (others => '0'), + + CNTERS_INCR => perf_cntr_incr_packed, + CNTERS_SUBMIT => perf_cntr_incr_packed, + CNTERS_DIFF => perf_cntr_diff_packed, + + VALUES_VLD => data_buff_full_cntr_incr & dma_hdr_buff_full_cntr_incr, + VALUES => data_buff_full_chan & dma_hdr_buff_full_chan, + + MI_DWR => mi_split_dwr(1), + MI_ADDR => mi_split_addr(1), + MI_BE => mi_split_be(1), + MI_RD => mi_split_rd(1), + MI_WR => mi_split_wr(1), + MI_ARDY => mi_split_ardy(1), + MI_DRD => mi_split_drd(1), + MI_DRDY => mi_split_drdy(1)); + + perf_cntr_diff_packed <= (others => std_logic_vector(to_unsigned(1, PERF_CNTR_WIDTH))); + perf_cntr_incr_packed <= pcie_mfb_beats_incr + & data_addr_req_cntr_incr + & dma_hdr_addr_req_cntr_incr + & data_addr_stall_incr + & dma_hdr_addr_stall_incr + & pcie_mfb_stall_incr; + + -- Counts the amount of beats where a transaction is ready but the PCIE interface is not + pcie_mfb_stall_incr <= (not PCIE_UP_MFB_DST_RDY) and PCIE_UP_MFB_SRC_RDY and (not RESET); + -- Counts an overall amount of beats in which packets are sent + pcie_mfb_beats_incr <= PCIE_UP_MFB_DST_RDY and PCIE_UP_MFB_SRC_RDY and (not RESET); + else generate + mi_split_dwr(0) <= MI_DWR; + mi_split_addr(0) <= MI_ADDR; + mi_split_be(0) <= MI_BE; + mi_split_rd(0) <= MI_RD; + mi_split_wr(0) <= MI_WR; + + MI_ARDY <= mi_split_ardy(0); + MI_DRD <= mi_split_drd(0); + MI_DRDY <= mi_split_drdy(0); + end generate; + rx_dma_sw_manager_i : entity work.RX_DMA_CALYPTE_SW_MANAGER generic map ( DEVICE => DEVICE, @@ -295,14 +447,14 @@ begin CLK => CLK, RESET => RESET, - MI_ADDR => MI_ADDR, - MI_DWR => MI_DWR, - MI_BE => MI_BE, - 
MI_RD => MI_RD, - MI_WR => MI_WR, - MI_DRD => MI_DRD, - MI_ARDY => MI_ARDY, - MI_DRDY => MI_DRDY, + MI_ADDR => mi_split_addr(0), + MI_DWR => mi_split_dwr(0), + MI_BE => mi_split_be(0), + MI_RD => mi_split_rd(0), + MI_WR => mi_split_wr(0), + MI_DRD => mi_split_drd(0), + MI_ARDY => mi_split_ardy(0), + MI_DRDY => mi_split_drdy(0), PKT_SENT_CHAN => hdrm_pkt_sent_chan, PKT_SENT_INC => hdrm_pkt_sent_inc, @@ -344,7 +496,12 @@ begin DPM_RD_CHAN => hdrm_data_rd_chan, DPM_RD_DATA => hdrm_dpm_rd_data, HPM_RD_CHAN => hdrm_hdr_rd_chan, - HPM_RD_DATA => hdrm_hpm_rd_data); + HPM_RD_DATA => hdrm_hpm_rd_data, + + DATA_BUFF_FULL_CHAN => data_buff_full_chan, + DATA_BUFF_FULL_CNTR_INCR => data_buff_full_cntr_incr, + DMA_HDR_BUFF_FULL_CHAN => dma_hdr_buff_full_chan, + DMA_HDR_BUFF_FULL_CNTR_INCR => dma_hdr_buff_full_cntr_incr); USER_RX_MFB_DST_RDY <= hdr_log_dst_rdy and data_path_dst_rdy; @@ -415,7 +572,13 @@ begin PKT_CNTR_CHAN => hdrm_pkt_sent_chan, PKT_CNTR_SENT_INC => hdrm_pkt_sent_inc, PKT_CNTR_DISC_INC => hdrm_pkt_disc_inc, - PKT_CNTR_PKT_SIZE => hdrm_pkt_sent_bytes); + PKT_CNTR_PKT_SIZE => hdrm_pkt_sent_bytes, + + DATA_ADDR_REQ_CNTR_INC => data_addr_req_cntr_incr, + DMA_HDR_ADDR_REQ_CNTR_INC => dma_hdr_addr_req_cntr_incr, + DATA_ADDR_STALL_INC => data_addr_stall_incr, + DMA_HDR_ADDR_STALL_INC => dma_hdr_addr_stall_incr); + rx_dma_hdr_insertor_i : entity work.RX_DMA_CALYPTE_HDR_INSERTOR generic map ( diff --git a/comp/dma/dma_calypte/comp/rx/sw/perf_cntrs.py b/comp/dma/dma_calypte/comp/rx/sw/perf_cntrs.py new file mode 100755 index 000000000..018a38470 --- /dev/null +++ b/comp/dma/dma_calypte/comp/rx/sw/perf_cntrs.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python3 +# Copyright (C) 2024 CESNET z. s. p. o. 
+# Author(s): Vladislav Valek + +import nfb +import argparse +from data_logger.data_logger import DataLogger +from time import sleep + +import curses + + +class RxDmaPerfCounters(DataLogger): + + DT_COMPATIBLE = "cesnet,dma_calypte_rx_perf_cntrs" + + def __init__(self, **kwargs): + super().__init__(**kwargs) + + self.counter_names = { + 0 : "PCIE_MFB_STALL_CNTR", + 1 : "DMA_HDR_ADDR_STALL_CNTR", + 2 : "DATA_ADDR_STALL_CNTR", + 3 : "DMA_HDR_ADDR_REQ_CNTR", + 4 : "DATA_ADDR_REQ_CNTR", + 5 : "PCIE_MFB_BEATS_CNTR" + } + + self.cntr_num = self.config['CNTER_CNT'] + + def show_cntrs(self): + print("Loading {} counters.".format(self.cntr_num)) + + for i in range(self.cntr_num): + cntr_val = self.load_cnter(i) + + print("{}: {}".format(self.counter_names[i], cntr_val)) + + def load_cntrs_all(self): + cntr_storage = [0]*self.cntr_num + + for i in range(self.cntr_num): + cntr_storage[i] = self.load_cnter(i) + + return cntr_storage + + def measure_blocking(self, stdscr): + stdscr.clear() + + try: + while True: + cntr_storage = self.load_cntrs_all() + + if (cntr_storage[5] != 0): + pcie_mfb_stall = (cntr_storage[0] / cntr_storage[5]) * 100 + else: + pcie_mfb_stall = 0.0 + + stdscr.addstr(0, 0, "PCIE IP stalls: {:.2}% (absolute {})".format(pcie_mfb_stall, cntr_storage[0])) + + if (cntr_storage[4] != 0): + data_addr_stall = (cntr_storage[2] / cntr_storage[4]) * 100 + else: + data_addr_stall = 0.0 + + stdscr.addstr(1, 0, "Wait for data address: {:.2}% (absolute {})".format(data_addr_stall, cntr_storage[2])) + + if (cntr_storage[3] != 0): + dma_hdr_addr_stall = (cntr_storage[1] / cntr_storage[3]) * 100 + else: + dma_hdr_addr_stall = 0.0 + + stdscr.addstr(2, 0, "Wait for DMA address: {:.2}% (absolute {})".format(dma_hdr_addr_stall, cntr_storage[1])) + + stdscr.addstr(3, 0, "Total data address req: {}".format(cntr_storage[4])) + stdscr.addstr(4, 0, "Total DMA addr req: {}".format(cntr_storage[3])) + stdscr.addstr(5, 0, "Total DMA PCIE beats: {}".format(cntr_storage[5])) + + 
stdscr.refresh() + sleep(1) + stdscr.clear() + + except KeyboardInterrupt: + print("Interrupt caught, terminating...") + + +def parseParams(): + parser = argparse.ArgumentParser( + description="Control script for performance counters.", + ) + + access = parser.add_argument_group('Card specifiers') + access.add_argument( + '-d', '--device', default=nfb.libnfb.Nfb.default_dev_path, + metavar='device', help="Target device") + access.add_argument( + '-i', '--index', type=int, metavar='index', default=0, help="Index of a counter array inside DeviceTree") + + common = parser.add_argument_group("Counters control") + common.add_argument('-p', '--print', action='store_true', help="Prints internal registers in JSON format") + common.add_argument('-m', '--measure', action='store_true', help="Continuously measures the amount of blocking") + common.add_argument('--rst', action='store_true', help="Reset the component.") + + args = parser.parse_args() + return args + + +if __name__ == '__main__': + args = parseParams() + perf_cntrs = RxDmaPerfCounters(dev=args.device, index=args.index) + + if args.rst: + perf_cntrs.rst() + elif args.print: + print(perf_cntrs.stats_to_str(hist=True)) + elif args.measure: + perf_cntrs.rst() + curses.wrapper(perf_cntrs.measure_blocking) + else: + perf_cntrs.show_cntrs() diff --git a/comp/dma/dma_calypte/comp/rx/uvm/signals.fdo b/comp/dma/dma_calypte/comp/rx/uvm/signals.fdo index 6bb87451a..a1fdc3188 100644 --- a/comp/dma/dma_calypte/comp/rx/uvm/signals.fdo +++ b/comp/dma/dma_calypte/comp/rx/uvm/signals.fdo @@ -6,7 +6,14 @@ proc ctrl {NAME PATH} { add wave -divider "$NAME" - add_wave "-noupdate -hex" $PATH/MI_* + add_wave "-noupdate -hex" $PATH/MI_ADDR + add_wave "-noupdate -hex" $PATH/MI_DWR + add_wave "-noupdate -hex" $PATH/MI_BE + add_wave "-noupdate -hex" $PATH/MI_RD + add_wave "-noupdate -hex" $PATH/MI_WR + add_wave "-noupdate -hex" $PATH/MI_DRD + add_wave "-noupdate -hex" $PATH/MI_ARDY + add_wave "-noupdate -hex" $PATH/MI_DRDY } proc wr {NAME 
PATH} { @@ -20,6 +27,7 @@ proc rd {NAME PATH} { } proc ints {PATH} { + add_wave [list -noupdate -hex -group "INTERNALS"] $PATH/mi_split_* add_wave [list -noupdate -hex -group "INTERNALS"] $PATH/start_* add_wave [list -noupdate -hex -group "INTERNALS"] $PATH/stop_* add_wave [list -noupdate -hex -group "INTERNALS"] $PATH/hdrm_* @@ -30,6 +38,15 @@ proc ints {PATH} { } proc high_level_view {PATH} { + add_wave [list -noupdate -bin -group "HIGH LEVEL DASHBOARD"] $PATH/data_addr_req_cntr_incr + add_wave [list -noupdate -bin -group "HIGH LEVEL DASHBOARD"] $PATH/dma_hdr_addr_req_cntr_incr + add_wave [list -noupdate -bin -group "HIGH LEVEL DASHBOARD"] $PATH/data_addr_stall_incr + add_wave [list -noupdate -bin -group "HIGH LEVEL DASHBOARD"] $PATH/dma_hdr_addr_stall_incr + add_wave [list -noupdate -bin -group "HIGH LEVEL DASHBOARD"] $PATH/pcie_mfb_stall_incr + add_wave [list -noupdate -unsigned -group "HIGH LEVEL DASHBOARD"] $PATH/data_buff_full_chan + add_wave [list -noupdate -bin -group "HIGH LEVEL DASHBOARD"] $PATH/data_buff_full_cntr_incr + add_wave [list -noupdate -unsigned -group "HIGH LEVEL DASHBOARD"] $PATH/dma_hdr_buff_full_chan + add_wave [list -noupdate -bin -group "HIGH LEVEL DASHBOARD"] $PATH/dma_hdr_buff_full_cntr_incr add_wave [list -noupdate -hex -group "HIGH LEVEL DASHBOARD"] $PATH/rx_dma_sw_manager_i/start_fsm_pst add_wave [list -noupdate -hex -group "HIGH LEVEL DASHBOARD"] $PATH/rx_dma_sw_manager_i/start_fsm_nst add_wave [list -noupdate -hex -group "HIGH LEVEL DASHBOARD"] $PATH/rx_dma_sw_manager_i/stop_fsm_pst diff --git a/comp/dma/dma_calypte/comp/rx/uvm/tbench/dut.sv b/comp/dma/dma_calypte/comp/rx/uvm/tbench/dut.sv index 36d7496ca..028f97a93 100644 --- a/comp/dma/dma_calypte/comp/rx/uvm/tbench/dut.sv +++ b/comp/dma/dma_calypte/comp/rx/uvm/tbench/dut.sv @@ -5,7 +5,7 @@ //-- SPDX-License-Identifier: BSD-3-Clause -module DMA_LL_DUT #(DEVICE, USER_REGIONS, USER_REGION_SIZE, USER_BLOCK_SIZE, USER_ITEM_WIDTH, PCIE_UP_REGIONS, PCIE_UP_REGION_SIZE, 
PCIE_UP_BLOCK_SIZE, PCIE_UP_ITEM_WIDTH, CHANNELS, PKT_SIZE_MAX, SW_ADDR_WIDTH, POINTER_WIDTH, CNTRS_WIDTH, TRBUF_REG_EN) +module DMA_LL_DUT #(DEVICE, USER_REGIONS, USER_REGION_SIZE, USER_BLOCK_SIZE, USER_ITEM_WIDTH, PCIE_UP_REGIONS, PCIE_UP_REGION_SIZE, PCIE_UP_BLOCK_SIZE, PCIE_UP_ITEM_WIDTH, CHANNELS, PKT_SIZE_MAX, SW_ADDR_WIDTH, POINTER_WIDTH, CNTRS_WIDTH, TRBUF_REG_EN, PERF_CNTR_EN) ( input logic CLK, input logic RST, @@ -47,7 +47,8 @@ module DMA_LL_DUT #(DEVICE, USER_REGIONS, USER_REGION_SIZE, USER_BLOCK_SIZE, USE .SW_ADDR_WIDTH (SW_ADDR_WIDTH), .CNTRS_WIDTH (CNTRS_WIDTH), .PKT_SIZE_MAX (PKT_SIZE_MAX), - .TRBUF_REG_EN (TRBUF_REG_EN) + .TRBUF_REG_EN (TRBUF_REG_EN), + .PERF_CNTR_EN (PERF_CNTR_EN) ) VHDL_DUT_U ( .CLK (CLK), .RESET (RST), diff --git a/comp/dma/dma_calypte/comp/rx/uvm/tbench/testbench.sv b/comp/dma/dma_calypte/comp/rx/uvm/tbench/testbench.sv index 8a21ad992..9bc154976 100644 --- a/comp/dma/dma_calypte/comp/rx/uvm/tbench/testbench.sv +++ b/comp/dma/dma_calypte/comp/rx/uvm/tbench/testbench.sv @@ -80,7 +80,8 @@ module testbench; .SW_ADDR_WIDTH (test::SW_ADDR_WIDTH), .POINTER_WIDTH (test::POINTER_WIDTH), .CNTRS_WIDTH (test::CNTRS_WIDTH), - .TRBUF_REG_EN (test::TRBUF_REG_EN) + .TRBUF_REG_EN (test::TRBUF_REG_EN), + .PERF_CNTR_EN (test::PERF_CNTR_EN) ) DUT_U ( .CLK (CLK), diff --git a/comp/dma/dma_calypte/comp/rx/uvm/tbench/tests/pkg.sv b/comp/dma/dma_calypte/comp/rx/uvm/tbench/tests/pkg.sv index e4c068b8d..cae46ebf2 100644 --- a/comp/dma/dma_calypte/comp/rx/uvm/tbench/tests/pkg.sv +++ b/comp/dma/dma_calypte/comp/rx/uvm/tbench/tests/pkg.sv @@ -31,6 +31,7 @@ package test; parameter CNTRS_WIDTH = 64; parameter PKT_SIZE_MAX = 2**12; parameter TRBUF_REG_EN = 1'b1; + parameter PERF_CNTR_EN = 1'b0; parameter DEVICE = "ULTRASCALE"; diff --git a/comp/dma/dma_calypte/dma_calypte.vhd b/comp/dma/dma_calypte/dma_calypte.vhd index 01b5dae10..6ba9ff7e1 100644 --- a/comp/dma/dma_calypte/dma_calypte.vhd +++ b/comp/dma/dma_calypte/dma_calypte.vhd @@ -72,6 +72,8 @@ entity 
DMA_CALYPTE is -- Enables an additional register of the transaction buffer that improves -- throughput TRBUF_REG_EN : boolean := false; + -- Enables performance counters alowing metrics generation. + PERF_CNTR_EN : boolean := false; -- ===================================================================== -- TX DMA settings @@ -266,7 +268,8 @@ begin CNTRS_WIDTH => DSP_CNT_WIDTH, HDR_META_WIDTH => HDR_META_WIDTH, PKT_SIZE_MAX => USR_RX_PKT_SIZE_MAX, - TRBUF_REG_EN => TRBUF_REG_EN) + TRBUF_REG_EN => TRBUF_REG_EN, + PERF_CNTR_EN => PERF_CNTR_EN) port map ( CLK => CLK, diff --git a/core/comp/dma/dma_mod/DevTree.tcl b/core/comp/dma/dma_mod/DevTree.tcl index 49c5b0060..cace722ab 100644 --- a/core/comp/dma/dma_mod/DevTree.tcl +++ b/core/comp/dma/dma_mod/DevTree.tcl @@ -44,6 +44,10 @@ proc dts_dmamod_open {base type rxn txn pcie rx_frame_size_max tx_frame_size_max } } + if {$type == 4 && $DMA_DEBUG_ENABLE} { + dts_dma_perf_cntrs ret [expr $base + 0x3000] + } + # TX DMA channels for {set i 0} {$i < $txn} {incr i} { if {$type == 3} { diff --git a/core/comp/dma/dma_mod/wrapper/dma_calypte_wrapper_arch.vhd b/core/comp/dma/dma_mod/wrapper/dma_calypte_wrapper_arch.vhd index 51fb43a4b..9e3173b61 100644 --- a/core/comp/dma/dma_mod/wrapper/dma_calypte_wrapper_arch.vhd +++ b/core/comp/dma/dma_mod/wrapper/dma_calypte_wrapper_arch.vhd @@ -693,6 +693,7 @@ begin RX_PTR_WIDTH => RX_DP_WIDTH, USR_RX_PKT_SIZE_MAX => USR_RX_PKT_SIZE_MAX, TRBUF_REG_EN => TRUE, + PERF_CNTR_EN => DBG_CNTR_EN, TX_CHANNELS => TX_CHANNELS, TX_PTR_WIDTH => TX_DP_WIDTH, From 6dabfe88ebfe6c7e1b5e3073f8e5ec91dc7b470b Mon Sep 17 00:00:00 2001 From: Vladislav Valek Date: Thu, 17 Oct 2024 21:13:41 +0200 Subject: [PATCH 08/11] Vivado_non_prj.inc.tcl [BUGFIX]: check if PLATFORM_TAGS exist --- build/Vivado_non_prj.inc.tcl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/build/Vivado_non_prj.inc.tcl b/build/Vivado_non_prj.inc.tcl index 1467b6792..66c84d28f 100644 --- a/build/Vivado_non_prj.inc.tcl 
+++ b/build/Vivado_non_prj.inc.tcl @@ -655,9 +655,12 @@ proc nb_sanitize_vars {synth_flags hierarchy} { set SYNTH_FLAGS(TOOL) "vivado" - global NB_PLATFORM_TAGS - global PLATFORM_TAGS - set NB_PLATFORM_TAGS "xilinx $PLATFORM_TAGS" + global NB_PLATFORM_TAGS env + if {[info exists env(PLATFORM_TAGS)]} { + set NB_PLATFORM_TAGS "$env(PLATFORM_TAGS)" + } else { + set NB_PLATFORM_TAGS "xilinx" + } if {[info commands version] != ""} { set SYNTH_FLAGS(TOOL_VERSION) [version -short] From 3e839f90dec271b525987c90afcf750a0dcaa1eb Mon Sep 17 00:00:00 2001 From: Vladislav Valek Date: Thu, 17 Oct 2024 21:51:37 +0200 Subject: [PATCH 09/11] build/readme.rst [FEATURE]: add documentation to the DTS templates --- build/readme.rst | 104 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) diff --git a/build/readme.rst b/build/readme.rst index 2b9f2b337..44cec39b1 100644 --- a/build/readme.rst +++ b/build/readme.rst @@ -385,3 +385,107 @@ The (incomplete) list of SYNTH_FLAGS array items - USE_XPM_LIBRARIES: includes XPM_CDC XPM_MEMORY XPM_FIFO in Vivado projects For other values and their purpose see the Vivado.inc.tcl or Quartus.inc.tcl file in the build directory. + +Device Tree nodes +----------------- + +For the software to find internal firmware components, a *Device Tree (DT)* is +used. This provides a tree of available parts (called nodes in the DT +terminology) of the design that can be accessed from the host without +restriction. A developer creates TCL procedures that generate nodes to *DT +string (DTS)* for the components he finds fit. Since the creation of the DTS can +be challenging, there are several TCL procedures provided that simplify the +process. These procedures are contained within the ``dts_templates.tcl`` file +with clarifying comments. The following examples provide an overview of their +usage. 
+ +Example 1 +~~~~~~~~~ + +This presents a minimal piece of code that creates a node +``dma_calypte_rx_perf_cntrs0`` with the base address *0x8000* +and the size *0x30*. It also contains a compatible string +``cesnet,dma_calypte_rx_perf_cntrs``. The string property is appended to the ``dts`` +variable that contains a reference to the required Device Tree string (DTS). + +.. code-block:: tcl + + dts_create_node dts "dma_calypte_rx_perf_cntrs0" { + dts_appendprop_comp_node dts 0x8000 0x30 "cesnet,dma_calypte_rx_perf_cntrs" + } + +Example 2 +~~~~~~~~~ + +A second, more complex example demonstrates the addition of multiple properties to a +node called ``dma_ctrl_calypte_$dir$id`` (string can be further adjusted through +parameters ``dir`` and ``id``). + +.. code-block:: tcl + + proc dts_dma_calypte_ctrl {DTS dir id base pcie} { + upvar 1 $DTS dts + + dts_create_node dts "dma_ctrl_calypte_$dir$id" { + # Adding compatible string "cesnet,dma_ctrl_calypte_$dir" and the + # reg property with base address $base and the size 0x80. + dts_appendprop_comp_node dts $base 0x80 "cesnet,dma_ctrl_calypte_$dir" + # Integer property called "version" with the value 0x10000 + dts_appendprop_int dts "version" 0x10000 + # Integer property "pcie" with the value of $pcie + dts_appendprop_int dts "pcie" $pcie + + # The addition of custom properties (with custom names) can be done + # through a standard "append" command. + if { $dir == "tx" } { + append dts "data_buff = <&dma_calypte_tx_data_buff$id>;" + append dts "hdr_buff = <&dma_calypte_tx_hdr_buff$id>;" + } + append dts "params = <&dma_params_$dir$pcie>;" + } + } + +Example 3 +~~~~~~~~~ + +This example shows how a complex node with multiple subnodes is created. The parent +node is called ``dma_calypte_test_core0`` and contains subnodes +``mfb_loopback0``, ``dma_calypte_debug_core0``, ``dma_calypte_latency_meter0`` +and ``dma_calypte_reset_fsm0``. Further nesting of nodes is possible as can be +seen when adding the ``mfb_generator0`` node.
Each of the called procedures +receives a reference to the same DTS through the ``dts`` variable. + +.. code-block:: tcl + + proc dts_calypte_test_core {DTS base_addr} { + # Populate reference from the calling environment + upvar 1 $DTS dts + + set LOOPBACK_BASE_ADDR [expr $base_addr + 0x0] + set TX_DBG_CORE_BASE_ADDR [expr $base_addr + 0x10000] + set LATENCY_METER_BASE_ADDR [expr $base_addr + 0x20000] + set RESET_FSM_BASE_ADDR [expr $base_addr + 0x30000] + + dts_create_node dts "dma_calypte_test_core0" { + + dts_create_node dts "mfb_loopback0" { + dts_appendprop_comp_node dts $LOOPBACK_BASE_ADDR 8 "cesnet,mfb_loopback" + } + + dts_create_node dts "dma_calypte_debug_core0" { + dts_appendprop_comp_node dts $TX_DBG_CORE_BASE_ADDR 0x1600 "cesnet,dma_calypte_debug_core" + + dts_create_node dts "mfb_generator0" { + dts_appendprop_comp_node dts [expr $TX_DBG_CORE_BASE_ADDR+0x8000] 0x40 "cesnet,mfb_generator" + } + } + + dts_create_node dts "dma_calypte_latency_meter0" { + dts_appendprop_comp_node dts $LATENCY_METER_BASE_ADDR 0x30 "cesnet,dma_calypte_latency_meter" + } + + dts_create_node dts "dma_calypte_reset_fsm0" { + dts_appendprop_comp_node dts $RESET_FSM_BASE_ADDR 0x4 "cesnet,dma_calypte_reset_fsm" + } + } + } From 6f255e2c7f46fb40e1703c3e335f9b598bbe84f2 Mon Sep 17 00:00:00 2001 From: Vladislav Valek Date: Tue, 22 Oct 2024 13:55:34 +0200 Subject: [PATCH 10/11] rx_dma_calypte_hdr_manager [FEATURE]: activate relaxed ordering for RQ transactions --- .../comp/rx/comp/hdr_manager/rx_dma_calypte_hdr_manager.vhd | 4 ++-- comp/dma/dma_calypte/comp/rx/uvm/tbench/env/model.sv | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/comp/dma/dma_calypte/comp/rx/comp/hdr_manager/rx_dma_calypte_hdr_manager.vhd b/comp/dma/dma_calypte/comp/rx/comp/hdr_manager/rx_dma_calypte_hdr_manager.vhd index 865aab17c..dbeff71e6 100644 --- a/comp/dma/dma_calypte/comp/rx/comp/hdr_manager/rx_dma_calypte_hdr_manager.vhd +++
b/comp/dma/dma_calypte/comp/rx/comp/hdr_manager/rx_dma_calypte_hdr_manager.vhd @@ -774,7 +774,7 @@ begin IN_VFID => (others => '0'), IN_TAG => (others => '0'), IN_DW_CNT => std_logic_vector(to_unsigned(DATA_SEGMENT_SIZE/4, 11)), - IN_ATTRIBUTES => "000", + IN_ATTRIBUTES => "010", IN_FBE => "1111", IN_LBE => "1111", IN_ADDR_LEN => pcie_addr_len_data_tran, @@ -820,7 +820,7 @@ begin IN_VFID => (others => '0'), IN_TAG => (others => '0'), IN_DW_CNT => std_logic_vector(to_unsigned(8/4, 11)), - IN_ATTRIBUTES => "000", + IN_ATTRIBUTES => "010", IN_FBE => "1111", IN_LBE => "1111", IN_ADDR_LEN => pcie_addr_len_dma_hdr_tran, diff --git a/comp/dma/dma_calypte/comp/rx/uvm/tbench/env/model.sv b/comp/dma/dma_calypte/comp/rx/uvm/tbench/env/model.sv index 5eb0c0347..b8eaf2f00 100644 --- a/comp/dma/dma_calypte/comp/rx/uvm/tbench/env/model.sv +++ b/comp/dma/dma_calypte/comp/rx/uvm/tbench/env/model.sv @@ -163,7 +163,7 @@ class model #(CHANNELS, PKT_SIZE_MAX, META_WIDTH, DEVICE) extends uvm_component; function void get_pcie_header(int unsigned packet_size, logic [64-1:0] addr, output logic[32-1 : 0] header[], output logic[168-1 : 0] meta); logic [2-1:0] at = 0; logic [1-1:0] ecrc = 0; - logic [3-1:0] attr = 0; + logic [3-1:0] attr = 2; logic [3-1:0] tc = 0; logic [1-1:0] rq_id_enabled = 0; logic [16-1:0] cm_id = 0; //compleater ID From c4d520f7ae44fd7d941ea124b6c3015069cd51ff Mon Sep 17 00:00:00 2001 From: Vladislav Valek Date: Tue, 5 Nov 2024 14:35:29 +0100 Subject: [PATCH 11/11] dma_test_core [BUGFIX]: fix histogram boxes count The count should be a power of 2. 
--- comp/dma/dma_calypte/comp/test_core/dma_test_core.vhd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/comp/dma/dma_calypte/comp/test_core/dma_test_core.vhd b/comp/dma/dma_calypte/comp/test_core/dma_test_core.vhd index 6179288c2..c06d659ac 100644 --- a/comp/dma/dma_calypte/comp/test_core/dma_test_core.vhd +++ b/comp/dma/dma_calypte/comp/test_core/dma_test_core.vhd @@ -504,7 +504,7 @@ begin HIST_EN => (others => true), SUM_EXTRA_WIDTH => (others => 16), - HIST_BOX_CNT => (others => 100), + HIST_BOX_CNT => (others => 128), HIST_BOX_WIDTH => (others => 32), CTRLO_DEFAULT => (others => '0')) port map (