diff --git a/build/DevTree.tcl b/build/DevTree.tcl index 41b5fbd68..1f3da840e 100644 --- a/build/DevTree.tcl +++ b/build/DevTree.tcl @@ -1,5 +1,6 @@ source [file join [file dirname [info script]] "scripts" "dts" "packed_item.tcl"] source [file join [file dirname [info script]] "scripts" "dts" "ndp_header.tcl"] +source [file join [file dirname [info script]] "scripts" "dts" "dts_templates.tcl"] # ---------------------------------------------------------------------- diff --git a/build/Vivado_non_prj.inc.tcl b/build/Vivado_non_prj.inc.tcl index 1467b6792..66c84d28f 100644 --- a/build/Vivado_non_prj.inc.tcl +++ b/build/Vivado_non_prj.inc.tcl @@ -655,9 +655,12 @@ proc nb_sanitize_vars {synth_flags hierarchy} { set SYNTH_FLAGS(TOOL) "vivado" - global NB_PLATFORM_TAGS - global PLATFORM_TAGS - set NB_PLATFORM_TAGS "xilinx $PLATFORM_TAGS" + global NB_PLATFORM_TAGS env + if {[info exists env(PLATFORM_TAGS)]} { + set NB_PLATFORM_TAGS "$env(PLATFORM_TAGS)" + } else { + set NB_PLATFORM_TAGS "xilinx" + } if {[info commands version] != ""} { set SYNTH_FLAGS(TOOL_VERSION) [version -short] diff --git a/build/readme.rst b/build/readme.rst index 2b9f2b337..44cec39b1 100644 --- a/build/readme.rst +++ b/build/readme.rst @@ -385,3 +385,107 @@ The (incomplete) list of SYNTH_FLAGS array items - USE_XPM_LIBRARIES: includes XPM_CDC XPM_MEMORY XPM_FIFO in Vivado projects For other values and their purpose see the Vivado.inc.tcl or Quartus.inc.tcl file in the build directory. + +Device Tree nodes +----------------- + +For the software to find internal firmware components, a *Device Tree (DT)* is +used. This provides a tree of available parts (called nodes in the DT +terminology) of the design that can be accessed from the host without +restriction. A developer creates TCL procedures that generate nodes to *DT +string (DTS)* for the components he finds fit. Since the creation of the DTS can +be challenging, there are several TCL procedures provided that simplify the +process. 
These procedures are contained within the ``dts_templates.tcl`` file +with clarifying comments. The following examples provide an overview of their +usage. + +Example 1 +~~~~~~~~~ + +This presents a minimal example that creates a node +``dma_calypte_rx_perf_cntrs0`` with the base address *0x8000* +and the size *0x30*. It also contains a compatible string +``cesnet,dma_calypte_rx_perf_cntrs``. The string property is appended to the ``dts`` +variable that contains a reference to the required Device Tree string (DTS). + +.. code-block:: tcl + + dts_create_node dts "dma_calypte_rx_perf_cntrs0" { + dts_appendprop_comp_node dts 0x8000 0x30 "cesnet,dma_calypte_rx_perf_cntrs" + } + +Example 2 +~~~~~~~~~ + +A second, more complex example demonstrates the addition of multiple properties to a +node called ``dma_ctrl_calypte_$dir$id`` (the string can be further adjusted through +parameters ``dir`` and ``id``). + +.. code-block:: tcl + + proc dts_dma_calypte_ctrl {DTS dir id base pcie} { + upvar 1 $DTS dts + + dts_create_node dts "dma_ctrl_calypte_$dir$id" { + # Adding compatible string "cesnet,dma_ctrl_calypte_$dir" and the + # reg property with base address $base and the size 0x80. + dts_appendprop_comp_node dts $base 0x80 "cesnet,dma_ctrl_calypte_$dir" + # Integer property called "version" with the value 0x10000 + dts_appendprop_int dts "version" 0x10000 + # Integer property "pcie" with the value of $pcie + dts_appendprop_int dts "pcie" $pcie + + # The addition of custom properties (custom-named) can be done + # through the standard "append" command. + if { $dir == "tx" } { + append dts "data_buff = <&dma_calypte_tx_data_buff$id>;" + append dts "hdr_buff = <&dma_calypte_tx_hdr_buff$id>;" + } + append dts "params = <&dma_params_$dir$pcie>;" + } + } + +Example 3 +~~~~~~~~~ + +This example shows how a complex node with multiple subnodes is created. 
The parent +node is called ``dma_calypte_test_core0`` and contains subnodes +``mfb_loopback0``, ``dma_calypte_debug_core0``, ``dma_calypte_latency_meter0`` +and ``dma_calypte_reset_fsm0``. Further nesting of nodes is possible as can be +seen when adding the ``mfb_generator0`` node. Each of the called procedures +contain a reference to the same DTS from the ``dts`` variable. + +.. code-block:: tcl + + proc dts_calypte_test_core {DTS base_addr} { + # Populate reference from the calling environment + upvar 1 $DTS dts + + set LOOPBACK_BASE_ADDR [expr $base_addr + 0x0] + set TX_DBG_CORE_BASE_ADDR [expr $base_addr + 0x10000] + set LATENCY_METER_BASE_ADDR [expr $base_addr + 0x20000] + set RESET_FSM_BASE_ADDR [expr $base_addr + 0x30000] + + dts_create_node dts "dma_calypte_test_core0" { + + dts_create_node dts "mfb_loopback0" { + dts_appendprop_comp_node dts $LOOPBACK_BASE_ADDR 8 "cesnet,mfb_loopback" + } + + dts_create_node dts "dma_calypte_debug_core0" { + dts_appendprop_comp_node dts $TX_DBG_CORE_BASE_ADDR 0x1600 "cesnet,dma_calypte_debug_core" + + dts_create_node dts "mfb_generator0" { + dts_appendprop_comp_node dts [expr $TX_DBG_CORE_BASE_ADDR+0x8000] 0x40 "cesnet,mfb_generator" + } + } + + dts_create_node dts "dma_calypte_latency_meter0" { + dts_appendprop_comp_node dts $LATENCY_METER_BASE_ADDR 0x30 "cesnet,dma_calypte_latency_meter" + } + + dts_create_node dts "dma_calypte_reset_fsm0" { + dts_appendprop_comp_node dts $RESET_FSM_BASE_ADDR 0x4 "cesnet,dma_calypte_reset_fsm" + } + } + } diff --git a/build/scripts/dts/dts_templates.tcl b/build/scripts/dts/dts_templates.tcl new file mode 100644 index 000000000..a01e76533 --- /dev/null +++ b/build/scripts/dts/dts_templates.tcl @@ -0,0 +1,78 @@ +# dts_templates.tcl: templates for various node types within the DeviceTree +# Copyright (C) 2024 CESNET z. s. p. o. +# Author(s): Vladisav Valek +# +# SPDX-License-Identifier: BSD-3-Clause + +# Adds a string property to a Device Tree node +# 1. 
DTS - a reference to Device Tree string +# 2. name - name of a string property +# 3. value - value of a string property +proc dts_appendprop_string {DTS name value} { + upvar 1 $DTS dts + append dts "$name = \"$value\";\n" +} + +# Adds an integer property to a Device Tree node +# 1. DTS - a reference to Device Tree string +# 2. name - name of an integer property +# 3. value - value of an integer property +proc dts_appendprop_int {DTS name value} { + upvar 1 $DTS dts + append dts "$name = <$value>;\n" +} + +# Adds register as a "reg" property to a Device Tree node +# 1. DTS - a reference to Device Tree string +# 2. addr - base address of a register +# 3. size - size of a register +proc dts_appendprop_reg {DTS addr size} { + upvar 1 $DTS dts + append dts "reg = <$addr $size>;\n" +} + +# Adds cells that specify processing of address and size values within reg properties of a DT node +# 1. DTS - a reference to Device Tree string +# 2. addr - specifies how many cells within the reg property represent the BASE ADDRESS of a register +# 3. size - specifies how many cells within the reg property represent the SIZE of a register +# NOTE: A cell within a reg property is a value of type uint32. +proc dts_add_cells {DTS {addr 1} {size 1}} { + upvar 1 $DTS dts + dts_appendprop_int dts "#address-cells" $addr + dts_appendprop_int dts "#size-cells" $size +} + +# Adds the minimal set of properties (compatible string and a register address) +# 1. DTS - a reference to Device Tree string +# 2. base_addr - base address in the MI address space +# 3. size - size of the register space in the MI address space +# 4. compatible - compatible string +proc dts_appendprop_comp_node {DTS base_addr size compatible} { + upvar 1 $DTS dts + + dts_appendprop_string dts "compatible" "$compatible" + dts_appendprop_reg dts $base_addr $size +} + +# This creates a node within a DTS +# 1. DTS - a reference to Device Tree string +# 2. 
alias - (also called label) provides an alternative name used for cross-referencing within a +# Device Tree +# 3. name - a name of a node +# 4. body - a set of procedures that add properties to a node (see example within the documentation +# of a Build System) +proc dts_create_labeled_node {DTS alias name body} { + upvar 1 $DTS dts + + if {$alias ne ""} { + append dts "$alias: " + } + append dts "$name {\n" + uplevel 1 $body + append dts "};\n" +} + +# Wrapper over dts_create_labeled_node that creates a node without a label +proc dts_create_node {DTS name body} { + uplevel 1 [list dts_create_labeled_node $DTS "" $name $body] +} diff --git a/comp/dma/dma_calypte/DevTree.tcl b/comp/dma/dma_calypte/DevTree.tcl index 5150e4da9..d6338ddba 100644 --- a/comp/dma/dma_calypte/DevTree.tcl +++ b/comp/dma/dma_calypte/DevTree.tcl @@ -3,34 +3,43 @@ # 2. id - channel ID # 3. base - base address of channel # 4. pcie - index(es) of PCIe endpoint(s) which DMA controller uses. -proc dts_dma_calypte_ctrl {dir id base pcie} { - set ret "" - append ret "dma_ctrl_calypte" "_$dir$id {" - append ret "compatible = \"cesnet,dma_ctrl_calypte" "_" $dir "\";" - append ret "reg = <$base 0x80>;" - append ret "version = <0x00010000>;" - append ret "pcie = <$pcie>;" - if { $dir == "tx" } { - append ret "data_buff = <&dma_calypte_tx_data_buff$id>;" - append ret "hdr_buff = <&dma_calypte_tx_hdr_buff$id>;" +proc dts_dma_calypte_ctrl {DTS dir id base pcie} { + upvar 1 $DTS dts + + dts_create_node dts "dma_ctrl_calypte_$dir$id" { + dts_appendprop_comp_node dts $base 0x80 "cesnet,dma_ctrl_calypte_$dir" + dts_appendprop_int dts "version" 0x10000 + dts_appendprop_int dts "pcie" $pcie + if { $dir == "tx" } { + append dts "data_buff = <&dma_calypte_tx_data_buff$id>;" + append dts "hdr_buff = <&dma_calypte_tx_hdr_buff$id>;" + } + append dts "params = <&dma_params_$dir$pcie>;" } - append ret "params = <&dma_params_$dir$pcie>;" - append ret "};" - return $ret } # generates Device Tree entries for data buffers 
in DMA Calypte # 1. type - content of the buffer (header or data) # 2. id - channel ID -# 3. base - base address for the first buffer +# 3. base - base address for the buffer # 4. size - size of the buffer # 5. pcie - index(es) of PCIe endpoint(s) which DMA controller uses. -proc dts_dma_calypte_tx_buffer {type id base size pcie} { - set ret "" - append ret "dma_calypte_tx_${type}_buff${id}: dma_calypte_tx_${type}_buff${id} {" - append ret "compatible = \"cesnet,dma_calypte_tx_${type}_buff\";" - append ret "reg = <$base $size>;" - append ret "pcie = <$pcie>;" - append ret "};" - return $ret +proc dts_dma_calypte_tx_buffer {DTS type id base size pcie} { + upvar 1 $DTS dts + + dts_create_labeled_node dts "dma_calypte_tx_${type}_buff${id}" "dma_calypte_tx_${type}_buff${id}" { + dts_appendprop_comp_node dts $base $size "cesnet,dma_calypte_tx_${type}_buff" + dts_appendprop_int dts "pcie" $pcie + } +} + +# Adds a node to the Device Tree for performance counters within DMA Calypte +# 1. DTS - reference to DeviceTree string +# 2. 
base - base address of the registers in the MI address space +proc dts_dma_perf_cntrs {DTS base} { + upvar 1 $DTS dts + + dts_create_node dts "dma_calypte_rx_perf_cntrs0" { + dts_appendprop_comp_node dts $base 0x30 "cesnet,dma_calypte_rx_perf_cntrs" + } } diff --git a/comp/dma/dma_calypte/comp/rx/Modules.tcl b/comp/dma/dma_calypte/comp/rx/Modules.tcl index 739ab6e4e..90e81e722 100644 --- a/comp/dma/dma_calypte/comp/rx/Modules.tcl +++ b/comp/dma/dma_calypte/comp/rx/Modules.tcl @@ -8,13 +8,15 @@ lappend PACKAGES "$OFM_PATH/comp/base/pkg/math_pack.vhd" lappend PACKAGES "$OFM_PATH/comp/base/pkg/type_pack.vhd" lappend PACKAGES "$OFM_PATH/comp/base/pkg/pcie_meta_pack.vhd" -set HDR_INSERTOR_BASE "$ENTITY_BASE/comp/hdr_insertor" -set HDR_MANAGER_BASE "$ENTITY_BASE/comp/hdr_manager" -set TRANS_BUFFER_BASE "$ENTITY_BASE/comp/trans_buffer" -set INPUT_BUFFER_BASE "$ENTITY_BASE/comp/input_buffer" -set SW_MANAGER_BASE "$ENTITY_BASE/comp/software_manager" -set MFB_FIFOX_BASE "$OFM_PATH/comp/mfb_tools/storage/fifox" -set MFB_FRAME_LNG_CHECK_BASE "$OFM_PATH/comp/mfb_tools/logic/frame_lng_check" +set HDR_INSERTOR_BASE "$ENTITY_BASE/comp/hdr_insertor" +set HDR_MANAGER_BASE "$ENTITY_BASE/comp/hdr_manager" +set TRANS_BUFFER_BASE "$ENTITY_BASE/comp/trans_buffer" +set INPUT_BUFFER_BASE "$ENTITY_BASE/comp/input_buffer" +set SW_MANAGER_BASE "$ENTITY_BASE/comp/software_manager" +set MFB_FIFOX_BASE "$OFM_PATH/comp/mfb_tools/storage/fifox" +set MFB_FRAME_LNG_CHECK_BASE "$OFM_PATH/comp/mfb_tools/logic/frame_lng_check" +set DATA_LOGGER_BASE "$OFM_PATH/comp/debug/data_logger" +set MI_SPLITTER_PLUS_GEN_BASE "$OFM_PATH/comp/mi_tools/splitter_plus_gen" lappend COMPONENTS \ [ list "RX_DMA_CALYPTE_HDR_INSERTOR" $HDR_INSERTOR_BASE "FULL"] \ @@ -24,6 +26,7 @@ lappend COMPONENTS \ [ list "RX_DMA_CALYPTE_SW_MANAGER" $SW_MANAGER_BASE "FULL"] \ [ list "MFB_FIFOX" $MFB_FIFOX_BASE "FULL"] \ [ list "MFB_FRAME_LNG_CHECK" $MFB_FRAME_LNG_CHECK_BASE "FULL"] \ - + [ list "DATA_LOGGER" $DATA_LOGGER_BASE "FULL"] \ + 
[ list "MI_SPLITTER_PLUS_GEN" $MI_SPLITTER_PLUS_GEN_BASE "FULL"] lappend MOD "$ENTITY_BASE/rx_dma_calypte.vhd" diff --git a/comp/dma/dma_calypte/comp/rx/comp/hdr_manager/rx_dma_calypte_hdr_manager.vhd b/comp/dma/dma_calypte/comp/rx/comp/hdr_manager/rx_dma_calypte_hdr_manager.vhd index fc6fe3c32..dbeff71e6 100644 --- a/comp/dma/dma_calypte/comp/rx/comp/hdr_manager/rx_dma_calypte_hdr_manager.vhd +++ b/comp/dma/dma_calypte/comp/rx/comp/hdr_manager/rx_dma_calypte_hdr_manager.vhd @@ -137,7 +137,15 @@ entity RX_DMA_CALYPTE_HDR_MANAGER is PKT_CNTR_CHAN : out std_logic_vector(log2(CHANNELS) -1 downto 0); PKT_CNTR_SENT_INC : out std_logic; PKT_CNTR_DISC_INC : out std_logic; - PKT_CNTR_PKT_SIZE : out std_logic_vector(log2(PKT_MTU+1) -1 downto 0) + PKT_CNTR_PKT_SIZE : out std_logic_vector(log2(PKT_MTU+1) -1 downto 0); + + -- ========================================================================================= + -- Performance counter outputs + -- ========================================================================================= + DATA_ADDR_REQ_CNTR_INC : out std_logic; + DMA_HDR_ADDR_REQ_CNTR_INC : out std_logic; + DATA_ADDR_STALL_INC : out std_logic; + DMA_HDR_ADDR_STALL_INC : out std_logic ); end entity; @@ -311,6 +319,12 @@ architecture FULL of RX_DMA_CALYPTE_HDR_MANAGER is signal ptr_fifo_rd : std_logic; signal ptr_fifo_empty : std_logic; + -- ============================================================================================= + -- Performance counter logic + -- ============================================================================================= + signal data_addr_next_reg : std_logic; + signal dma_hdr_addr_next_reg : std_logic; + -- ============================================================================================= -- Debug signals and probes (either for verification or ILA/SignalTap) -- ============================================================================================= @@ -760,7 +774,7 @@ begin IN_VFID => (others => '0'), 
IN_TAG => (others => '0'), IN_DW_CNT => std_logic_vector(to_unsigned(DATA_SEGMENT_SIZE/4, 11)), - IN_ATTRIBUTES => "000", + IN_ATTRIBUTES => "010", IN_FBE => "1111", IN_LBE => "1111", IN_ADDR_LEN => pcie_addr_len_data_tran, @@ -806,7 +820,7 @@ begin IN_VFID => (others => '0'), IN_TAG => (others => '0'), IN_DW_CNT => std_logic_vector(to_unsigned(8/4, 11)), - IN_ATTRIBUTES => "000", + IN_ATTRIBUTES => "010", IN_FBE => "1111", IN_LBE => "1111", IN_ADDR_LEN => pcie_addr_len_dma_hdr_tran, @@ -1025,4 +1039,24 @@ begin -- signal is valid and set to 1. DMA_HDR_SRC_RDY <= ((not hdr_meta_fifo_empty) and (not ptr_fifo_empty) and (not pkt_size_fifo_empty) and (not discard_fifo_empty) and (not discard_fifo_do(0))) or ((not discard_fifo_empty) and discard_fifo_do(0)); + + -- ============================================================================================= + -- Performance counter logic + -- ============================================================================================= + addr_next_reg_p: process (CLK) is + begin + if (rising_edge(CLK)) then + data_addr_next_reg <= data_addr_next; + dma_hdr_addr_next_reg <= dma_hdr_addr_next; + end if; + end process; + + -- the response for an address request usually comes one clock period delayed. If that is not the + -- case, a stall occurs. 
+ DATA_ADDR_STALL_INC <= data_addr_next_reg and (not data_pcie_addr_vld); + DMA_HDR_ADDR_STALL_INC <= dma_hdr_addr_next_reg and (not dma_hdr_pcie_addr_vld); + + -- Counters of requests for PCIe addresses (should be equal to the overall amount of packets) + DATA_ADDR_REQ_CNTR_INC <= data_pcie_addr_vld; + DMA_HDR_ADDR_REQ_CNTR_INC <= dma_hdr_pcie_addr_vld; end architecture; diff --git a/comp/dma/dma_calypte/comp/rx/comp/software_manager/rx_dma_calypte_sw_manager.vhd b/comp/dma/dma_calypte/comp/rx/comp/software_manager/rx_dma_calypte_sw_manager.vhd index 58c2db237..e174e58cc 100644 --- a/comp/dma/dma_calypte/comp/rx/comp/software_manager/rx_dma_calypte_sw_manager.vhd +++ b/comp/dma/dma_calypte/comp/rx/comp/software_manager/rx_dma_calypte_sw_manager.vhd @@ -118,7 +118,13 @@ port ( DPM_RD_CHAN : in std_logic_vector(log2(CHANNELS)-1 downto 0); DPM_RD_DATA : out std_logic_vector(POINTER_WIDTH-1 downto 0); HPM_RD_CHAN : in std_logic_vector(log2(CHANNELS)-1 downto 0); - HPM_RD_DATA : out std_logic_vector(POINTER_WIDTH-1 downto 0) + HPM_RD_DATA : out std_logic_vector(POINTER_WIDTH-1 downto 0); + + -- Performance counters + DATA_BUFF_FULL_CHAN : out std_logic_vector(log2(CHANNELS) -1 downto 0); + DATA_BUFF_FULL_CNTR_INCR : out std_logic; + DMA_HDR_BUFF_FULL_CHAN : out std_logic_vector(log2(CHANNELS) -1 downto 0); + DMA_HDR_BUFF_FULL_CNTR_INCR : out std_logic ); end entity; @@ -350,8 +356,8 @@ architecture FULL of RX_DMA_CALYPTE_SW_MANAGER is RSV_3 => 1 + 0, R_SDP => 1 + 2, -- Channel Stop indication (comparator) + Header manager R_SHP => 1 + 2, -- Channel Stop indication (comparator) + Header manager - R_HDP => 1 + 1, -- Comparator - R_HHP => 1 + 1, -- Comparator + R_HDP => 1 + 2, -- Comparator + Perf. counters + R_HHP => 1 + 2, -- Comparator + Perf. 
counters RSV_8 => 1 + 0, RSV_9 => 1 + 0, RSV_10 => 1 + 0, @@ -583,6 +589,16 @@ architecture FULL of RX_DMA_CALYPTE_SW_MANAGER is signal stop_hhp_ok_reg : std_logic; -- ===================================================================== + -- ============================================================================================= + -- Performance counter connections + -- ============================================================================================= + signal buff_full_shp_compare : std_logic_vector(POINTER_WIDTH -1 downto 0); + signal buff_full_sdp_compare : std_logic_vector(POINTER_WIDTH -1 downto 0); + signal buff_full_hhp_compare : std_logic_vector(POINTER_WIDTH -1 downto 0); + signal buff_full_hdp_compare : std_logic_vector(POINTER_WIDTH -1 downto 0); + + signal buff_full_hhp_compare_reg : std_logic_vector(POINTER_WIDTH -1 downto 0); + signal buff_full_hdp_compare_reg : std_logic_vector(POINTER_WIDTH -1 downto 0); -- attribute mark_debug : string; -- attribute mark_debug of active_chan_reg : signal is "true"; -- attribute mark_debug of start_pending_reg_chan : signal is "true"; @@ -1247,4 +1263,34 @@ begin stop_hhp_ok_reg <= '1' when (comp_hpp_res = "00") else '0'; -- ===================================================================== + -- ============================================================================================= + -- Connections to performance counters + -- ============================================================================================= + + reg_addrb(R_HHP)(2) <= SHP_RD_CHAN; + reg_addrb(R_HDP)(2) <= SDP_RD_CHAN; + + inc_reg_p: process (CLK) is + begin + if (rising_edge(CLK)) then + DMA_HDR_BUFF_FULL_CHAN <= SHP_RD_CHAN; + DATA_BUFF_FULL_CHAN <= SDP_RD_CHAN; + buff_full_shp_compare <= reg_dob_opt(R_SHP)(2)(POINTER_WIDTH-1 downto 0); + buff_full_sdp_compare <= reg_dob_opt(R_SDP)(2)(POINTER_WIDTH-1 downto 0); + buff_full_hhp_compare <= reg_dob_opt(R_HHP)(2)(POINTER_WIDTH-1 downto 0); + buff_full_hdp_compare <= 
reg_dob_opt(R_HDP)(2)(POINTER_WIDTH-1 downto 0); + buff_full_hhp_compare_reg <= buff_full_hhp_compare; + buff_full_hdp_compare_reg <= buff_full_hdp_compare; + end if; + end process; + + DATA_BUFF_FULL_CNTR_INCR <= '1' when + (unsigned(buff_full_sdp_compare) -1) = unsigned(buff_full_hdp_compare) + and (buff_full_hdp_compare /= buff_full_hdp_compare_reg) + else '0'; + + DMA_HDR_BUFF_FULL_CNTR_INCR <= '1' when + (unsigned(buff_full_shp_compare) -1) = unsigned(buff_full_hhp_compare) + and (buff_full_hhp_compare /= buff_full_hhp_compare_reg) + else '0'; end architecture; diff --git a/comp/dma/dma_calypte/comp/rx/rx_dma_calypte.vhd b/comp/dma/dma_calypte/comp/rx/rx_dma_calypte.vhd index c231e8f70..8eaa05202 100644 --- a/comp/dma/dma_calypte/comp/rx/rx_dma_calypte.vhd +++ b/comp/dma/dma_calypte/comp/rx/rx_dma_calypte.vhd @@ -54,8 +54,10 @@ entity RX_DMA_CALYPTE is -- * Maximum allowed value is 2**16 - 1 PKT_SIZE_MAX : natural := 2**16 - 1; - TRBUF_FIFO_EN : boolean := FALSE; - TRBUF_REG_EN : boolean := FALSE + -- Enables a register in the transaction buffer that improves throughput (but increases latency). + TRBUF_REG_EN : boolean := FALSE; + -- Enables performance counters in the design for metrics. 
+ PERF_CNTR_EN : boolean := FALSE ); port ( @@ -125,6 +127,22 @@ architecture FULL of RX_DMA_CALYPTE is constant BUFFERED_DATA_SIZE : natural := 128; --============================================================================================================= + constant MI_SPLIT_PORTS : natural := 2; + constant MI_SPLIT_BASES : slv_array_t(MI_SPLIT_PORTS -1 downto 0)(MI_WIDTH-1 downto 0) := ( + 0 => x"00000000", + 1 => x"00003000"); + + constant MI_SPLIT_ADDR_MASK : std_logic_vector(MI_WIDTH-1 downto 0) := x"00003000"; + + signal mi_split_dwr : slv_array_t(MI_SPLIT_PORTS -1 downto 0)(MI_WIDTH -1 downto 0); + signal mi_split_addr : slv_array_t(MI_SPLIT_PORTS -1 downto 0)(MI_WIDTH -1 downto 0); + signal mi_split_be : slv_array_t(MI_SPLIT_PORTS -1 downto 0)(MI_WIDTH/8 -1 downto 0); + signal mi_split_rd : std_logic_vector(MI_SPLIT_PORTS -1 downto 0); + signal mi_split_wr : std_logic_vector(MI_SPLIT_PORTS -1 downto 0); + signal mi_split_drd : slv_array_t(MI_SPLIT_PORTS -1 downto 0)(MI_WIDTH -1 downto 0); + signal mi_split_ardy : std_logic_vector(MI_SPLIT_PORTS -1 downto 0); + signal mi_split_drdy : std_logic_vector(MI_SPLIT_PORTS -1 downto 0); + signal start_req_chan : std_logic_vector((log2(CHANNELS)-1) downto 0); signal start_req_vld : std_logic; signal start_req_done : std_logic; @@ -169,21 +187,13 @@ architecture FULL of RX_DMA_CALYPTE is signal hdrm_pkt_disc_inc : std_logic; signal hdrm_pkt_sent_bytes : std_logic_vector((log2(PKT_SIZE_MAX+1)-1) downto 0); - signal trbuf_fifo_tx_data : std_logic_vector(MFB_REGION_SIZE_TRBUF2INS*MFB_BLOCK_SIZE_TRBUF2INS*MFB_ITEM_WIDTH_TRBUF2INS-1 downto 0); - signal trbuf_fifo_tx_sof_pos : std_logic_vector (max(1, log2(MFB_REGION_SIZE_TRBUF2INS))-1 downto 0); - signal trbuf_fifo_tx_eof_pos : std_logic_vector (max(1, log2(MFB_REGION_SIZE_TRBUF2INS*MFB_BLOCK_SIZE_TRBUF2INS))-1 downto 0); - signal trbuf_fifo_tx_sof : std_logic; - signal trbuf_fifo_tx_eof : std_logic; - signal trbuf_fifo_tx_src_rdy : std_logic; - signal 
trbuf_fifo_tx_dst_rdy : std_logic; - - signal trbuf_fifo_rx_data : std_logic_vector(MFB_REGION_SIZE_TRBUF2INS*MFB_BLOCK_SIZE_TRBUF2INS*MFB_ITEM_WIDTH_TRBUF2INS-1 downto 0); - signal trbuf_fifo_rx_sof_pos : std_logic_vector (max(1, log2(MFB_REGION_SIZE_TRBUF2INS))-1 downto 0); - signal trbuf_fifo_rx_eof_pos : std_logic_vector (max(1, log2(MFB_REGION_SIZE_TRBUF2INS*MFB_BLOCK_SIZE_TRBUF2INS))-1 downto 0); - signal trbuf_fifo_rx_sof : std_logic; - signal trbuf_fifo_rx_eof : std_logic; - signal trbuf_fifo_rx_src_rdy : std_logic; - signal trbuf_fifo_rx_dst_rdy : std_logic; + signal mfb_data_trbuf : std_logic_vector(MFB_REGION_SIZE_TRBUF2INS*MFB_BLOCK_SIZE_TRBUF2INS*MFB_ITEM_WIDTH_TRBUF2INS-1 downto 0); + signal mfb_sof_pos_trbuf : std_logic_vector (max(1, log2(MFB_REGION_SIZE_TRBUF2INS))-1 downto 0); + signal mfb_eof_pos_trbuf : std_logic_vector (max(1, log2(MFB_REGION_SIZE_TRBUF2INS*MFB_BLOCK_SIZE_TRBUF2INS))-1 downto 0); + signal mfb_sof_trbuf : std_logic; + signal mfb_eof_trbuf : std_logic; + signal mfb_src_rdy_trbuf : std_logic; + signal mfb_dst_rdy_trbuf : std_logic; -- ============================================================================================= -- Frame length checker ---> Transaction buffer @@ -217,6 +227,27 @@ architecture FULL of RX_DMA_CALYPTE is signal data_path_dst_rdy : std_logic; signal hdr_log_dst_rdy : std_logic; + -- ============================================================================================= + -- Performance counters' increment signals + -- ============================================================================================= + constant PERF_CNTR_NUM : positive := 6; + constant PERF_CNTR_WIDTH : positive := 64; + + signal perf_cntr_diff_packed : slv_array_t(PERF_CNTR_NUM -1 downto 0)(PERF_CNTR_WIDTH -1 downto 0); + signal perf_cntr_incr_packed : std_logic_vector(PERF_CNTR_NUM -1 downto 0); + + signal data_addr_req_cntr_incr : std_logic; + signal dma_hdr_addr_req_cntr_incr : std_logic; + signal 
data_addr_stall_incr : std_logic; + signal dma_hdr_addr_stall_incr : std_logic; + signal pcie_mfb_stall_incr : std_logic; + signal pcie_mfb_beats_incr : std_logic; + + signal data_buff_full_chan : std_logic_vector(log2(CHANNELS) -1 downto 0); + signal data_buff_full_cntr_incr : std_logic; + signal dma_hdr_buff_full_chan : std_logic_vector(log2(CHANNELS) -1 downto 0); + signal dma_hdr_buff_full_cntr_incr : std_logic; + --============================================================================================== -- Debug signals for the RX DMA --============================================================================================== @@ -258,13 +289,13 @@ architecture FULL of RX_DMA_CALYPTE is -- attribute mark_debug of hdrm_dma_hdr_src_rdy : signal is "true"; -- attribute mark_debug of hdrm_dma_hdr_dst_rdy : signal is "true"; - -- attribute mark_debug of trbuf_fifo_tx_data : signal is "true"; - -- attribute mark_debug of trbuf_fifo_tx_sof : signal is "true"; - -- attribute mark_debug of trbuf_fifo_tx_eof : signal is "true"; - -- attribute mark_debug of trbuf_fifo_tx_sof_pos : signal is "true"; - -- attribute mark_debug of trbuf_fifo_tx_eof_pos : signal is "true"; - -- attribute mark_debug of trbuf_fifo_tx_src_rdy : signal is "true"; - -- attribute mark_debug of trbuf_fifo_tx_dst_rdy : signal is "true"; + -- attribute mark_debug of mfb_data_trbuf : signal is "true"; + -- attribute mark_debug of mfb_sof_trbuf : signal is "true"; + -- attribute mark_debug of mfb_eof_trbuf : signal is "true"; + -- attribute mark_debug of mfb_sof_pos_trbuf : signal is "true"; + -- attribute mark_debug of mfb_eof_pos_trbuf : signal is "true"; + -- attribute mark_debug of mfb_src_rdy_trbuf : signal is "true"; + -- attribute mark_debug of mfb_dst_rdy_trbuf : signal is "true"; -- attribute mark_debug of mfb_src_rdy_inbuf : signal is "true"; -- attribute mark_debug of mfb_dst_rdy_inbuf : signal is "true"; @@ -288,6 +319,118 @@ begin report "RX_LL_DMA: The design is not set for such 
PCIe MFB configuration, the valid are: MFB#(1,1,8,32), MFB#(2,1,8,32)." severity FAILURE; + perf_cntr_g: if (PERF_CNTR_EN) generate + + mi_splitter_i : entity work.MI_SPLITTER_PLUS_GEN + generic map ( + ADDR_WIDTH => MI_WIDTH, + DATA_WIDTH => MI_WIDTH, + META_WIDTH => 0, + PORTS => MI_SPLIT_PORTS, + PIPE_OUT => (others => FALSE), + + ADDR_BASES => MI_SPLIT_PORTS, + ADDR_BASE => MI_SPLIT_BASES, + ADDR_MASK => MI_SPLIT_ADDR_MASK, + + DEVICE => DEVICE) + port map ( + CLK => CLK, + RESET => RESET, + + RX_DWR => MI_DWR, + RX_MWR => (others => '0'), + RX_ADDR => MI_ADDR, + RX_BE => MI_BE, + RX_RD => MI_RD, + RX_WR => MI_WR, + RX_ARDY => MI_ARDY, + RX_DRD => MI_DRD, + RX_DRDY => MI_DRDY, + + TX_DWR => mi_split_dwr, + TX_MWR => open, + TX_ADDR => mi_split_addr, + TX_BE => mi_split_be, + TX_RD => mi_split_rd, + TX_WR => mi_split_wr, + TX_ARDY => mi_split_ardy, + TX_DRD => mi_split_drd, + TX_DRDY => mi_split_drdy); + + perf_counters_p: entity work.DATA_LOGGER + generic map ( + MI_DATA_WIDTH => MI_WIDTH, + MI_ADDR_WIDTH => MI_WIDTH, + + CNTER_CNT => PERF_CNTR_NUM, + VALUE_CNT => 2, + + CTRLO_WIDTH => 0, + CTRLI_WIDTH => 0, + + CNTER_WIDTH => PERF_CNTR_WIDTH, + VALUE_WIDTH => (others => log2(CHANNELS)), + + MIN_EN => (others => FALSE), + MAX_EN => (others => FALSE), + SUM_EN => (others => TRUE), + HIST_EN => (others => TRUE), + + SUM_EXTRA_WIDTH => (others => 16), + HIST_BOX_CNT => (others => CHANNELS), + HIST_BOX_WIDTH => (others => PERF_CNTR_WIDTH), + CTRLO_DEFAULT => (others => '0')) + port map ( + CLK => CLK, + RST => RESET, + + RST_DONE => open, + SW_RST => open, + + CTRLO => open, + CTRLI => (others => '0'), + + CNTERS_INCR => perf_cntr_incr_packed, + CNTERS_SUBMIT => perf_cntr_incr_packed, + CNTERS_DIFF => perf_cntr_diff_packed, + + VALUES_VLD => data_buff_full_cntr_incr & dma_hdr_buff_full_cntr_incr, + VALUES => data_buff_full_chan & dma_hdr_buff_full_chan, + + MI_DWR => mi_split_dwr(1), + MI_ADDR => mi_split_addr(1), + MI_BE => mi_split_be(1), + MI_RD => 
mi_split_rd(1), + MI_WR => mi_split_wr(1), + MI_ARDY => mi_split_ardy(1), + MI_DRD => mi_split_drd(1), + MI_DRDY => mi_split_drdy(1)); + + perf_cntr_diff_packed <= (others => std_logic_vector(to_unsigned(1, PERF_CNTR_WIDTH))); + perf_cntr_incr_packed <= pcie_mfb_beats_incr + & data_addr_req_cntr_incr + & dma_hdr_addr_req_cntr_incr + & data_addr_stall_incr + & dma_hdr_addr_stall_incr + & pcie_mfb_stall_incr; + + -- Counts the amount of beats where a transaction is ready but the PCIE interface is not + pcie_mfb_stall_incr <= (not PCIE_UP_MFB_DST_RDY) and PCIE_UP_MFB_SRC_RDY and (not RESET); + -- Counts an overall amount of beats in which packets are sent + pcie_mfb_beats_incr <= PCIE_UP_MFB_DST_RDY and PCIE_UP_MFB_SRC_RDY and (not RESET); + else generate + mi_split_dwr(0) <= MI_DWR; + mi_split_addr(0) <= MI_ADDR; + mi_split_be(0) <= MI_BE; + mi_split_rd(0) <= MI_RD; + mi_split_wr(0) <= MI_WR; + + MI_ARDY <= mi_split_ardy(0); + MI_DRD <= mi_split_drd(0); + MI_DRDY <= mi_split_drdy(0); + end generate; + rx_dma_sw_manager_i : entity work.RX_DMA_CALYPTE_SW_MANAGER generic map ( DEVICE => DEVICE, @@ -304,14 +447,14 @@ begin CLK => CLK, RESET => RESET, - MI_ADDR => MI_ADDR, - MI_DWR => MI_DWR, - MI_BE => MI_BE, - MI_RD => MI_RD, - MI_WR => MI_WR, - MI_DRD => MI_DRD, - MI_ARDY => MI_ARDY, - MI_DRDY => MI_DRDY, + MI_ADDR => mi_split_addr(0), + MI_DWR => mi_split_dwr(0), + MI_BE => mi_split_be(0), + MI_RD => mi_split_rd(0), + MI_WR => mi_split_wr(0), + MI_DRD => mi_split_drd(0), + MI_ARDY => mi_split_ardy(0), + MI_DRDY => mi_split_drdy(0), PKT_SENT_CHAN => hdrm_pkt_sent_chan, PKT_SENT_INC => hdrm_pkt_sent_inc, @@ -353,7 +496,12 @@ begin DPM_RD_CHAN => hdrm_data_rd_chan, DPM_RD_DATA => hdrm_dpm_rd_data, HPM_RD_CHAN => hdrm_hdr_rd_chan, - HPM_RD_DATA => hdrm_hpm_rd_data); + HPM_RD_DATA => hdrm_hpm_rd_data, + + DATA_BUFF_FULL_CHAN => data_buff_full_chan, + DATA_BUFF_FULL_CNTR_INCR => data_buff_full_cntr_incr, + DMA_HDR_BUFF_FULL_CHAN => dma_hdr_buff_full_chan, + 
DMA_HDR_BUFF_FULL_CNTR_INCR => dma_hdr_buff_full_cntr_incr); USER_RX_MFB_DST_RDY <= hdr_log_dst_rdy and data_path_dst_rdy; @@ -424,7 +572,13 @@ begin PKT_CNTR_CHAN => hdrm_pkt_sent_chan, PKT_CNTR_SENT_INC => hdrm_pkt_sent_inc, PKT_CNTR_DISC_INC => hdrm_pkt_disc_inc, - PKT_CNTR_PKT_SIZE => hdrm_pkt_sent_bytes); + PKT_CNTR_PKT_SIZE => hdrm_pkt_sent_bytes, + + DATA_ADDR_REQ_CNTR_INC => data_addr_req_cntr_incr, + DMA_HDR_ADDR_REQ_CNTR_INC => dma_hdr_addr_req_cntr_incr, + DATA_ADDR_STALL_INC => data_addr_stall_incr, + DMA_HDR_ADDR_STALL_INC => dma_hdr_addr_stall_incr); + rx_dma_hdr_insertor_i : entity work.RX_DMA_CALYPTE_HDR_INSERTOR generic map ( @@ -443,11 +597,11 @@ begin CLK => CLK, RST => RESET, - RX_MFB_DATA => trbuf_fifo_tx_data, - RX_MFB_SOF => trbuf_fifo_tx_sof, - RX_MFB_EOF => trbuf_fifo_tx_eof, - RX_MFB_SRC_RDY => trbuf_fifo_tx_src_rdy, - RX_MFB_DST_RDY => trbuf_fifo_tx_dst_rdy, + RX_MFB_DATA => mfb_data_trbuf, + RX_MFB_SOF => mfb_sof_trbuf, + RX_MFB_EOF => mfb_eof_trbuf, + RX_MFB_SRC_RDY => mfb_src_rdy_trbuf, + RX_MFB_DST_RDY => mfb_dst_rdy_trbuf, TX_MFB_DATA => PCIE_UP_MFB_DATA, TX_MFB_META => PCIE_UP_MFB_META, @@ -473,66 +627,13 @@ begin HDRM_DMA_HDR_SRC_RDY => hdrm_dma_hdr_src_rdy, HDRM_DMA_HDR_DST_RDY => hdrm_dma_hdr_dst_rdy); - tr_buf_fifo_g: if (TRBUF_FIFO_EN) generate - - trbuf_fifo_i : entity work.MFB_FIFOX - generic map ( - REGIONS => 1, - REGION_SIZE => MFB_REGION_SIZE_TRBUF2INS, - BLOCK_SIZE => MFB_BLOCK_SIZE_TRBUF2INS, - ITEM_WIDTH => MFB_ITEM_WIDTH_TRBUF2INS, - META_WIDTH => 0, - FIFO_DEPTH => 32, - RAM_TYPE => "AUTO", - DEVICE => DEVICE, - ALMOST_FULL_OFFSET => 2, - ALMOST_EMPTY_OFFSET => 2) - port map ( - CLK => CLK, - RST => RESET, - - RX_DATA => trbuf_fifo_rx_data, - RX_META => (others => '0'), - RX_SOF_POS => trbuf_fifo_rx_sof_pos, - RX_EOF_POS => trbuf_fifo_rx_eof_pos, - RX_SOF(0) => trbuf_fifo_rx_sof, - RX_EOF(0) => trbuf_fifo_rx_eof, - RX_SRC_RDY => trbuf_fifo_rx_src_rdy, - RX_DST_RDY => trbuf_fifo_rx_dst_rdy, - - TX_DATA => 
trbuf_fifo_tx_data, - TX_META => open, - TX_SOF_POS => trbuf_fifo_tx_sof_pos, - TX_EOF_POS => trbuf_fifo_tx_eof_pos, - TX_SOF(0) => trbuf_fifo_tx_sof, - TX_EOF(0) => trbuf_fifo_tx_eof, - TX_SRC_RDY => trbuf_fifo_tx_src_rdy, - TX_DST_RDY => trbuf_fifo_tx_dst_rdy, - - FIFO_STATUS => open, - - FIFO_AFULL => open, - FIFO_AEMPTY => open); - - else generate - - trbuf_fifo_tx_data <= trbuf_fifo_rx_data; - trbuf_fifo_tx_sof_pos <= trbuf_fifo_rx_sof_pos; - trbuf_fifo_tx_eof_pos <= trbuf_fifo_rx_eof_pos; - trbuf_fifo_tx_sof <= trbuf_fifo_rx_sof; - trbuf_fifo_tx_eof <= trbuf_fifo_rx_eof; - trbuf_fifo_tx_src_rdy <= trbuf_fifo_rx_src_rdy; - trbuf_fifo_rx_dst_rdy <= trbuf_fifo_tx_dst_rdy; - - end generate; - tr_buff_g : if (BUFFERED_DATA_SIZE = MFB_REGION_SIZE_INBUF2TRBUF*MFB_BLOCK_SIZE_INBUF2TRBUF) generate - trbuf_fifo_rx_data <= mfb_data_inbuf; - trbuf_fifo_rx_sof <= mfb_sof_inbuf; - trbuf_fifo_rx_eof <= mfb_eof_inbuf; - trbuf_fifo_rx_src_rdy <= mfb_src_rdy_inbuf; - mfb_dst_rdy_inbuf <= trbuf_fifo_rx_dst_rdy; + mfb_data_trbuf <= mfb_data_inbuf; + mfb_sof_trbuf <= mfb_sof_inbuf; + mfb_eof_trbuf <= mfb_eof_inbuf; + mfb_src_rdy_trbuf <= mfb_src_rdy_inbuf; + mfb_dst_rdy_inbuf <= mfb_dst_rdy_trbuf; else generate @@ -555,13 +656,13 @@ begin RX_MFB_SRC_RDY => mfb_src_rdy_lng_check, RX_MFB_DST_RDY => mfb_dst_rdy_lng_check, - TX_MFB_DATA => trbuf_fifo_rx_data, - TX_MFB_SOF_POS => trbuf_fifo_rx_sof_pos, - TX_MFB_EOF_POS => trbuf_fifo_rx_eof_pos, - TX_MFB_SOF => trbuf_fifo_rx_sof, - TX_MFB_EOF => trbuf_fifo_rx_eof, - TX_MFB_SRC_RDY => trbuf_fifo_rx_src_rdy, - TX_MFB_DST_RDY => trbuf_fifo_rx_dst_rdy); + TX_MFB_DATA => mfb_data_trbuf, + TX_MFB_SOF_POS => mfb_sof_pos_trbuf, + TX_MFB_EOF_POS => mfb_eof_pos_trbuf, + TX_MFB_SOF => mfb_sof_trbuf, + TX_MFB_EOF => mfb_eof_trbuf, + TX_MFB_SRC_RDY => mfb_src_rdy_trbuf, + TX_MFB_DST_RDY => mfb_dst_rdy_trbuf); end generate; diff --git a/comp/dma/dma_calypte/comp/rx/sw/perf_cntrs.py b/comp/dma/dma_calypte/comp/rx/sw/perf_cntrs.py new file mode 100755 
index 000000000..018a38470 --- /dev/null +++ b/comp/dma/dma_calypte/comp/rx/sw/perf_cntrs.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python3 +# Copyright (C) 2024 CESNET z. s. p. o. +# Author(s): Vladislav Valek + +import nfb +import argparse +from data_logger.data_logger import DataLogger +from time import sleep + +import curses + + +class RxDmaPerfCounters(DataLogger): + + DT_COMPATIBLE = "cesnet,dma_calypte_rx_perf_cntrs" + + def __init__(self, **kwargs): + super().__init__(**kwargs) + + self.counter_names = { + 0 : "PCIE_MFB_STALL_CNTR", + 1 : "DMA_HDR_ADDR_STALL_CNTR", + 2 : "DATA_ADDR_STALL_CNTR", + 3 : "DMA_HDR_ADDR_REQ_CNTR", + 4 : "DATA_ADDR_REQ_CNTR", + 5 : "PCIE_MFB_BEATS_CNTR" + } + + self.cntr_num = self.config['CNTER_CNT'] + + def show_cntrs(self): + print("Loading {} counters.".format(self.cntr_num)) + + for i in range(self.cntr_num): + cntr_val = self.load_cnter(i) + + print("{}: {}".format(self.counter_names[i], cntr_val)) + + def load_cntrs_all(self): + cntr_storage = [0]*self.cntr_num + + for i in range(self.cntr_num): + cntr_storage[i] = self.load_cnter(i) + + return cntr_storage + + def measure_blocking(self, stdscr): + stdscr.clear() + + try: + while True: + cntr_storage = self.load_cntrs_all() + + if (cntr_storage[5] != 0): + pcie_mfb_stall = (cntr_storage[0] / cntr_storage[5]) * 100 + else: + pcie_mfb_stall = 0.0 + + stdscr.addstr(0, 0, "PCIE IP stalls: {:.2}% (absolute {})".format(pcie_mfb_stall, cntr_storage[0])) + + if (cntr_storage[4] != 0): + data_addr_stall = (cntr_storage[2] / cntr_storage[4]) * 100 + else: + data_addr_stall = 0.0 + + stdscr.addstr(1, 0, "Wait for data address: {:.2}% (absolute {})".format(data_addr_stall, cntr_storage[2])) + + if (cntr_storage[3] != 0): + dma_hdr_addr_stall = (cntr_storage[1] / cntr_storage[3]) * 100 + else: + dma_hdr_addr_stall = 0.0 + + stdscr.addstr(2, 0, "Wait for DMA address: {:.2}% (absolute {})".format(dma_hdr_addr_stall, cntr_storage[1])) + + stdscr.addstr(3, 0, "Total data address req: 
{}".format(cntr_storage[4])) + stdscr.addstr(4, 0, "Total DMA addr req: {}".format(cntr_storage[3])) + stdscr.addstr(5, 0, "Total DMA PCIE beats: {}".format(cntr_storage[5])) + + stdscr.refresh() + sleep(1) + stdscr.clear() + + except KeyboardInterrupt: + print("Interrupt caught, terminating...") + + +def parseParams(): + parser = argparse.ArgumentParser( + description="Control script for performance counters.", + ) + + access = parser.add_argument_group('Card specifiers') + access.add_argument( + '-d', '--device', default=nfb.libnfb.Nfb.default_dev_path, + metavar='device', help="Target device") + access.add_argument( + '-i', '--index', type=int, metavar='index', default=0, help="Index of a counter array inside DeviceTree") + + common = parser.add_argument_group("Counters control") + common.add_argument('-p', '--print', action='store_true', help="Prints internal registers in JSON format") + common.add_argument('-m', '--measure', action='store_true', help="Continuously measures the amount of blocking") + common.add_argument('--rst', action='store_true', help="Reset the component.") + + args = parser.parse_args() + return args + + +if __name__ == '__main__': + args = parseParams() + perf_cntrs = RxDmaPerfCounters(dev=args.device, index=args.index) + + if args.rst: + perf_cntrs.rst() + elif args.print: + print(perf_cntrs.stats_to_str(hist=True)) + elif args.measure: + perf_cntrs.rst() + curses.wrapper(perf_cntrs.measure_blocking) + else: + perf_cntrs.show_cntrs() diff --git a/comp/dma/dma_calypte/comp/rx/uvm/signals.fdo b/comp/dma/dma_calypte/comp/rx/uvm/signals.fdo index 6bb87451a..a1fdc3188 100644 --- a/comp/dma/dma_calypte/comp/rx/uvm/signals.fdo +++ b/comp/dma/dma_calypte/comp/rx/uvm/signals.fdo @@ -6,7 +6,14 @@ proc ctrl {NAME PATH} { add wave -divider "$NAME" - add_wave "-noupdate -hex" $PATH/MI_* + add_wave "-noupdate -hex" $PATH/MI_ADDR + add_wave "-noupdate -hex" $PATH/MI_DWR + add_wave "-noupdate -hex" $PATH/MI_BE + add_wave "-noupdate -hex" $PATH/MI_RD + 
add_wave "-noupdate -hex" $PATH/MI_WR + add_wave "-noupdate -hex" $PATH/MI_DRD + add_wave "-noupdate -hex" $PATH/MI_ARDY + add_wave "-noupdate -hex" $PATH/MI_DRDY } proc wr {NAME PATH} { @@ -20,6 +27,7 @@ proc rd {NAME PATH} { } proc ints {PATH} { + add_wave [list -noupdate -hex -group "INTERNALS"] $PATH/mi_split_* add_wave [list -noupdate -hex -group "INTERNALS"] $PATH/start_* add_wave [list -noupdate -hex -group "INTERNALS"] $PATH/stop_* add_wave [list -noupdate -hex -group "INTERNALS"] $PATH/hdrm_* @@ -30,6 +38,15 @@ proc ints {PATH} { } proc high_level_view {PATH} { + add_wave [list -noupdate -bin -group "HIGH LEVEL DASHBOARD"] $PATH/data_addr_req_cntr_incr + add_wave [list -noupdate -bin -group "HIGH LEVEL DASHBOARD"] $PATH/dma_hdr_addr_req_cntr_incr + add_wave [list -noupdate -bin -group "HIGH LEVEL DASHBOARD"] $PATH/data_addr_stall_incr + add_wave [list -noupdate -bin -group "HIGH LEVEL DASHBOARD"] $PATH/dma_hdr_addr_stall_incr + add_wave [list -noupdate -bin -group "HIGH LEVEL DASHBOARD"] $PATH/pcie_mfb_stall_incr + add_wave [list -noupdate -unsigned -group "HIGH LEVEL DASHBOARD"] $PATH/data_buff_full_chan + add_wave [list -noupdate -bin -group "HIGH LEVEL DASHBOARD"] $PATH/data_buff_full_cntr_incr + add_wave [list -noupdate -unsigned -group "HIGH LEVEL DASHBOARD"] $PATH/dma_hdr_buff_full_chan + add_wave [list -noupdate -bin -group "HIGH LEVEL DASHBOARD"] $PATH/dma_hdr_buff_full_cntr_incr add_wave [list -noupdate -hex -group "HIGH LEVEL DASHBOARD"] $PATH/rx_dma_sw_manager_i/start_fsm_pst add_wave [list -noupdate -hex -group "HIGH LEVEL DASHBOARD"] $PATH/rx_dma_sw_manager_i/start_fsm_nst add_wave [list -noupdate -hex -group "HIGH LEVEL DASHBOARD"] $PATH/rx_dma_sw_manager_i/stop_fsm_pst diff --git a/comp/dma/dma_calypte/comp/rx/uvm/tbench/dut.sv b/comp/dma/dma_calypte/comp/rx/uvm/tbench/dut.sv index 19ad9b1e2..028f97a93 100644 --- a/comp/dma/dma_calypte/comp/rx/uvm/tbench/dut.sv +++ b/comp/dma/dma_calypte/comp/rx/uvm/tbench/dut.sv @@ -5,7 +5,7 @@ //-- 
SPDX-License-Identifier: BSD-3-Clause -module DMA_LL_DUT #(DEVICE, USER_REGIONS, USER_REGION_SIZE, USER_BLOCK_SIZE, USER_ITEM_WIDTH, PCIE_UP_REGIONS, PCIE_UP_REGION_SIZE, PCIE_UP_BLOCK_SIZE, PCIE_UP_ITEM_WIDTH, CHANNELS, PKT_SIZE_MAX, SW_ADDR_WIDTH, POINTER_WIDTH, CNTRS_WIDTH, OPT_BUFF, TRBUF_REG_EN) +module DMA_LL_DUT #(DEVICE, USER_REGIONS, USER_REGION_SIZE, USER_BLOCK_SIZE, USER_ITEM_WIDTH, PCIE_UP_REGIONS, PCIE_UP_REGION_SIZE, PCIE_UP_BLOCK_SIZE, PCIE_UP_ITEM_WIDTH, CHANNELS, PKT_SIZE_MAX, SW_ADDR_WIDTH, POINTER_WIDTH, CNTRS_WIDTH, TRBUF_REG_EN, PERF_CNTR_EN) ( input logic CLK, input logic RST, @@ -47,8 +47,8 @@ module DMA_LL_DUT #(DEVICE, USER_REGIONS, USER_REGION_SIZE, USER_BLOCK_SIZE, USE .SW_ADDR_WIDTH (SW_ADDR_WIDTH), .CNTRS_WIDTH (CNTRS_WIDTH), .PKT_SIZE_MAX (PKT_SIZE_MAX), - .TRBUF_FIFO_EN (OPT_BUFF), - .TRBUF_REG_EN (TRBUF_REG_EN) + .TRBUF_REG_EN (TRBUF_REG_EN), + .PERF_CNTR_EN (PERF_CNTR_EN) ) VHDL_DUT_U ( .CLK (CLK), .RESET (RST), diff --git a/comp/dma/dma_calypte/comp/rx/uvm/tbench/env/model.sv b/comp/dma/dma_calypte/comp/rx/uvm/tbench/env/model.sv index 5eb0c0347..b8eaf2f00 100644 --- a/comp/dma/dma_calypte/comp/rx/uvm/tbench/env/model.sv +++ b/comp/dma/dma_calypte/comp/rx/uvm/tbench/env/model.sv @@ -163,7 +163,7 @@ class model #(CHANNELS, PKT_SIZE_MAX, META_WIDTH, DEVICE) extends uvm_component; function void get_pcie_header(int unsigned packet_size, logic [64-1:0] addr, output logic[32-1 : 0] header[], output logic[168-1 : 0] meta); logic [2-1:0] at = 0; logic [1-1:0] ecrc = 0; - logic [3-1:0] attr = 0; + logic [3-1:0] attr = 2; logic [3-1:0] tc = 0; logic [1-1:0] rq_id_enabled = 0; logic [16-1:0] cm_id = 0; //compleater ID diff --git a/comp/dma/dma_calypte/comp/rx/uvm/tbench/testbench.sv b/comp/dma/dma_calypte/comp/rx/uvm/tbench/testbench.sv index c4e539584..9bc154976 100644 --- a/comp/dma/dma_calypte/comp/rx/uvm/tbench/testbench.sv +++ b/comp/dma/dma_calypte/comp/rx/uvm/tbench/testbench.sv @@ -80,8 +80,8 @@ module testbench; .SW_ADDR_WIDTH 
(test::SW_ADDR_WIDTH), .POINTER_WIDTH (test::POINTER_WIDTH), .CNTRS_WIDTH (test::CNTRS_WIDTH), - .OPT_BUFF (test::OPT_BUFF), - .TRBUF_REG_EN (test::TRBUF_REG_EN) + .TRBUF_REG_EN (test::TRBUF_REG_EN), + .PERF_CNTR_EN (test::PERF_CNTR_EN) ) DUT_U ( .CLK (CLK), diff --git a/comp/dma/dma_calypte/comp/rx/uvm/tbench/tests/pkg.sv b/comp/dma/dma_calypte/comp/rx/uvm/tbench/tests/pkg.sv index 1bfcd90c3..cae46ebf2 100644 --- a/comp/dma/dma_calypte/comp/rx/uvm/tbench/tests/pkg.sv +++ b/comp/dma/dma_calypte/comp/rx/uvm/tbench/tests/pkg.sv @@ -30,8 +30,8 @@ package test; parameter SW_ADDR_WIDTH = 64; parameter CNTRS_WIDTH = 64; parameter PKT_SIZE_MAX = 2**12; - parameter OPT_BUFF = 1'b0; parameter TRBUF_REG_EN = 1'b1; + parameter PERF_CNTR_EN = 1'b0; parameter DEVICE = "ULTRASCALE"; diff --git a/comp/dma/dma_calypte/comp/test_core/DevTree.tcl b/comp/dma/dma_calypte/comp/test_core/DevTree.tcl new file mode 100644 index 000000000..fcdf3eb20 --- /dev/null +++ b/comp/dma/dma_calypte/comp/test_core/DevTree.tcl @@ -0,0 +1,39 @@ +# DevTree.tcl: generate nodes for the test core +# Copyright (C) 2024 CESNET z. s. p. o. 
+# Author(s): Vladislav Valek +# +# SPDX-License-Identifier: BSD-3-Clause + +proc dts_calypte_test_core {DTS base_addr {dbg_en False}} { + upvar 1 $DTS dts + + set LOOPBACK_BASE_ADDR [expr $base_addr + 0x0] + set TX_DBG_CORE_BASE_ADDR [expr $base_addr + 0x10000] + set LATENCY_METER_BASE_ADDR [expr $base_addr + 0x20000] + set RESET_FSM_BASE_ADDR [expr $base_addr + 0x30000] + + dts_create_node dts "dma_calypte_test_core0" { + + dts_create_node dts "mfb_loopback0" { + dts_appendprop_comp_node dts $LOOPBACK_BASE_ADDR 8 "cesnet,mfb_loopback" + } + + if ($dbg_en) { + dts_create_node dts "dma_calypte_debug_core0" { + dts_appendprop_comp_node dts $TX_DBG_CORE_BASE_ADDR 0x1600 "cesnet,dma_calypte_debug_core" + + dts_create_node dts "mfb_generator0" { + dts_appendprop_comp_node dts [expr $TX_DBG_CORE_BASE_ADDR+0x8000] 0x40 "cesnet,mfb_generator" + } + } + + dts_create_node dts "dma_calypte_latency_meter0" { + dts_appendprop_comp_node dts $LATENCY_METER_BASE_ADDR 0x30 "cesnet,dma_calypte_latency_meter" + } + } + + dts_create_node dts "dma_calypte_reset_fsm0" { + dts_appendprop_comp_node dts $RESET_FSM_BASE_ADDR 0x4 "cesnet,dma_calypte_reset_fsm" + } + } +} diff --git a/comp/dma/dma_calypte/comp/test_core/Modules.tcl b/comp/dma/dma_calypte/comp/test_core/Modules.tcl index ded8b6532..7c4911765 100644 --- a/comp/dma/dma_calypte/comp/test_core/Modules.tcl +++ b/comp/dma/dma_calypte/comp/test_core/Modules.tcl @@ -24,3 +24,4 @@ lappend COMPONENTS [ list "LATENCY_METER" $LATENCY_METER_BASE "FUL lappend COMPONENTS [ list "MFB_GENERATOR" $MFB_GENERATOR_BASE "FULL" ] lappend MOD "$ENTITY_BASE/dma_test_core.vhd" +lappend MOD "$ENTITY_BASE/DevTree.tcl" diff --git a/comp/dma/dma_calypte/comp/test_core/comp/tx_debug_core/debug_core.vhd b/comp/dma/dma_calypte/comp/test_core/comp/tx_debug_core/debug_core.vhd index e7c7b0596..d709e892e 100644 --- a/comp/dma/dma_calypte/comp/test_core/comp/tx_debug_core/debug_core.vhd +++ b/comp/dma/dma_calypte/comp/test_core/comp/tx_debug_core/debug_core.vhd 
@@ -509,7 +509,7 @@ architecture FULL of TX_DMA_DEBUG_CORE is -- ============================================================================================= -- Debug signals -- ============================================================================================= - attribute mark_debug : string; + -- attribute mark_debug : string; signal aux_sig_mfb_meta_chan_int : std_logic_vector(log2(CHANNELS) -1 downto 0); signal aux_sig_mfb_meta_pkt_size : std_logic_vector(log2(PKT_SIZE_MAX+1) -1 downto 0); @@ -518,26 +518,26 @@ architecture FULL of TX_DMA_DEBUG_CORE is signal tx_mfb_meta_chan_int : std_logic_vector(log2(CHANNELS) -1 downto 0); signal tx_mfb_meta_pkt_size : std_logic_vector(log2(PKT_SIZE_MAX+1) -1 downto 0); signal tx_mfb_meta_hdr_meta : std_logic_vector(DMA_META_WIDTH -1 downto 0); - attribute mark_debug of tx_mfb_meta_chan_int : signal is "true"; - attribute mark_debug of tx_mfb_meta_pkt_size : signal is "true"; - attribute mark_debug of tx_mfb_meta_hdr_meta : signal is "true"; - attribute mark_debug of TX_MFB_DATA : signal is "true"; - attribute mark_debug of TX_MFB_META : signal is "true"; - attribute mark_debug of TX_MFB_SOF : signal is "true"; - attribute mark_debug of TX_MFB_EOF : signal is "true"; - attribute mark_debug of TX_MFB_SOF_POS : signal is "true"; - attribute mark_debug of TX_MFB_EOF_POS : signal is "true"; - attribute mark_debug of TX_MFB_SRC_RDY : signal is "true"; - attribute mark_debug of TX_MFB_DST_RDY : signal is "true"; - - attribute mark_debug of pattern_comp_pst : signal is "true"; - attribute mark_debug of pattern_match_cntr_incr : signal is "true"; - attribute mark_debug of pattern_mismatch_cntr_incr : signal is "true"; - attribute mark_debug of pattern_copy_val : signal is "true"; - attribute mark_debug of meta_pattern_comp_pst : signal is "true"; - attribute mark_debug of meta_pattern_match_cntr_incr : signal is "true"; - attribute mark_debug of meta_pattern_mismatch_cntr_incr : signal is "true"; - attribute mark_debug 
of meta_pattern_copy_val : signal is "true"; + -- attribute mark_debug of tx_mfb_meta_chan_int : signal is "true"; + -- attribute mark_debug of tx_mfb_meta_pkt_size : signal is "true"; + -- attribute mark_debug of tx_mfb_meta_hdr_meta : signal is "true"; + -- attribute mark_debug of TX_MFB_DATA : signal is "true"; + -- attribute mark_debug of TX_MFB_META : signal is "true"; + -- attribute mark_debug of TX_MFB_SOF : signal is "true"; + -- attribute mark_debug of TX_MFB_EOF : signal is "true"; + -- attribute mark_debug of TX_MFB_SOF_POS : signal is "true"; + -- attribute mark_debug of TX_MFB_EOF_POS : signal is "true"; + -- attribute mark_debug of TX_MFB_SRC_RDY : signal is "true"; + -- attribute mark_debug of TX_MFB_DST_RDY : signal is "true"; + + -- attribute mark_debug of pattern_comp_pst : signal is "true"; + -- attribute mark_debug of pattern_match_cntr_incr : signal is "true"; + -- attribute mark_debug of pattern_mismatch_cntr_incr : signal is "true"; + -- attribute mark_debug of pattern_copy_val : signal is "true"; + -- attribute mark_debug of meta_pattern_comp_pst : signal is "true"; + -- attribute mark_debug of meta_pattern_match_cntr_incr : signal is "true"; + -- attribute mark_debug of meta_pattern_mismatch_cntr_incr : signal is "true"; + -- attribute mark_debug of meta_pattern_copy_val : signal is "true"; begin tx_mfb_meta_chan_int <= TX_MFB_META(log2(CHANNELS) -1 downto 0); diff --git a/comp/dma/dma_calypte/comp/test_core/dma_test_core.vhd b/comp/dma/dma_calypte/comp/test_core/dma_test_core.vhd index a867f9875..c06d659ac 100644 --- a/comp/dma/dma_calypte/comp/test_core/dma_test_core.vhd +++ b/comp/dma/dma_calypte/comp/test_core/dma_test_core.vhd @@ -33,7 +33,6 @@ entity DMA_TEST_CORE is MFB_LOOPBACK_EN : boolean := TRUE; LATENCY_METER_EN : boolean := TRUE; TX_DMA_DBG_CORE_EN : boolean := TRUE; - RX_DMA_DBG_CORE_EN : boolean := TRUE; ST_SP_DBG_SIGNAL_W : natural := 2; -- Width of MI bus @@ -117,15 +116,14 @@ end entity; architecture FULL of DMA_TEST_CORE 
is - constant MI_SPLIT_PORTS : natural := 5; + constant MI_SPLIT_PORTS : natural := 4; constant MI_SPLIT_BASES : slv_array_t(MI_SPLIT_PORTS-1 downto 0)(MI_WIDTH-1 downto 0) := ( 0 => X"00000000", -- MFB Loopback 1 => X"00010000", -- TX DMA Debug Core 2 => X"00020000", -- Latency meter - 3 => X"00030000", -- Reset FSM - 4 => X"00040000" -- RX DMA Debug Core + 3 => X"00030000" -- Reset FSM ); - constant MI_SPLIT_ADDR_MASK : std_logic_vector(MI_WIDTH -1 downto 0) := X"00070000"; + constant MI_SPLIT_ADDR_MASK : std_logic_vector(MI_WIDTH -1 downto 0) := X"00030000"; -- MI Asynchronous crossing signal mi_dwr_sync : std_logic_vector(MI_WIDTH -1 downto 0); @@ -248,25 +246,25 @@ architecture FULL of DMA_TEST_CORE is -- ============================================================================================= -- Debug probes -- ============================================================================================= - attribute mark_debug : string; - - attribute mark_debug of data_logger_rst : signal is "true"; - attribute mark_debug of tst_gen_mux_sel : signal is "true"; - attribute mark_debug of meas_fsm_pst : signal is "true"; - attribute mark_debug of pkt_cnt_pst : signal is "true"; - attribute mark_debug of test_finished : signal is "true"; - - attribute mark_debug of mfb_gen_ctrl_pkt_cnt_clr : signal is "true"; - attribute mark_debug of mfb_gen_ctrl_length : signal is "true"; - attribute mark_debug of mfb_gen_ctrl_chan_val : signal is "true"; - attribute mark_debug of mfb_gen_ctrl_chan_inc : signal is "true"; - attribute mark_debug of mfb_gen_ctrl_en : signal is "true"; - attribute mark_debug of mfb_gen_ctrl_pkt_cnt : signal is "true"; - - attribute mark_debug of lat_meas_val : signal is "true"; - attribute mark_debug of lat_meas_val_vld : signal is "true"; - attribute mark_debug of lat_meas_fifo_full : signal is "true"; - attribute mark_debug of lat_meas_fifo_items : signal is "true"; + -- attribute mark_debug : string; + + -- attribute mark_debug of 
data_logger_rst : signal is "true"; + -- attribute mark_debug of tst_gen_mux_sel : signal is "true"; + -- attribute mark_debug of meas_fsm_pst : signal is "true"; + -- attribute mark_debug of pkt_cnt_pst : signal is "true"; + -- attribute mark_debug of test_finished : signal is "true"; + + -- attribute mark_debug of mfb_gen_ctrl_pkt_cnt_clr : signal is "true"; + -- attribute mark_debug of mfb_gen_ctrl_length : signal is "true"; + -- attribute mark_debug of mfb_gen_ctrl_chan_val : signal is "true"; + -- attribute mark_debug of mfb_gen_ctrl_chan_inc : signal is "true"; + -- attribute mark_debug of mfb_gen_ctrl_en : signal is "true"; + -- attribute mark_debug of mfb_gen_ctrl_pkt_cnt : signal is "true"; + + -- attribute mark_debug of lat_meas_val : signal is "true"; + -- attribute mark_debug of lat_meas_val_vld : signal is "true"; + -- attribute mark_debug of lat_meas_fifo_full : signal is "true"; + -- attribute mark_debug of lat_meas_fifo_items : signal is "true"; begin mi_async_i : entity work.MI_ASYNC generic map( @@ -506,7 +504,7 @@ begin HIST_EN => (others => true), SUM_EXTRA_WIDTH => (others => 16), - HIST_BOX_CNT => (others => 100), + HIST_BOX_CNT => (others => 128), HIST_BOX_WIDTH => (others => 32), CTRLO_DEFAULT => (others => '0')) port map ( @@ -690,85 +688,17 @@ begin rx_mfb_dst_rdy_lbk <= rx_mfb_dst_rdy_gen_mux; end generate; - rx_dma_debug_core_g: if (RX_DMA_DBG_CORE_EN) generate - rx_debug_core_i : entity work.TX_DMA_DEBUG_CORE - generic map ( - DEVICE => DEVICE, - - MFB_REGIONS => MFB_REGIONS, - MFB_REGION_SIZE => MFB_REGION_SIZE, - MFB_BLOCK_SIZE => MFB_BLOCK_SIZE, - MFB_ITEM_WIDTH => MFB_ITEM_WIDTH, - - DMA_META_WIDTH => HDR_META_WIDTH, - PKT_SIZE_MAX => USR_RX_PKT_SIZE_MAX, - CHANNELS => RX_CHANNELS, - - DBG_CNTRS_WIDTH => 64, - ST_SP_DBG_SIGNAL_W => ST_SP_DBG_SIGNAL_W, - MI_WIDTH => MI_WIDTH, - MI_SAME_CLK => TRUE) - port map ( - CLK => CLK, - RESET => RESET, - - ST_SP_DBG_CHAN => (others => '0'), - ST_SP_DBG_META => (others => '0'), - - 
RX_MFB_META_PKT_SIZE => rx_mfb_meta_pkt_size_gen_mux, - RX_MFB_META_HDR_META => rx_mfb_meta_hdr_meta_gen_mux, - RX_MFB_META_CHAN => rx_mfb_meta_chan_gen_mux, - - RX_MFB_DATA => rx_mfb_data_gen_mux, - RX_MFB_SOF_POS => rx_mfb_sof_pos_gen_mux, - RX_MFB_EOF_POS => rx_mfb_eof_pos_gen_mux, - RX_MFB_SOF => rx_mfb_sof_gen_mux, - RX_MFB_EOF => rx_mfb_eof_gen_mux, - RX_MFB_SRC_RDY => rx_mfb_src_rdy_gen_mux, - RX_MFB_DST_RDY => rx_mfb_dst_rdy_gen_mux, - - TX_MFB_DATA => RX_MFB_DATA_OUT, - TX_MFB_META => rx_mfb_meta_dbg, - TX_MFB_SOF_POS => RX_MFB_SOF_POS_OUT, - TX_MFB_EOF_POS => RX_MFB_EOF_POS_OUT, - TX_MFB_SOF => RX_MFB_SOF_OUT, - TX_MFB_EOF => RX_MFB_EOF_OUT, - TX_MFB_SRC_RDY => RX_MFB_SRC_RDY_OUT, - TX_MFB_DST_RDY => RX_MFB_DST_RDY_OUT, - - MI_CLK => MI_CLK, - MI_RESET => MI_RESET, - - MI_ADDR => mi_addr_split(4), - MI_DWR => mi_dwr_split(4), - MI_BE => mi_be_split(4), - MI_RD => mi_rd_split(4), - MI_WR => mi_wr_split(4), - MI_DRD => mi_drd_split(4), - MI_ARDY => mi_ardy_split(4), - MI_DRDY => mi_drdy_split(4) - ); - - RX_MFB_META_PKT_SIZE_OUT <= rx_mfb_meta_dbg(log2(USR_RX_PKT_SIZE_MAX+1) + HDR_META_WIDTH + log2(RX_CHANNELS) -1 downto HDR_META_WIDTH + log2(RX_CHANNELS)); - RX_MFB_META_HDR_META_OUT <= rx_mfb_meta_dbg(HDR_META_WIDTH + log2(RX_CHANNELS) -1 downto log2(RX_CHANNELS)); - RX_MFB_META_CHAN_OUT <= rx_mfb_meta_dbg(log2(RX_CHANNELS) -1 downto 0); - else generate - mi_drd_split(4) <= X"DEADBEAD"; - mi_ardy_split(4) <= mi_rd_split(4) or mi_wr_split(4); - mi_drdy_split(4) <= mi_rd_split(4); - - RX_MFB_META_PKT_SIZE_OUT <= rx_mfb_meta_pkt_size_gen_mux; - RX_MFB_META_HDR_META_OUT <= rx_mfb_meta_hdr_meta_gen_mux; - RX_MFB_META_CHAN_OUT <= rx_mfb_meta_chan_gen_mux; - - RX_MFB_DATA_OUT <= rx_mfb_data_gen_mux; - RX_MFB_SOF_OUT <= rx_mfb_sof_gen_mux; - RX_MFB_EOF_OUT <= rx_mfb_eof_gen_mux; - RX_MFB_SOF_POS_OUT <= rx_mfb_sof_pos_gen_mux; - RX_MFB_EOF_POS_OUT <= rx_mfb_eof_pos_gen_mux; - RX_MFB_SRC_RDY_OUT <= rx_mfb_src_rdy_gen_mux; - rx_mfb_dst_rdy_gen_mux <= 
RX_MFB_DST_RDY_OUT; - end generate; + RX_MFB_META_PKT_SIZE_OUT <= rx_mfb_meta_pkt_size_gen_mux; + RX_MFB_META_HDR_META_OUT <= rx_mfb_meta_hdr_meta_gen_mux; + RX_MFB_META_CHAN_OUT <= rx_mfb_meta_chan_gen_mux; + + RX_MFB_DATA_OUT <= rx_mfb_data_gen_mux; + RX_MFB_SOF_OUT <= rx_mfb_sof_gen_mux; + RX_MFB_EOF_OUT <= rx_mfb_eof_gen_mux; + RX_MFB_SOF_POS_OUT <= rx_mfb_sof_pos_gen_mux; + RX_MFB_EOF_POS_OUT <= rx_mfb_eof_pos_gen_mux; + RX_MFB_SRC_RDY_OUT <= rx_mfb_src_rdy_gen_mux; + rx_mfb_dst_rdy_gen_mux <= RX_MFB_DST_RDY_OUT; -- ============================================================================================= -- Resetting FSM diff --git a/comp/dma/dma_calypte/dma_calypte.vhd b/comp/dma/dma_calypte/dma_calypte.vhd index 4ce156855..6ba9ff7e1 100644 --- a/comp/dma/dma_calypte/dma_calypte.vhd +++ b/comp/dma/dma_calypte/dma_calypte.vhd @@ -72,6 +72,8 @@ entity DMA_CALYPTE is -- Enables an additional register of the transaction buffer that improves -- throughput TRBUF_REG_EN : boolean := false; + -- Enables performance counters allowing metrics generation. 
+ PERF_CNTR_EN : boolean := false; -- ===================================================================== -- TX DMA settings @@ -266,8 +268,8 @@ begin CNTRS_WIDTH => DSP_CNT_WIDTH, HDR_META_WIDTH => HDR_META_WIDTH, PKT_SIZE_MAX => USR_RX_PKT_SIZE_MAX, - TRBUF_FIFO_EN => FALSE, - TRBUF_REG_EN => TRBUF_REG_EN) + TRBUF_REG_EN => TRBUF_REG_EN, + PERF_CNTR_EN => PERF_CNTR_EN) port map ( CLK => CLK, diff --git a/core/comp/dma/dma_mod/DevTree.tcl b/core/comp/dma/dma_mod/DevTree.tcl index 67692e91e..cace722ab 100644 --- a/core/comp/dma/dma_mod/DevTree.tcl +++ b/core/comp/dma/dma_mod/DevTree.tcl @@ -40,10 +40,14 @@ proc dts_dmamod_open {base type rxn txn pcie rx_frame_size_max tx_frame_size_max } } elseif {$type == 4} { set var_base [expr $base + $i * 0x80] - append ret [dts_dma_calypte_ctrl "rx" $i $var_base $pcie] + dts_dma_calypte_ctrl ret "rx" $i $var_base $pcie } } + if {$type == 4 && $DMA_DEBUG_ENABLE} { + dts_dma_perf_cntrs ret [expr $base + 0x3000] + } + # TX DMA channels for {set i 0} {$i < $txn} {incr i} { if {$type == 3} { @@ -51,7 +55,7 @@ proc dts_dmamod_open {base type rxn txn pcie rx_frame_size_max tx_frame_size_max append ret [dts_dma_medusa_ctrl "ndp" $type "tx" $i $var_base $pcie "dma_params_tx$pcie"] } elseif {$type == 4} { set var_base [expr $base + $i * 0x80 + $offset] - append ret [dts_dma_calypte_ctrl "tx" $i $var_base $pcie] + dts_dma_calypte_ctrl ret "tx" $i $var_base $pcie } } diff --git a/core/comp/dma/dma_mod/wrapper/dma_calypte_wrapper_arch.vhd b/core/comp/dma/dma_mod/wrapper/dma_calypte_wrapper_arch.vhd index 727f5ecd0..9e3173b61 100644 --- a/core/comp/dma/dma_mod/wrapper/dma_calypte_wrapper_arch.vhd +++ b/core/comp/dma/dma_mod/wrapper/dma_calypte_wrapper_arch.vhd @@ -25,7 +25,6 @@ architecture CALYPTE of DMA_WRAPPER is constant MFB_LOOPBACK_EN : boolean := TRUE; constant LATENCY_METER_EN : boolean := DBG_CNTR_EN; constant TX_DMA_DBG_CORE_EN : boolean := DBG_CNTR_EN; - constant RX_DMA_DBG_CORE_EN : boolean := FALSE; constant 
ST_SP_DBG_META_WIDTH : natural := 4; @@ -599,7 +598,6 @@ begin MFB_LOOPBACK_EN => MFB_LOOPBACK_EN, LATENCY_METER_EN => LATENCY_METER_EN, TX_DMA_DBG_CORE_EN => TX_DMA_DBG_CORE_EN, - RX_DMA_DBG_CORE_EN => RX_DMA_DBG_CORE_EN, ST_SP_DBG_SIGNAL_W => ST_SP_DBG_META_WIDTH, MI_WIDTH => MI_WIDTH @@ -695,6 +693,7 @@ begin RX_PTR_WIDTH => RX_DP_WIDTH, USR_RX_PKT_SIZE_MAX => USR_RX_PKT_SIZE_MAX, TRBUF_REG_EN => TRUE, + PERF_CNTR_EN => DBG_CNTR_EN, TX_CHANNELS => TX_CHANNELS, TX_PTR_WIDTH => TX_DP_WIDTH, diff --git a/core/top/DevTree.tcl b/core/top/DevTree.tcl index a9bda9d12..c96adb3fe 100644 --- a/core/top/DevTree.tcl +++ b/core/top/DevTree.tcl @@ -89,8 +89,8 @@ proc dts_build_netcope {} { } global DMA_DEBUG_ENABLE - if {$DMA_TYPE == 4 && $DMA_DEBUG_ENABLE} { - append ret [data_logger "0x1320000" 0 "dma_calypte_latency_meter"] + if {$DMA_TYPE == 4} { + dts_calypte_test_core ret "0x1300000" $DMA_DEBUG_ENABLE } # Network module @@ -201,7 +201,7 @@ proc dts_build_netcope {} { for {set i 0} {$i < $CHAN_PER_EP} {incr i} { set var_buff_base [expr $TX_DATA_BUFF_BASE + $i * $TX_BUFF_SIZE_HEX] - append ret [dts_dma_calypte_tx_buffer "data" $i $var_buff_base $TX_BUFF_SIZE_HEX "0"] + dts_dma_calypte_tx_buffer ret "data" $i $var_buff_base $TX_BUFF_SIZE_HEX "0" } set TX_HDR_BUFF_BASE [expr $TX_DATA_BUFF_BASE + $CHAN_PER_EP*$TX_BUFF_SIZE] @@ -209,7 +209,7 @@ proc dts_build_netcope {} { for {set i 0} {$i < $CHAN_PER_EP} {incr i} { set var_buff_base [expr $TX_HDR_BUFF_BASE + $i * $TX_BUFF_SIZE_HEX] - append ret [dts_dma_calypte_tx_buffer "hdr" $i $var_buff_base $TX_BUFF_SIZE_HEX "0"] + dts_dma_calypte_tx_buffer ret "hdr" $i $var_buff_base $TX_BUFF_SIZE_HEX "0" } append ret "};" }