Skip to content

Commit 9251cc2

Browse files
committed
Merge branch 'valek-feat-dma_calypte_perf_cntrs' into 'devel'
RX DMA Calypte [FEATURE]: Add performance counters to measure blocking behavior See merge request ndk/ndk-fpga!87
2 parents f1cd361 + c4d520f commit 9251cc2

File tree

23 files changed

+777
-286
lines changed

23 files changed

+777
-286
lines changed

build/DevTree.tcl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
source [file join [file dirname [info script]] "scripts" "dts" "packed_item.tcl"]
22
source [file join [file dirname [info script]] "scripts" "dts" "ndp_header.tcl"]
3+
source [file join [file dirname [info script]] "scripts" "dts" "dts_templates.tcl"]
34

45
# ----------------------------------------------------------------------
56

build/Vivado_non_prj.inc.tcl

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -655,9 +655,12 @@ proc nb_sanitize_vars {synth_flags hierarchy} {
655655

656656
set SYNTH_FLAGS(TOOL) "vivado"
657657

658-
global NB_PLATFORM_TAGS
659-
global PLATFORM_TAGS
660-
set NB_PLATFORM_TAGS "xilinx $PLATFORM_TAGS"
658+
global NB_PLATFORM_TAGS env
659+
if {[info exists env(PLATFORM_TAGS)]} {
660+
set NB_PLATFORM_TAGS "$env(PLATFORM_TAGS)"
661+
} else {
662+
set NB_PLATFORM_TAGS "xilinx"
663+
}
661664

662665
if {[info commands version] != ""} {
663666
set SYNTH_FLAGS(TOOL_VERSION) [version -short]

build/readme.rst

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,3 +385,107 @@ The (incomplete) list of SYNTH_FLAGS array items
385385
- USE_XPM_LIBRARIES: includes XPM_CDC XPM_MEMORY XPM_FIFO in Vivado projects
386386

387387
For other values and their purpose see the Vivado.inc.tcl or Quartus.inc.tcl file in the build directory.
388+
389+
Device Tree nodes
390+
-----------------
391+
392+
For the software to find internal firmware components, a *Device Tree (DT)* is
393+
used. This provides a tree of available parts (called nodes in the DT
394+
terminology) of the design that can be accessed from the host without
395+
restriction. A developer creates TCL procedures that generate nodes to *DT
396+
string (DTS)* for the components they see fit. Since the creation of the DTS can
397+
be challenging, there are several TCL procedures provided that simplify the
398+
process. These procedures are contained within the ``dts_templates.tcl`` file
399+
with clarifying comments. The following examples provide an overview of their
400+
usage.
401+
402+
Example 1
403+
~~~~~~~~~
404+
405+
This presents the minimal viable code that creates a node
406+
``dma_calypte_rx_perf_cntrs0`` with the base address *0x8000*
407+
and the size *0x30*. It also contains a compatible string
408+
``cesnet,dma_calypte_rx_perf_cntrs``. The string property is appended to the ``dts``
409+
variable that contains a reference to the required Device Tree string (DTS).
410+
411+
.. code-block:: tcl
412+
413+
dts_create_node dts "dma_calypte_rx_perf_cntrs0" {
414+
dts_appendprop_comp_node dts 0x8000 0x30 "cesnet,dma_calypte_rx_perf_cntrs"
415+
}
416+
417+
Example 2
418+
~~~~~~~~~
419+
420+
A second, more complex example demonstrates addition of multiple properties to a
421+
node called ``dma_ctrl_calypte_$dir$id`` (string can be further adjusted through
422+
parameters ``dir`` and ``id``).
423+
424+
.. code-block:: tcl
425+
426+
proc dts_dma_calypte_ctrl {DTS dir id base pcie} {
427+
upvar 1 $DTS dts
428+
429+
dts_create_node dts "dma_ctrl_calypte_$dir$id" {
430+
# Adding compatible string "cesnet,dma_ctrl_calypte_$dir" and the
431+
# reg property with base address $base and the size 0x80.
432+
dts_appendprop_comp_node dts $base 0x80 "cesnet,dma_ctrl_calypte_$dir"
433+
# Integer property called "version" with the value 0x10000
434+
dts_appendprop_int dts "version" 0x10000
435+
# Integer property "pcie" with the value of $pcie
436+
dts_appendprop_int dts "pcie" $pcie
437+
438+
# The addition of custom properties (with custom names) can be done
439+
# through a standard "append" macro.
440+
if { $dir == "tx" } {
441+
append dts "data_buff = <&dma_calypte_tx_data_buff$id>;"
442+
append dts "hdr_buff = <&dma_calypte_tx_hdr_buff$id>;"
443+
}
444+
append dts "params = <&dma_params_$dir$pcie>;"
445+
}
446+
}
447+
448+
Example 3
449+
~~~~~~~~~
450+
451+
This example shows how a complex node with multiple subnodes is created. The parent
452+
node is called ``dma_calypte_test_core0`` and contains subnodes
453+
``mfb_loopback0``, ``dma_calypte_debug_core0``, ``dma_calypte_latency_meter0``
454+
and ``dma_calypte_reset_fsm0``. Further nesting of nodes is possible as can be
455+
seen when adding the ``mfb_generator0`` node. Each of the called procedures
456+
contains a reference to the same DTS from the ``dts`` variable.
457+
458+
.. code-block:: tcl
459+
460+
proc dts_calypte_test_core {DTS base_addr} {
461+
# Populate reference from the calling environment
462+
upvar 1 $DTS dts
463+
464+
set LOOPBACK_BASE_ADDR [expr $base_addr + 0x0]
465+
set TX_DBG_CORE_BASE_ADDR [expr $base_addr + 0x10000]
466+
set LATENCY_METER_BASE_ADDR [expr $base_addr + 0x20000]
467+
set RESET_FSM_BASE_ADDR [expr $base_addr + 0x30000]
468+
469+
dts_create_node dts "dma_calypte_test_core0" {
470+
471+
dts_create_node dts "mfb_loopback0" {
472+
dts_appendprop_comp_node dts $LOOPBACK_BASE_ADDR 8 "cesnet,mfb_loopback"
473+
}
474+
475+
dts_create_node dts "dma_calypte_debug_core0" {
476+
dts_appendprop_comp_node dts $TX_DBG_CORE_BASE_ADDR 0x1600 "cesnet,dma_calypte_debug_core"
477+
478+
dts_create_node dts "mfb_generator0" {
479+
dts_appendprop_comp_node dts [expr $TX_DBG_CORE_BASE_ADDR+0x8000] 0x40 "cesnet,mfb_generator"
480+
}
481+
}
482+
483+
dts_create_node dts "dma_calypte_latency_meter0" {
484+
dts_appendprop_comp_node dts $LATENCY_METER_BASE_ADDR 0x30 "cesnet,dma_calypte_latency_meter"
485+
}
486+
487+
dts_create_node dts "dma_calypte_reset_fsm0" {
488+
dts_appendprop_comp_node dts $RESET_FSM_BASE_ADDR 0x4 "cesnet,dma_calypte_reset_fsm"
489+
}
490+
}
491+
}

build/scripts/dts/dts_templates.tcl

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# dts_templates.tcl: templates for various node types within the DeviceTree
2+
# Copyright (C) 2024 CESNET z. s. p. o.
3+
# Author(s): Vladisav Valek <[email protected]>
4+
#
5+
# SPDX-License-Identifier: BSD-3-Clause
6+
7+
# Adds a string property to a Device Tree node
8+
# 1. DTS - a reference to Device Tree string
9+
# 2. name - name of a string property
10+
# 3. value - value of a string property
11+
proc dts_appendprop_string {DTS name value} {
12+
upvar 1 $DTS dts
13+
append dts "$name = \"$value\";\n"
14+
}
15+
16+
# Adds integer property to a Device Tree node
17+
# 1. DTS - a reference to Device Tree string
18+
# 2. name - name of an integer property
19+
# 3. value - value of an integer property
20+
proc dts_appendprop_int {DTS name value} {
21+
upvar 1 $DTS dts
22+
append dts "$name = <$value>;\n"
23+
}
24+
25+
# Adds register as a "reg" property to a Device Tree node
26+
# 1. DTS - a reference to Device Tree string
27+
# 2. addr - base address of a register
28+
# 3. size - size of a register
29+
proc dts_appendprop_reg {DTS addr size} {
30+
upvar 1 $DTS dts
31+
append dts "reg = <$addr $size>;\n"
32+
}
33+
34+
# Adds cells that specify processing of address and size values within reg properties of a DT node
35+
# 1. DTS - a reference to Device Tree string
36+
# 2. addr - specifies how many cells within the reg property represent the BASE ADDRESS of a register
37+
# 3. size - specifies how many cells within the reg property represent the SIZE of a register
38+
# NOTE: A cell within a reg property is a value of type uint32.
39+
proc dts_add_cells {DTS {addr 1} {size 1}} {
40+
upvar 1 $DTS dts
41+
dts_appendprop_int dts "#address-cells" $addr
42+
dts_appendprop_int dts "#size-cells" $size
43+
}
44+
45+
# Adds the minimal set of properties (compatible string and a register address)
46+
# 1. DTS - a reference to DTS
47+
# 2. base_addr - base address in the MI address space
48+
# 3. size - size of the register space in the MI address space
49+
# 4. compatible - compatible string
50+
proc dts_appendprop_comp_node {DTS base_addr size compatible} {
51+
upvar 1 $DTS dts
52+
53+
dts_appendprop_string dts "compatible" "$compatible"
54+
dts_appendprop_reg dts $base_addr $size
55+
}
56+
57+
# This creates a node within a DTS
58+
# 1. DTS - a reference to Device Tree string
59+
# 2. alias - (also called label) provides an alternative name used for cross-referencing within a
60+
# Device Tree
61+
# 3. name - a name of a node
62+
# 4. body - a set of procedures that add properties to a node (see example within the documentation
63+
# of a Build System)
64+
proc dts_create_labeled_node {DTS alias name body} {
65+
upvar 1 $DTS dts
66+
67+
if {$alias ne ""} {
68+
append dts "$alias: "
69+
}
70+
append dts "$name {\n"
71+
uplevel 1 $body
72+
append dts "};\n"
73+
}
74+
75+
# Wrapper over dts_create_labeled_node that creates a node without a label
76+
proc dts_create_node {DTS name body} {
77+
uplevel 1 [list dts_create_labeled_node $DTS "" $name $body]
78+
}

comp/dma/dma_calypte/DevTree.tcl

Lines changed: 31 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,34 +3,43 @@
33
# 2. id - channel ID
44
# 3. base - base address of channel
55
# 4. pcie - index(es) of PCIe endpoint(s) which DMA controller uses.
6-
proc dts_dma_calypte_ctrl {dir id base pcie} {
7-
set ret ""
8-
append ret "dma_ctrl_calypte" "_$dir$id {"
9-
append ret "compatible = \"cesnet,dma_ctrl_calypte" "_" $dir "\";"
10-
append ret "reg = <$base 0x80>;"
11-
append ret "version = <0x00010000>;"
12-
append ret "pcie = <$pcie>;"
13-
if { $dir == "tx" } {
14-
append ret "data_buff = <&dma_calypte_tx_data_buff$id>;"
15-
append ret "hdr_buff = <&dma_calypte_tx_hdr_buff$id>;"
6+
proc dts_dma_calypte_ctrl {DTS dir id base pcie} {
7+
upvar 1 $DTS dts
8+
9+
dts_create_node dts "dma_ctrl_calypte_$dir$id" {
10+
dts_appendprop_comp_node dts $base 0x80 "cesnet,dma_ctrl_calypte_$dir"
11+
dts_appendprop_int dts "version" 0x10000
12+
dts_appendprop_int dts "pcie" $pcie
13+
if { $dir == "tx" } {
14+
append dts "data_buff = <&dma_calypte_tx_data_buff$id>;"
15+
append dts "hdr_buff = <&dma_calypte_tx_hdr_buff$id>;"
16+
}
17+
append dts "params = <&dma_params_$dir$pcie>;"
1618
}
17-
append ret "params = <&dma_params_$dir$pcie>;"
18-
append ret "};"
19-
return $ret
2019
}
2120

2221
# generates Device Tree entries for data buffers in DMA Calypte
2322
# 1. type - content of the buffer (header or data)
2423
# 2. id - channel ID
25-
# 3. base - base address for the first buffer
24+
# 3. base - base address for the buffer
2625
# 4. size - size of the buffer
2726
# 5. pcie - index(es) of PCIe endpoint(s) which DMA controller uses.
28-
proc dts_dma_calypte_tx_buffer {type id base size pcie} {
29-
set ret ""
30-
append ret "dma_calypte_tx_${type}_buff${id}: dma_calypte_tx_${type}_buff${id} {"
31-
append ret "compatible = \"cesnet,dma_calypte_tx_${type}_buff\";"
32-
append ret "reg = <$base $size>;"
33-
append ret "pcie = <$pcie>;"
34-
append ret "};"
35-
return $ret
27+
proc dts_dma_calypte_tx_buffer {DTS type id base size pcie} {
28+
upvar 1 $DTS dts
29+
30+
dts_create_labeled_node dts "dma_calypte_tx_${type}_buff${id}" "dma_calypte_tx_${type}_buff${id}" {
31+
dts_appendprop_comp_node dts $base $size "cesnet,dma_calypte_tx_${type}_buff"
32+
dts_appendprop_int dts "pcie" $pcie
33+
}
34+
}
35+
36+
# Adds a node to the Device Tree for performance counters within DMA Calypte
37+
# 1. DTS - reference to DeviceTree string
38+
# 2. Base - base address of the registers in the MI address space
39+
proc dts_dma_perf_cntrs {DTS base} {
40+
upvar 1 $DTS dts
41+
42+
dts_create_node dts "dma_calypte_rx_perf_cntrs0" {
43+
dts_appendprop_comp_node dts $base 0x30 "cesnet,dma_calypte_rx_perf_cntrs"
44+
}
3645
}

comp/dma/dma_calypte/comp/rx/Modules.tcl

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,15 @@ lappend PACKAGES "$OFM_PATH/comp/base/pkg/math_pack.vhd"
88
lappend PACKAGES "$OFM_PATH/comp/base/pkg/type_pack.vhd"
99
lappend PACKAGES "$OFM_PATH/comp/base/pkg/pcie_meta_pack.vhd"
1010

11-
set HDR_INSERTOR_BASE "$ENTITY_BASE/comp/hdr_insertor"
12-
set HDR_MANAGER_BASE "$ENTITY_BASE/comp/hdr_manager"
13-
set TRANS_BUFFER_BASE "$ENTITY_BASE/comp/trans_buffer"
14-
set INPUT_BUFFER_BASE "$ENTITY_BASE/comp/input_buffer"
15-
set SW_MANAGER_BASE "$ENTITY_BASE/comp/software_manager"
16-
set MFB_FIFOX_BASE "$OFM_PATH/comp/mfb_tools/storage/fifox"
17-
set MFB_FRAME_LNG_CHECK_BASE "$OFM_PATH/comp/mfb_tools/logic/frame_lng_check"
11+
set HDR_INSERTOR_BASE "$ENTITY_BASE/comp/hdr_insertor"
12+
set HDR_MANAGER_BASE "$ENTITY_BASE/comp/hdr_manager"
13+
set TRANS_BUFFER_BASE "$ENTITY_BASE/comp/trans_buffer"
14+
set INPUT_BUFFER_BASE "$ENTITY_BASE/comp/input_buffer"
15+
set SW_MANAGER_BASE "$ENTITY_BASE/comp/software_manager"
16+
set MFB_FIFOX_BASE "$OFM_PATH/comp/mfb_tools/storage/fifox"
17+
set MFB_FRAME_LNG_CHECK_BASE "$OFM_PATH/comp/mfb_tools/logic/frame_lng_check"
18+
set DATA_LOGGER_BASE "$OFM_PATH/comp/debug/data_logger"
19+
set MI_SPLITTER_PLUS_GEN_BASE "$OFM_PATH/comp/mi_tools/splitter_plus_gen"
1820

1921
lappend COMPONENTS \
2022
[ list "RX_DMA_CALYPTE_HDR_INSERTOR" $HDR_INSERTOR_BASE "FULL"] \
@@ -24,6 +26,7 @@ lappend COMPONENTS \
2426
[ list "RX_DMA_CALYPTE_SW_MANAGER" $SW_MANAGER_BASE "FULL"] \
2527
[ list "MFB_FIFOX" $MFB_FIFOX_BASE "FULL"] \
2628
[ list "MFB_FRAME_LNG_CHECK" $MFB_FRAME_LNG_CHECK_BASE "FULL"] \
27-
29+
[ list "DATA_LOGGER" $DATA_LOGGER_BASE "FULL"] \
30+
[ list "MI_SPLITTER_PLUS_GEN" $MI_SPLITTER_PLUS_GEN_BASE "FULL"]
2831

2932
lappend MOD "$ENTITY_BASE/rx_dma_calypte.vhd"

comp/dma/dma_calypte/comp/rx/comp/hdr_manager/rx_dma_calypte_hdr_manager.vhd

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,15 @@ entity RX_DMA_CALYPTE_HDR_MANAGER is
137137
PKT_CNTR_CHAN : out std_logic_vector(log2(CHANNELS) -1 downto 0);
138138
PKT_CNTR_SENT_INC : out std_logic;
139139
PKT_CNTR_DISC_INC : out std_logic;
140-
PKT_CNTR_PKT_SIZE : out std_logic_vector(log2(PKT_MTU+1) -1 downto 0)
140+
PKT_CNTR_PKT_SIZE : out std_logic_vector(log2(PKT_MTU+1) -1 downto 0);
141+
142+
-- =========================================================================================
143+
-- Performance counter outputs
144+
-- =========================================================================================
145+
DATA_ADDR_REQ_CNTR_INC : out std_logic;
146+
DMA_HDR_ADDR_REQ_CNTR_INC : out std_logic;
147+
DATA_ADDR_STALL_INC : out std_logic;
148+
DMA_HDR_ADDR_STALL_INC : out std_logic
141149
);
142150
end entity;
143151

@@ -311,6 +319,12 @@ architecture FULL of RX_DMA_CALYPTE_HDR_MANAGER is
311319
signal ptr_fifo_rd : std_logic;
312320
signal ptr_fifo_empty : std_logic;
313321

322+
-- =============================================================================================
323+
-- Performance counter logic
324+
-- =============================================================================================
325+
signal data_addr_next_reg : std_logic;
326+
signal dma_hdr_addr_next_reg : std_logic;
327+
314328
-- =============================================================================================
315329
-- Debug signals and probes (either for verification or ILA/SignalTap)
316330
-- =============================================================================================
@@ -760,7 +774,7 @@ begin
760774
IN_VFID => (others => '0'),
761775
IN_TAG => (others => '0'),
762776
IN_DW_CNT => std_logic_vector(to_unsigned(DATA_SEGMENT_SIZE/4, 11)),
763-
IN_ATTRIBUTES => "000",
777+
IN_ATTRIBUTES => "010",
764778
IN_FBE => "1111",
765779
IN_LBE => "1111",
766780
IN_ADDR_LEN => pcie_addr_len_data_tran,
@@ -806,7 +820,7 @@ begin
806820
IN_VFID => (others => '0'),
807821
IN_TAG => (others => '0'),
808822
IN_DW_CNT => std_logic_vector(to_unsigned(8/4, 11)),
809-
IN_ATTRIBUTES => "000",
823+
IN_ATTRIBUTES => "010",
810824
IN_FBE => "1111",
811825
IN_LBE => "1111",
812826
IN_ADDR_LEN => pcie_addr_len_dma_hdr_tran,
@@ -1025,4 +1039,24 @@ begin
10251039
-- signal is valid and set to 1.
10261040
DMA_HDR_SRC_RDY <= ((not hdr_meta_fifo_empty) and (not ptr_fifo_empty) and (not pkt_size_fifo_empty) and (not discard_fifo_empty) and (not discard_fifo_do(0)))
10271041
or ((not discard_fifo_empty) and discard_fifo_do(0));
1042+
1043+
-- =============================================================================================
1044+
-- Performance counter logic
1045+
-- =============================================================================================
1046+
addr_next_reg_p: process (CLK) is
1047+
begin
1048+
if (rising_edge(CLK)) then
1049+
data_addr_next_reg <= data_addr_next;
1050+
dma_hdr_addr_next_reg <= dma_hdr_addr_next;
1051+
end if;
1052+
end process;
1053+
1054+
-- the response to an address request usually comes one clock period delayed. If that is not the
1055+
-- case, the stalling occurs.
1056+
DATA_ADDR_STALL_INC <= data_addr_next_reg and (not data_pcie_addr_vld);
1057+
DMA_HDR_ADDR_STALL_INC <= dma_hdr_addr_next_reg and (not dma_hdr_pcie_addr_vld);
1058+
1059+
-- Counters of requests for PCIe addresses (should be equal to the overall amount of packets)
1060+
DATA_ADDR_REQ_CNTR_INC <= data_pcie_addr_vld;
1061+
DMA_HDR_ADDR_REQ_CNTR_INC <= dma_hdr_pcie_addr_vld;
10281062
end architecture;

0 commit comments

Comments
 (0)