Skip to content

Commit 0abdeef

Browse files
committed
Merge branch 'cabal-axis-trim' into 'devel'
feat(axis-tools): introduce AXIS_HEAD_TRIMMER, AXIS_TAIL_TRIMMER module See merge request ndk/ndk-fpga!377
2 parents 5359d03 + f99524c commit 0abdeef

File tree

16 files changed

+1404
-0
lines changed

16 files changed

+1404
-0
lines changed
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Modules.tcl: Components include script
2+
# Copyright (C) 2026 CESNET z. s. p. o.
3+
# Author(s): Jakub Cabal <cabal@cesnet.cz>
4+
#
5+
# SPDX-License-Identifier: BSD-3-Clause
6+
7+
# Packages referenced by axis_head_trimmer.vhd ("use work.math_pack.all" / "use work.type_pack.all").
lappend PACKAGES "$OFM_PATH/comp/base/pkg/math_pack.vhd"
8+
lappend PACKAGES "$OFM_PATH/comp/base/pkg/type_pack.vhd"
9+
10+
# Sub-component instantiated by the trimmer (entity work.BARREL_SHIFTER_GEN).
# The third list element ("FULL") presumably selects the architecture/variant - confirm against the build system docs.
lappend COMPONENTS [ list "BARREL_SHIFTER_GEN" "$OFM_PATH/comp/base/logic/barrel_shifter" "FULL" ]
11+
12+
# Source file of this module itself.
lappend MOD "$ENTITY_BASE/axis_head_trimmer.vhd"
Lines changed: 376 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,376 @@
1+
-- Copyright (C) 2026 CESNET z. s. p. o.
2+
-- Author(s): Jakub Cabal <cabal@cesnet.cz>
3+
-- SPDX-License-Identifier: BSD-3-Clause
4+
5+
library IEEE;
6+
use IEEE.std_logic_1164.all;
7+
use IEEE.numeric_std.all;
8+
9+
use work.math_pack.all;
10+
use work.type_pack.all;
11+
12+
-- Removes a configurable number of bytes from the beginning of AXI-Stream packets.
13+
-- Uses a 2-stage input shift register with barrel shifter for data alignment.
14+
-- Trim instruction (length and enable) is sampled at the first word of each packet
15+
-- and applied to the entire packet. Guaranteed throughput: 1 word per clock cycle.
16+
--
17+
entity AXIS_HEAD_TRIMMER is
18+
generic (
19+
-- AXI-Stream data bus width in bits; must be a multiple of 8.
-- The architecture derives the word size in bytes as AXI_TDATA_WIDTH/8 and the
-- shift width as log2 of that value.
-- NOTE(review): presumably the byte count per word should be a power of two - confirm.
20+
AXI_TDATA_WIDTH : natural := 512;
21+
-- Maximum packet length (MTU) in bytes. Used to size the byte counter
-- and the RX_AXI_TRIM_LENGTH port (log2(PKT_MTU+1) bits).
22+
PKT_MTU : natural := 9216; -- Jumbo frame support
23+
-- Target device. Not referenced by the FULL architecture in this file.
24+
DEVICE : string := "AGILEX"
25+
);
26+
port (
27+
-- Clock; all registers in the architecture are updated on its rising edge.
CLK : in std_logic;
28+
-- Synchronous, active-high reset (evaluated inside the rising_edge(CLK) branches).
RESET : in std_logic;
29+
30+
-- RX Interface
31+
RX_AXI_TDATA : in std_logic_vector(AXI_TDATA_WIDTH-1 downto 0);
32+
-- One keep bit per data byte; the number of set bits is taken as the byte count of the beat.
RX_AXI_TKEEP : in std_logic_vector(AXI_TDATA_WIDTH/8-1 downto 0);
33+
RX_AXI_TLAST : in std_logic;
34+
RX_AXI_TVALID : in std_logic;
35+
RX_AXI_TREADY : out std_logic;
36+
37+
-- Trim instruction interface (sampled only at the first word of each packet)
38+
-- Number of bytes to remove from the beginning of the packet.
39+
-- Valid range: 0 to packet_length-1 (at least one byte must remain).
40+
RX_AXI_TRIM_LENGTH : in std_logic_vector(log2(PKT_MTU+1)-1 downto 0);
41+
-- Enable trim operation for this packet. When low, packet passes through unchanged.
42+
RX_AXI_TRIM_ENABLE : in std_logic;
43+
44+
-- TX Interface (driven from an output register stage)
45+
TX_AXI_TDATA : out std_logic_vector(AXI_TDATA_WIDTH-1 downto 0);
46+
TX_AXI_TKEEP : out std_logic_vector(AXI_TDATA_WIDTH/8-1 downto 0);
47+
TX_AXI_TLAST : out std_logic;
48+
TX_AXI_TVALID : out std_logic;
49+
TX_AXI_TREADY : in std_logic
50+
);
51+
end entity;
52+
53+
architecture FULL of AXIS_HEAD_TRIMMER is
54+
55+
constant WORD_BYTES : natural := AXI_TDATA_WIDTH/8;
56+
constant OFF_BYTES_W : natural := log2(WORD_BYTES);
57+
constant TRIM_LEN_W : natural := log2(PKT_MTU+1);
58+
constant SHREG_STAGES : natural := 2;
59+
constant BS_BLOCKS : natural := 2*WORD_BYTES;
60+
constant POPCOUNT_W : natural := log2(WORD_BYTES+1);
61+
62+
signal valid_word : std_logic;
63+
signal rx_axi_eop : std_logic;
64+
signal rx_axi_sop : std_logic;
65+
signal rx_axi_nonfirst_reg : std_logic;
66+
67+
-- Holds the trim length sampled at the first word of a packet. It is assigned directly from RX_AXI_TRIM_LENGTH and muxed with it into shreg_trim_len(0), so it must have exactly the port's width (TRIM_LEN_W = log2(PKT_MTU+1)); log2(PKT_MTU) may be one bit narrower (e.g. power-of-two PKT_MTU with a ceiling log2).
signal rx_axi_trim_length_reg : std_logic_vector(TRIM_LEN_W-1 downto 0);
68+
signal rx_axi_trim_enable_reg : std_logic;
69+
70+
signal popcount : unsigned(POPCOUNT_W-1 downto 0);
71+
signal byte_cnt_reg : unsigned(TRIM_LEN_W-1 downto 0);
72+
signal byte_cnt_next : unsigned(TRIM_LEN_W-1 downto 0);
73+
74+
signal trim_pos_ok : std_logic;
75+
signal trim_pos_next_ok : std_logic;
76+
signal trim_active : std_logic;
77+
signal trim_last : std_logic;
78+
79+
signal shreg_popcount : u_array_t(SHREG_STAGES downto 0)(POPCOUNT_W-1 downto 0);
80+
signal shreg_byte_cnt : u_array_t(SHREG_STAGES downto 0)(TRIM_LEN_W-1 downto 0);
81+
signal shreg_tdata : slv_array_t(SHREG_STAGES downto 0)(AXI_TDATA_WIDTH-1 downto 0);
82+
signal shreg_tkeep : slv_array_t(SHREG_STAGES downto 0)(WORD_BYTES-1 downto 0);
83+
signal shreg_tfirst : std_logic_vector(SHREG_STAGES downto 0);
84+
signal shreg_tlast : std_logic_vector(SHREG_STAGES downto 0);
85+
signal shreg_tvalid : std_logic_vector(SHREG_STAGES downto 0);
86+
signal shreg_trim_len : u_array_t(SHREG_STAGES downto 0)(TRIM_LEN_W-1 downto 0);
87+
signal shreg_trim_en : std_logic_vector(SHREG_STAGES downto 0);
88+
signal shreg_trim_active : std_logic_vector(SHREG_STAGES downto 0);
89+
signal shreg_trim_last : std_logic_vector(SHREG_STAGES downto 0);
90+
signal shreg_ready : std_logic_vector(SHREG_STAGES downto 0);
91+
signal shreg_ok : std_logic;
92+
signal shreg_tvalid_fix : std_logic;
93+
94+
signal buf_tdata : std_logic_vector(AXI_TDATA_WIDTH-1 downto 0);
95+
signal buf_tkeep : std_logic_vector(WORD_BYTES-1 downto 0);
96+
signal buf_tfirst : std_logic;
97+
signal buf_tlast : std_logic;
98+
signal buf_tvalid : std_logic;
99+
signal buf_tready : std_logic;
100+
signal buf_trim_len : unsigned(TRIM_LEN_W-1 downto 0);
101+
signal buf_trim_en : std_logic;
102+
signal buf_trim_active : std_logic;
103+
signal buf_trim_last : std_logic;
104+
signal buf_popcount : unsigned(POPCOUNT_W-1 downto 0);
105+
signal buf_byte_cnt : unsigned(TRIM_LEN_W-1 downto 0);
106+
signal buf_2word_bytes : unsigned(log2(2*WORD_BYTES+1)-1 downto 0);
107+
signal buf_next_bytes : unsigned(log2(2*WORD_BYTES+1)-1 downto 0);
108+
signal buf_new_keep : std_logic_vector(WORD_BYTES-1 downto 0);
109+
110+
signal buf_trim_shift : unsigned(OFF_BYTES_W-1 downto 0);
111+
signal buf_trim_shift_reg : unsigned(OFF_BYTES_W-1 downto 0);
112+
signal buf_trim_post : std_logic;
113+
114+
signal bs_shift : unsigned(OFF_BYTES_W-1 downto 0);
115+
signal bs_din : std_logic_vector(2*AXI_TDATA_WIDTH-1 downto 0);
116+
signal bs_dout : std_logic_vector(2*AXI_TDATA_WIDTH-1 downto 0);
117+
118+
signal tx_tdata_next : std_logic_vector(AXI_TDATA_WIDTH-1 downto 0);
119+
signal tx_tkeep_next : std_logic_vector(WORD_BYTES-1 downto 0);
120+
signal tx_tlast_next : std_logic;
121+
signal tx_tlast_next_reg : std_logic;
122+
signal tx_tvalid_next : std_logic;
123+
signal tx_tdata_reg : std_logic_vector(AXI_TDATA_WIDTH-1 downto 0);
124+
signal tx_tkeep_reg : std_logic_vector(WORD_BYTES-1 downto 0);
125+
signal tx_tlast_reg : std_logic;
126+
signal tx_tvalid_reg : std_logic;
127+
128+
begin
129+
130+
RX_AXI_TREADY <= shreg_ready(0);
131+
132+
valid_word <= RX_AXI_TVALID and RX_AXI_TREADY;
133+
rx_axi_eop <= RX_AXI_TLAST and valid_word;
134+
135+
-- Track whether a beat of the current packet has already been accepted.
-- rx_axi_nonfirst_reg is set by any accepted beat and cleared by reset or by an
-- accepted beat with TLAST; the clear has priority, so a single-beat packet
-- leaves the flag at '0'.
136+
process (CLK)
137+
begin
138+
if rising_edge(CLK) then
139+
if (RESET = '1' or rx_axi_eop = '1') then
140+
rx_axi_nonfirst_reg <= '0';
141+
elsif (valid_word = '1') then
142+
rx_axi_nonfirst_reg <= '1';
143+
end if;
144+
end if;
145+
end process;
146+
147+
-- Start of packet: an accepted beat while no earlier beat of the packet has been seen.
rx_axi_sop <= valid_word and not rx_axi_nonfirst_reg;
148+
149+
popcount <= to_unsigned(count_ones(RX_AXI_TKEEP), POPCOUNT_W);
150+
151+
-- Byte counter accumulates total bytes received in current packet.
-- byte_cnt_reg holds the number of bytes accepted BEFORE the current beat;
-- it advances only on accepted beats and restarts from zero after the beat with TLAST.
152+
byte_cnt_reg_p : process (CLK)
153+
begin
154+
if rising_edge(CLK) then
155+
if (valid_word = '1') then
156+
if (RX_AXI_TLAST = '1') then
157+
-- Last beat of the packet: start the next packet from zero.
byte_cnt_reg <= (others => '0');
158+
else
159+
byte_cnt_reg <= byte_cnt_next;
160+
end if;
161+
end if;
162+
-- Synchronous reset; placed last so it overrides the assignments above.
if (RESET = '1') then
163+
byte_cnt_reg <= (others => '0');
164+
end if;
165+
end if;
166+
end process;
167+
168+
-- Byte count including the current beat (popcount = number of set TKEEP bits).
byte_cnt_next <= byte_cnt_reg + popcount;
169+
170+
-- Capture trim instruction at packet start, hold for entire packet.
-- The registers are loaded only on rx_axi_sop; for the first beat itself the
-- live port values are used instead (see the shreg_trim_len(0)/shreg_trim_en(0) muxes).
171+
process (CLK)
172+
begin
173+
if rising_edge(CLK) then
174+
if (rx_axi_sop = '1') then
175+
rx_axi_trim_length_reg <= RX_AXI_TRIM_LENGTH;
176+
rx_axi_trim_enable_reg <= RX_AXI_TRIM_ENABLE;
177+
end if;
178+
-- Only the enable flag is reset; the length register keeps its old value,
-- which is harmless while the enable is low.
if (RESET = '1') then
179+
rx_axi_trim_enable_reg <= '0';
180+
end if;
181+
end if;
182+
end process;
183+
184+
-- Detect if current/next position is within the trim region
185+
trim_pos_ok <= '1' when (byte_cnt_reg <= shreg_trim_len(0)) else '0';
186+
trim_pos_next_ok <= '1' when (byte_cnt_next <= shreg_trim_len(0)) else '0';
187+
188+
-- trim_active: entire word is trimmed (skipped)
189+
-- trim_last: word contains the trim boundary (partial trim)
190+
trim_active <= shreg_trim_en(0) and trim_pos_next_ok;
191+
trim_last <= shreg_trim_en(0) and trim_pos_ok and not trim_pos_next_ok;
192+
193+
shreg_tdata(0) <= RX_AXI_TDATA;
194+
shreg_tkeep(0) <= RX_AXI_TKEEP;
195+
shreg_tfirst(0) <= rx_axi_sop;
196+
shreg_tlast(0) <= RX_AXI_TLAST;
197+
shreg_tvalid(0) <= RX_AXI_TVALID;
198+
shreg_byte_cnt(0) <= byte_cnt_reg;
199+
shreg_popcount(0) <= popcount;
200+
201+
shreg_trim_len(0) <= unsigned(RX_AXI_TRIM_LENGTH) when (rx_axi_sop = '1') else unsigned(rx_axi_trim_length_reg);
202+
shreg_trim_en(0) <= RX_AXI_TRIM_ENABLE when (rx_axi_sop = '1') else rx_axi_trim_enable_reg;
203+
shreg_trim_active(0) <= trim_active;
204+
shreg_trim_last(0) <= trim_last;
205+
206+
-- Input shift register: SHREG_STAGES register stages, index 0 being the
-- (unregistered) input and index SHREG_STAGES the stage feeding the output logic.
shreg_g: for i in 0 to SHREG_STAGES-1 generate
207+
shreg_p : process (CLK)
208+
begin
209+
if rising_edge(CLK) then
210+
-- Stage i+1 takes over everything from stage i whenever it may accept,
-- including the valid bit, so beats with TVALID low travel as bubbles.
if (shreg_ready(i) = '1') then
211+
shreg_tdata(i+1) <= shreg_tdata(i);
212+
shreg_tkeep(i+1) <= shreg_tkeep(i);
213+
shreg_tfirst(i+1) <= shreg_tfirst(i);
214+
shreg_tlast(i+1) <= shreg_tlast(i);
215+
shreg_tvalid(i+1) <= shreg_tvalid(i);
216+
shreg_trim_len(i+1) <= shreg_trim_len(i);
217+
shreg_trim_en(i+1) <= shreg_trim_en(i);
218+
shreg_trim_active(i+1) <= shreg_trim_active(i);
219+
shreg_trim_last(i+1) <= shreg_trim_last(i);
220+
shreg_byte_cnt(i+1) <= shreg_byte_cnt(i);
221+
shreg_popcount(i+1) <= shreg_popcount(i);
222+
end if;
223+
-- Only the valid and trim-enable bits are reset; the remaining fields
-- of a stage are undefined until the first load.
if (RESET = '1') then
224+
shreg_tvalid(i+1) <= '0';
225+
shreg_trim_en(i+1) <= '0';
226+
end if;
227+
end if;
228+
end process;
229+
230+
-- Stage i+1 may be overwritten when its consumer is ready or when it holds no valid word.
shreg_ready(i) <= shreg_ready(i+1) or not shreg_tvalid(i+1);
231+
end generate;
232+
233+
-- Backpressure: stall input when output not ready or insufficient data in pipeline
234+
shreg_ready(SHREG_STAGES) <= buf_tready and shreg_ok;
235+
shreg_tvalid_fix <= shreg_ok;
236+
237+
-- shreg_ok indicates valid data can be presented at output: a non-final word needs its successor valid in the previous stage (both feed the barrel shifter), while a packet's final word needs no successor and is therefore qualified by its OWN TLAST - not by the TLAST bit of the previous stage, which is meaningless while that stage holds no valid word (it would stall the final word of a packet or release a non-final word on a bubble).
238+
shreg_ok <= shreg_tvalid(SHREG_STAGES) and
239+
(shreg_tvalid(SHREG_STAGES-1) or shreg_tlast(SHREG_STAGES));
240+
241+
buf_tdata <= shreg_tdata(SHREG_STAGES);
242+
buf_tkeep <= shreg_tkeep(SHREG_STAGES);
243+
buf_tfirst <= shreg_tfirst(SHREG_STAGES);
244+
buf_tlast <= shreg_tlast(SHREG_STAGES);
245+
buf_tvalid <= shreg_tvalid_fix;
246+
buf_trim_len <= shreg_trim_len(SHREG_STAGES);
247+
buf_trim_en <= shreg_trim_en(SHREG_STAGES);
248+
buf_trim_active <= shreg_trim_active(SHREG_STAGES);
249+
buf_trim_last <= shreg_trim_last(SHREG_STAGES);
250+
buf_byte_cnt <= shreg_byte_cnt(SHREG_STAGES);
251+
buf_popcount <= shreg_popcount(SHREG_STAGES);
252+
253+
buf_tready <= TX_AXI_TREADY;
254+
255+
-- Calculate shift amount: how many bytes to shift for alignment after trim
256+
buf_trim_shift <= resize(shreg_trim_len(SHREG_STAGES) - shreg_byte_cnt(SHREG_STAGES), OFF_BYTES_W);
257+
258+
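-- Calculate total valid bytes in the 2-word window: the word in the last stage plus the following word of the stream (held in the preceding pipeline stage); only the last-stage word counts when it carries TLAST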
259+
process (all)
260+
begin
261+
buf_2word_bytes <= resize(shreg_popcount(SHREG_STAGES), log2(2*WORD_BYTES+1)) + shreg_popcount(SHREG_STAGES-1);
262+
if (shreg_tlast(SHREG_STAGES) = '1') then
263+
buf_2word_bytes <= resize(shreg_popcount(SHREG_STAGES), log2(2*WORD_BYTES+1));
264+
end if;
265+
end process;
266+
267+
-- Remaining bytes after applying barrel shifter offset
268+
buf_next_bytes <= buf_2word_bytes - bs_shift;
269+
270+
-- Generate TKEEP for partial last word based on remaining byte count
271+
process (all)
272+
begin
273+
buf_new_keep <= (others => '0');
274+
for i in 0 to WORD_BYTES-1 loop
275+
if (buf_next_bytes > i) then
276+
buf_new_keep(i) <= '1';
277+
end if;
278+
end loop;
279+
end process;
280+
281+
-- Remember the shift amount of the word containing the trim boundary so that
-- the following words of the same packet can be aligned with the same offset
-- (selected through bs_shift while buf_trim_post is set).
-- The register has no reset and its load is not gated by the valid/ready handshake.
process (CLK)
282+
begin
283+
if rising_edge(CLK) then
284+
if (buf_trim_last = '1') then
285+
buf_trim_shift_reg <= buf_trim_shift;
286+
end if;
287+
end if;
288+
end process;
289+
290+
-- Track post-trim state for multi-word packets.
-- buf_trim_post is set once the word containing the trim boundary has been
-- consumed and cleared when the packet's last word is consumed.
291+
process (CLK)
292+
begin
293+
if rising_edge(CLK) then
294+
if (buf_tvalid = '1' and buf_tready = '1') then
295+
if (buf_trim_last = '1') then
296+
buf_trim_post <= '1';
297+
end if;
298+
-- Evaluated after the set above, so a word that is both the trim
-- boundary and the last word of its packet leaves the flag cleared.
if (buf_tlast = '1') then
299+
buf_trim_post <= '0';
300+
end if;
301+
end if;
302+
-- Synchronous reset; last assignment wins.
if (RESET = '1') then
303+
buf_trim_post <= '0';
304+
end if;
305+
end if;
306+
end process;
307+
308+
-- Select barrel shifter offset: current trim boundary or registered value for post-trim alignment
309+
bs_shift <= buf_trim_shift when (buf_trim_last = '1') else
310+
buf_trim_shift_reg when (buf_trim_post = '1') else
311+
(others => '0');
312+
313+
bs_din(2*AXI_TDATA_WIDTH-1 downto AXI_TDATA_WIDTH) <= shreg_tdata(SHREG_STAGES-1);
314+
bs_din(AXI_TDATA_WIDTH-1 downto 0) <= shreg_tdata(SHREG_STAGES);
315+
316+
barrel_shifter_i : entity work.BARREL_SHIFTER_GEN
317+
generic map (
318+
BLOCKS => BS_BLOCKS,
319+
BLOCK_SIZE => 8,
320+
SHIFT_LEFT => false
321+
)
322+
port map (
323+
DATA_IN => bs_din,
324+
DATA_OUT => bs_dout,
325+
SEL => std_logic_vector(resize(bs_shift, log2(BS_BLOCKS)))
326+
);
327+
328+
tx_tdata_next <= bs_dout(AXI_TDATA_WIDTH-1 downto 0);
329+
330+
-- TKEEP: all ones for full word, otherwise generate partial keep pattern
331+
tx_tkeep_next <= (others => '1') when (buf_next_bytes >= WORD_BYTES) else buf_new_keep;
332+
333+
-- TLAST asserted when remaining bytes fit in current word
334+
tx_tlast_next <= '1' when (buf_next_bytes <= WORD_BYTES) else '0';
335+
336+
-- Remember that TLAST has already been generated for the current packet.
-- tx_tlast_next_reg is set when a consumed word yields tx_tlast_next = '1'
-- and cleared when the packet's last input word is consumed; while set it
-- masks tx_tvalid_next so the leftover input word produces no output beat.
process (CLK)
337+
begin
338+
if rising_edge(CLK) then
339+
if (buf_tvalid = '1' and buf_tready = '1') then
340+
if (tx_tlast_next = '1') then
341+
tx_tlast_next_reg <= '1';
342+
end if;
343+
-- Evaluated after the set above: when the consumed word is itself
-- the last input word, the flag ends up cleared.
if (buf_tlast = '1') then
344+
tx_tlast_next_reg <= '0';
345+
end if;
346+
end if;
347+
-- Synchronous reset; last assignment wins.
if (RESET = '1') then
348+
tx_tlast_next_reg <= '0';
349+
end if;
350+
end if;
351+
end process;
352+
353+
-- Suppress TVALID for fully trimmed words and, once TLAST has been generated early, until the packet's last input word has been consumed (word skipping)
354+
tx_tvalid_next <= buf_tvalid and not (buf_trim_active or tx_tlast_next_reg);
355+
356+
-- Output register stage: captures the next data/keep/last/valid values
-- whenever the downstream side is ready; only the valid bit is reset.
-- NOTE(review): the registers load only while TX_AXI_TREADY = '1', so
-- TX_AXI_TVALID can rise only after TREADY has been observed high. AXI4-Stream
-- does not allow a transmitter to wait for TREADY before asserting TVALID;
-- confirm whether the enable (together with buf_tready) should instead be
-- "TX_AXI_TREADY or not tx_tvalid_reg".
output_reg_p : process (CLK)
357+
begin
358+
if rising_edge(CLK) then
359+
if (TX_AXI_TREADY = '1') then
360+
tx_tdata_reg <= tx_tdata_next;
361+
tx_tkeep_reg <= tx_tkeep_next;
362+
tx_tlast_reg <= tx_tlast_next;
363+
tx_tvalid_reg <= tx_tvalid_next;
364+
end if;
365+
-- Synchronous reset; last assignment wins.
if (RESET = '1') then
366+
tx_tvalid_reg <= '0';
367+
end if;
368+
end if;
369+
end process;
370+
371+
TX_AXI_TDATA <= tx_tdata_reg;
372+
TX_AXI_TKEEP <= tx_tkeep_reg;
373+
TX_AXI_TLAST <= tx_tlast_reg;
374+
TX_AXI_TVALID <= tx_tvalid_reg;
375+
376+
end architecture;

0 commit comments

Comments
 (0)