-
Notifications
You must be signed in to change notification settings - Fork 21
Add IPv4 fragmentation #348
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Some comments aren't visible on the classic Files Changed page.
There are no files selected for viewing
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,186 @@ | ||
| // SPDX-License-Identifier: BSD-3-Clause | ||
| // Copyright (c) 2025 Anthony Harivel | ||
|
|
||
| #include <gr_datapath.h> | ||
| #include <gr_graph.h> | ||
| #include <gr_iface.h> | ||
| #include <gr_ip4_datapath.h> | ||
| #include <gr_log.h> | ||
| #include <gr_mbuf.h> | ||
| #include <gr_trace.h> | ||
|
|
||
| #include <rte_byteorder.h> | ||
| #include <rte_ip.h> | ||
| #include <rte_mbuf.h> | ||
|
|
||
| #include <assert.h> | ||
| #include <stdint.h> | ||
| #include <stdio.h> | ||
|
|
||
aharivel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// Trace record stored for every fragment emitted by the ip_fragment node.
struct ip_fragment_trace_data {
	// IPv4 identification field, converted to host byte order.
	uint16_t packet_id;
	// Index of the fragment within the original packet (0 is the first one).
	uint16_t frag_num;
	// Offset of the fragment payload in the original payload, in bytes.
	uint16_t offset;
	// Non-zero when more fragments follow this one, 0 on the last fragment.
	uint8_t more_frags;
};
|
|
||
// Outgoing edges of the ip_fragment node. The values are used as indexes in
// the next_nodes table of the node registration.
enum {
	// Fragments ready to be sent, wired to "ip_output".
	IP_OUTPUT = 0,
	// Wired to "error_no_headroom".
	NO_MBUF,
	// Input packet was already a fragment.
	ALREADY_FRAGMENTED,
	// Packet cannot be fragmented for the egress MTU.
	ERROR,
	// Number of edges, not an edge itself.
	EDGE_COUNT,
};
|
|
||
| static uint16_t | ||
| ip_fragment_process(struct rte_graph *graph, struct rte_node *node, void **objs, uint16_t nb_objs) { | ||
| struct rte_mbuf *mbuf, *frag_mbuf; | ||
| struct rte_ipv4_hdr *ip, *frag_ip; | ||
| uint16_t frag_size, frag_data_len; | ||
| uint16_t data_len, offset; | ||
| const struct iface *iface; | ||
| uint16_t num_frags, i; | ||
| uint16_t ip_hdr_len; | ||
| uint16_t sent = 0; | ||
| rte_edge_t edge; | ||
| void *payload; | ||
|
|
||
| for (uint16_t j = 0; j < nb_objs; j++) { | ||
| mbuf = objs[j]; | ||
| ip = rte_pktmbuf_mtod(mbuf, struct rte_ipv4_hdr *); | ||
|
|
||
| // Check if packet is already a fragment - if so, just pass it through | ||
| if (ip->fragment_offset | ||
| & RTE_BE16(RTE_IPV4_HDR_MF_FLAG | RTE_IPV4_HDR_OFFSET_MASK)) { | ||
| // This is already a fragment, drop it | ||
| edge = ALREADY_FRAGMENTED; | ||
| goto drop; | ||
| } | ||
aharivel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| iface = mbuf_data(mbuf)->iface; | ||
| assert(iface != NULL); | ||
|
|
||
| ip_hdr_len = rte_ipv4_hdr_len(ip); | ||
| data_len = rte_be_to_cpu_16(ip->total_length) - ip_hdr_len; | ||
|
|
||
| // Calculate fragment payload size (multiple of 8, >= 8) | ||
| uint16_t max_payload = (uint16_t)(iface->mtu - ip_hdr_len); | ||
| frag_size = RTE_ALIGN_FLOOR(max_payload, 8); | ||
| if (unlikely(frag_size < 8)) { | ||
| edge = ERROR; | ||
| goto drop; | ||
| } | ||
|
|
||
| num_frags = (data_len + frag_size - 1) / frag_size; | ||
aharivel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| assert(num_frags > 1); | ||
|
|
||
| // Prepare and enqueue first fragment (using original mbuf) | ||
| ip->total_length = rte_cpu_to_be_16(ip_hdr_len + frag_size); | ||
| ip->fragment_offset = RTE_BE16(RTE_IPV4_HDR_MF_FLAG); | ||
| ip->hdr_checksum = 0; | ||
| ip->hdr_checksum = rte_ipv4_cksum(ip); | ||
|
|
||
| if (gr_mbuf_is_traced(mbuf)) { | ||
| struct ip_fragment_trace_data *t; | ||
| t = gr_mbuf_trace_add(mbuf, node, sizeof(*t)); | ||
| t->packet_id = rte_be_to_cpu_16(ip->packet_id); | ||
| t->frag_num = 0; | ||
| t->offset = 0; | ||
| t->more_frags = 1; | ||
| } | ||
|
|
||
| // Enqueue first fragment | ||
| rte_node_enqueue_x1(graph, node, IP_OUTPUT, mbuf); | ||
| sent++; | ||
|
|
||
| // Create and enqueue remaining fragments | ||
| for (i = 1; i < num_frags; i++) { | ||
| // Create new fragment, copying the original IPv4 header. | ||
| frag_mbuf = rte_pktmbuf_copy(mbuf, mbuf->pool, 0, ip_hdr_len); | ||
| if (unlikely(frag_mbuf == NULL)) { | ||
| break; | ||
| } | ||
|
|
||
| frag_ip = rte_pktmbuf_mtod(frag_mbuf, struct rte_ipv4_hdr *); | ||
| offset = i * frag_size; | ||
| frag_data_len = RTE_MIN(frag_size, data_len - offset); | ||
|
|
||
| payload = rte_pktmbuf_append(frag_mbuf, frag_data_len); | ||
| if (unlikely(payload == NULL)) { | ||
| rte_pktmbuf_free(frag_mbuf); | ||
| break; | ||
| } | ||
|
|
||
| memcpy(payload, | ||
| rte_pktmbuf_mtod_offset(mbuf, const void *, ip_hdr_len + offset), | ||
| frag_data_len); | ||
|
|
||
| frag_ip->total_length = rte_cpu_to_be_16(ip_hdr_len + frag_data_len); | ||
| frag_ip->fragment_offset = rte_cpu_to_be_16( | ||
| (offset / 8) | ((i < num_frags - 1) ? RTE_IPV4_HDR_MF_FLAG : 0) | ||
| ); | ||
| frag_ip->hdr_checksum = 0; | ||
| frag_ip->hdr_checksum = rte_ipv4_cksum(frag_ip); | ||
|
|
||
| *ip_output_mbuf_data(frag_mbuf) = *ip_output_mbuf_data(mbuf); | ||
| frag_mbuf->packet_type = mbuf->packet_type; | ||
aharivel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| if (gr_mbuf_is_traced(mbuf)) { | ||
| struct ip_fragment_trace_data *t; | ||
| t = gr_mbuf_trace_add(frag_mbuf, node, sizeof(*t)); | ||
| t->packet_id = rte_be_to_cpu_16(frag_ip->packet_id); | ||
| t->frag_num = i; | ||
| t->offset = offset; | ||
| t->more_frags = (i < num_frags - 1) ? 1 : 0; | ||
| } | ||
|
|
||
| rte_node_enqueue_x1(graph, node, IP_OUTPUT, frag_mbuf); | ||
| sent++; | ||
| } | ||
|
|
||
| // Trim first fragment to the right size | ||
| rte_pktmbuf_trim(mbuf, data_len - frag_size); | ||
|
|
||
| continue; | ||
|
|
||
| drop: | ||
| rte_node_enqueue_x1(graph, node, edge, mbuf); | ||
| sent++; | ||
| } | ||
|
|
||
| return sent; | ||
| } | ||
|
|
||
| static int ip_fragment_trace_format(char *buf, size_t len, const void *data, size_t /*data_len*/) { | ||
| const struct ip_fragment_trace_data *t = data; | ||
| return snprintf( | ||
| buf, | ||
| len, | ||
| "id=%u frag=%u offset=%u%s", | ||
| t->packet_id, | ||
| t->frag_num, | ||
| t->offset, | ||
| t->more_frags ? " MF" : "" | ||
| ); | ||
| } | ||
|
|
||
| static struct rte_node_register fragment_node = { | ||
| .name = "ip_fragment", | ||
| .process = ip_fragment_process, | ||
| .nb_edges = EDGE_COUNT, | ||
| .next_nodes = { | ||
| [IP_OUTPUT] = "ip_output", | ||
| [NO_MBUF] = "error_no_headroom", | ||
| [ALREADY_FRAGMENTED] = "ip_fragment_already_fragmented", | ||
| [ERROR] = "ip_fragment_error" | ||
| }, | ||
| }; | ||
|
|
||
| static struct gr_node_info info = { | ||
| .node = &fragment_node, | ||
| .trace_format = ip_fragment_trace_format, | ||
| }; | ||
|
|
||
| GR_NODE_REGISTER(info); | ||
|
|
||
| GR_DROP_REGISTER(ip_fragment_error); | ||
| GR_DROP_REGISTER(ip_fragment_already_fragmented); | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,47 @@ | ||
#!/bin/bash
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2025 Anthony Harivel
#
# Smoke test for IPv4 fragmentation: traffic enters through p0 and leaves
# through p1, whose MTU is lower than the size of the test packets.

. $(dirname $0)/_init.sh

p0=${run_id}0
p1=${run_id}1

# p0 keeps the default MTU; p1 (egress) gets MTU 1280 to force fragmentation.
grcli interface add port $p0 devargs net_tap0,iface=$p0 mac f0:0d:ac:dc:00:00
grcli interface add port $p1 devargs net_tap1,iface=$p1 mac f0:0d:ac:dc:00:01 mtu 1280
grcli address add 172.16.0.1/24 iface $p0
grcli address add 172.16.1.1/24 iface $p1

# One network namespace per port, each with a host at 172.16.<n>.2 using the
# router address 172.16.<n>.1 as default gateway.
for n in 0 1; do
	p=$run_id$n
	netns_add $p
	ip link set $p mtu 1500
	ip link set $p netns $p
	ip -n $p link set $p address ba:d0:ca:ca:00:0$n
	ip -n $p link set $p up
	ip -n $p link set lo up
	ip -n $p addr add 172.16.$n.2/24 dev $p
	ip -n $p route add default via 172.16.$n.1
	# Flush the route cache so that no stale PMTU entry is used.
	ip -n $p route flush cache
done

# Test 1: default ping size, no fragmentation needed.
ip netns exec $p0 ping -i0.01 -c3 -n 172.16.1.2

# Test 2: DF=1 with a packet larger than the egress MTU.
# 1260 (payload) + 8 (ICMP) + 20 (IP) = 1288 bytes: fits in p0 MTU (1500)
# but exceeds p1 MTU (1280).
# Expected: ICMP Type 3 Code 4 (Fragmentation Needed and DF Set), ping fails.
if ip netns exec $p0 ping -i0.01 -c3 -s 1260 -M do -n 172.16.1.2; then
	fail "ping with DF flag should have failed"
fi

# Test 3: same 1288-byte packet with DF=0 (fragmentation allowed).
# Expected: two fragments (1276 + 32 bytes) and the ping succeeds.
# The route cache is flushed first to forget the PMTU learned in test 2.
ip -n $p0 route flush cache
ip netns exec $p0 ping -i0.01 -c3 -s 1260 -M dont -n 172.16.1.2
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Isn't it weird to have the node `ip_error_frag_needed` without any link to `icmp_output`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No idea. I guess it's like `ip_error_ttl_exceeded`, which doesn't have any link to `icmp_output` either? Maybe a bug in the graph generation?