From 69db25913303bfac8f5fdccd2d7b4eead31507c8 Mon Sep 17 00:00:00 2001 From: Jalal Mostafa Date: Sun, 23 Mar 2025 18:14:37 +0100 Subject: [PATCH 1/6] headers/linux: Update bpf.h to kernel version 6.3 Update the bpf.h UAPI header from the Linux to that from kernel version 6.3. We need the definition of BPF_F_XDP_DEV_BOUND_ONLY to support it in libxdp. Signed-off-by: Jalal Mostafa --- headers/linux/bpf.h | 735 +++++++++++++++++++++++++++++--------------- 1 file changed, 485 insertions(+), 250 deletions(-) diff --git a/headers/linux/bpf.h b/headers/linux/bpf.h index 59a217ca..62ce1f5d 100644 --- a/headers/linux/bpf.h +++ b/headers/linux/bpf.h @@ -87,10 +87,35 @@ struct bpf_cgroup_storage_key { __u32 attach_type; /* program attach type (enum bpf_attach_type) */ }; +enum bpf_cgroup_iter_order { + BPF_CGROUP_ITER_ORDER_UNSPEC = 0, + BPF_CGROUP_ITER_SELF_ONLY, /* process only a single object. */ + BPF_CGROUP_ITER_DESCENDANTS_PRE, /* walk descendants in pre-order. */ + BPF_CGROUP_ITER_DESCENDANTS_POST, /* walk descendants in post-order. */ + BPF_CGROUP_ITER_ANCESTORS_UP, /* walk ancestors upward. */ +}; + union bpf_iter_link_info { struct { __u32 map_fd; } map; + struct { + enum bpf_cgroup_iter_order order; + + /* At most one of cgroup_fd and cgroup_id can be non-zero. If + * both are zero, the walk starts from the default cgroup v2 + * root. For walking v1 hierarchy, one should always explicitly + * specify cgroup_fd. + */ + __u32 cgroup_fd; + __u64 cgroup_id; + } cgroup; + /* Parameters of task iterators. */ + struct { + __u32 tid; + __u32 pid; + __u32 pid_fd; + } task; }; /* BPF syscall commands, see bpf(2) man-page for more details. */ @@ -897,7 +922,14 @@ enum bpf_map_type { BPF_MAP_TYPE_CPUMAP, BPF_MAP_TYPE_XSKMAP, BPF_MAP_TYPE_SOCKHASH, - BPF_MAP_TYPE_CGROUP_STORAGE, + BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED, + /* BPF_MAP_TYPE_CGROUP_STORAGE is available to bpf programs attaching + * to a cgroup. 
The newer BPF_MAP_TYPE_CGRP_STORAGE is available to + * both cgroup-attached and other progs and supports all functionality + * provided by BPF_MAP_TYPE_CGROUP_STORAGE. So mark + * BPF_MAP_TYPE_CGROUP_STORAGE deprecated. + */ + BPF_MAP_TYPE_CGROUP_STORAGE = BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, BPF_MAP_TYPE_QUEUE, @@ -909,6 +941,8 @@ enum bpf_map_type { BPF_MAP_TYPE_INODE_STORAGE, BPF_MAP_TYPE_TASK_STORAGE, BPF_MAP_TYPE_BLOOM_FILTER, + BPF_MAP_TYPE_USER_RINGBUF, + BPF_MAP_TYPE_CGRP_STORAGE, }; /* Note that tracing related programs such as @@ -1122,6 +1156,11 @@ enum bpf_link_type { */ #define BPF_F_XDP_HAS_FRAGS (1U << 5) +/* If BPF_F_XDP_DEV_BOUND_ONLY is used in BPF_PROG_LOAD command, the loaded + * program becomes device-bound but can access XDP metadata. + */ +#define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6) + /* link_create.kprobe_multi.flags used in LINK_CREATE command for * BPF_TRACE_KPROBE_MULTI attach type to create return probe. */ @@ -1233,7 +1272,7 @@ enum { /* Query effective (directly attached + inherited from ancestor cgroups) * programs that will be executed for events within a cgroup. - * attach_flags with this flag are returned only for directly attached programs. + * attach_flags with this flag are always returned 0. */ #define BPF_F_QUERY_EFFECTIVE (1U << 0) @@ -1432,7 +1471,10 @@ union bpf_attr { __u32 attach_flags; __aligned_u64 prog_ids; __u32 prog_cnt; - __aligned_u64 prog_attach_flags; /* output: per-program attach_flags */ + /* output: per-program attach_flags. + * not allowed to be set during effective query. + */ + __aligned_u64 prog_attach_flags; } query; struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */ @@ -1964,6 +2006,9 @@ union bpf_attr { * sending the packet. This flag was added for GRE * encapsulation, but might be used with other protocols * as well in the future. 
+ * **BPF_F_NO_TUNNEL_KEY** + * Add a flag to tunnel metadata indicating that no tunnel + * key should be set in the resulting tunnel header. * * Here is a typical usage on the transmit path: * @@ -2547,14 +2592,19 @@ union bpf_attr { * * **SOL_SOCKET**, which supports the following *optname*\ s: * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**, * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**, - * **SO_BINDTODEVICE**, **SO_KEEPALIVE**. + * **SO_BINDTODEVICE**, **SO_KEEPALIVE**, **SO_REUSEADDR**, + * **SO_REUSEPORT**, **SO_BINDTOIFINDEX**, **SO_TXREHASH**. * * **IPPROTO_TCP**, which supports the following *optname*\ s: * **TCP_CONGESTION**, **TCP_BPF_IW**, * **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**, * **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**, - * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**. + * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**, + * **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**, + * **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**, + * **TCP_BPF_RTO_MIN**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. - * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. + * * **IPPROTO_IPV6**, which supports the following *optname*\ s: + * **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**. * Return * 0 on success, or a negative error in case of failure. * @@ -2573,10 +2623,12 @@ union bpf_attr { * There are two supported modes at this time: * * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer - * (room space is added or removed below the layer 2 header). + * (room space is added or removed between the layer 2 and + * layer 3 headers). * * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer - * (room space is added or removed below the layer 3 header). + * (room space is added or removed between the layer 3 and + * layer 4 headers). 
* * The following flags are supported at this time: * @@ -2600,6 +2652,11 @@ union bpf_attr { * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the * L2 type as Ethernet. * + * * **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**, + * **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**: + * Indicate the new IP header version after decapsulating the outer + * IP header. Used when the inner and outer IP versions are different. + * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be @@ -2608,7 +2665,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) + * long bpf_redirect_map(struct bpf_map *map, u64 key, u64 flags) * Description * Redirect the packet to the endpoint referenced by *map* at * index *key*. Depending on its type, this *map* can contain @@ -2744,7 +2801,7 @@ union bpf_attr { * * long bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) * Description - * For en eBPF program attached to a perf event, retrieve the + * For an eBPF program attached to a perf event, retrieve the * value of the event counter associated to *ctx* and store it in * the structure pointed by *buf* and of size *buf_size*. Enabled * and running times are also stored in the structure (see @@ -2769,12 +2826,10 @@ union bpf_attr { * and **BPF_CGROUP_INET6_CONNECT**. * * This helper actually implements a subset of **getsockopt()**. - * It supports the following *level*\ s: - * - * * **IPPROTO_TCP**, which supports *optname* - * **TCP_CONGESTION**. - * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. - * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. + * It supports the same set of *optname*\ s that is supported by + * the **bpf_setsockopt**\ () helper. 
The exceptions are + * **TCP_BPF_*** is **bpf_setsockopt**\ () only and + * **TCP_SAVED_SYN** is **bpf_getsockopt**\ () only. * Return * 0 on success, or a negative error in case of failure. * @@ -3008,8 +3063,18 @@ union bpf_attr { * **BPF_F_USER_STACK** * Collect a user space stack instead of a kernel stack. * **BPF_F_USER_BUILD_ID** - * Collect buildid+offset instead of ips for user stack, - * only valid if **BPF_F_USER_STACK** is also specified. + * Collect (build_id, file_offset) instead of ips for user + * stack, only valid if **BPF_F_USER_STACK** is also + * specified. + * + * *file_offset* is an offset relative to the beginning + * of the executable or shared object file backing the vma + * which the *ip* falls in. It is *not* an offset relative + * to that object's base address. Accordingly, it must be + * adjusted by adding (sh_addr - sh_offset), where + * sh_{addr,offset} correspond to the executable section + * containing *file_offset* in the object, for comparisons + * to symbols' st_value to be valid. * * **bpf_get_stack**\ () can collect up to * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject @@ -3069,6 +3134,11 @@ union bpf_attr { * **BPF_FIB_LOOKUP_OUTPUT** * Perform lookup from an egress perspective (default is * ingress). + * **BPF_FIB_LOOKUP_SKIP_NEIGH** + * Skip the neighbour table lookup. *params*->dmac + * and *params*->smac will not be set as output. A common + * use case is to call **bpf_redirect_neigh**\ () after + * doing **bpf_fib_lookup**\ (). * * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. @@ -4425,7 +4495,7 @@ union bpf_attr { * * **-EEXIST** if the option already exists. * - * **-EFAULT** on failrue to parse the existing header options. + * **-EFAULT** on failure to parse the existing header options. * * **-EPERM** if the helper cannot be used under the current * *skops*\ **->op**. @@ -4634,7 +4704,7 @@ union bpf_attr { * a *map* with *task* as the **key**. 
From this * perspective, the usage is not much different from * **bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this - * helper enforces the key must be an task_struct and the map must also + * helper enforces the key must be a task_struct and the map must also * be a **BPF_MAP_TYPE_TASK_STORAGE**. * * Underneath, the value is stored locally at *task* instead of @@ -4692,7 +4762,7 @@ union bpf_attr { * * long bpf_ima_inode_hash(struct inode *inode, void *dst, u32 size) * Description - * Returns the stored IMA hash of the *inode* (if it's avaialable). + * Returns the stored IMA hash of the *inode* (if it's available). * If the hash is larger than *size*, then only *size* * bytes will be copied to *dst* * Return @@ -4716,12 +4786,12 @@ union bpf_attr { * * The argument *len_diff* can be used for querying with a planned * size change. This allows to check MTU prior to changing packet - * ctx. Providing an *len_diff* adjustment that is larger than the + * ctx. Providing a *len_diff* adjustment that is larger than the * actual packet size (resulting in negative packet size) will in - * principle not exceed the MTU, why it is not considered a - * failure. Other BPF-helpers are needed for performing the - * planned size change, why the responsability for catch a negative - * packet size belong in those helpers. + * principle not exceed the MTU, which is why it is not considered + * a failure. Other BPF helpers are needed for performing the + * planned size change; therefore the responsibility for catching + * a negative packet size belongs in those helpers. * * Specifying *ifindex* zero means the MTU check is performed * against the current net device. This is practical if this isn't @@ -4919,6 +4989,7 @@ union bpf_attr { * Get address of the traced function (for tracing and kprobe programs). * Return * Address of the traced function. + * 0 for kprobes placed within the function (not at the entry). 
* * u64 bpf_get_attach_cookie(void *ctx) * Description @@ -5048,12 +5119,12 @@ union bpf_attr { * * long bpf_get_func_arg(void *ctx, u32 n, u64 *value) * Description - * Get **n**-th argument (zero based) of the traced function (for tracing programs) + * Get **n**-th argument register (zero based) of the traced function (for tracing programs) * returned in **value**. * * Return * 0 on success. - * **-EINVAL** if n >= arguments count of traced function. + * **-EINVAL** if n >= argument register count of traced function. * * long bpf_get_func_ret(void *ctx, u64 *value) * Description @@ -5066,24 +5137,37 @@ union bpf_attr { * * long bpf_get_func_arg_cnt(void *ctx) * Description - * Get number of arguments of the traced function (for tracing programs). + * Get number of registers of the traced function (for tracing programs) where + * function arguments are stored in these registers. * * Return - * The number of arguments of the traced function. + * The number of argument registers of the traced function. * * int bpf_get_retval(void) * Description - * Get the syscall's return value that will be returned to userspace. + * Get the BPF program's return value that will be returned to the upper layers. * - * This helper is currently supported by cgroup programs only. + * This helper is currently supported by cgroup programs and only by the hooks + * where BPF program's return value is returned to the userspace via errno. * Return - * The syscall's return value. + * The BPF program's return value. * * int bpf_set_retval(int retval) * Description - * Set the syscall's return value that will be returned to userspace. + * Set the BPF program's return value that will be returned to the upper layers. + * + * This helper is currently supported by cgroup programs and only by the hooks + * where BPF program's return value is returned to the userspace via errno. 
+ * + * Note that there is the following corner case where the program exports an error + * via bpf_set_retval but signals success via 'return 1': + * + * bpf_set_retval(-EPERM); + * return 1; + * + * In this case, the BPF program's return value will use helper's -EPERM. This + * still holds true for cgroup/bind{4,6} which supports extra 'return 3' success case. * - * This helper is currently supported by cgroup programs only. * Return * 0 on success, or a negative error in case of failure. * @@ -5227,7 +5311,7 @@ union bpf_attr { * Return * Nothing. Always succeeds. * - * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset, u64 flags) + * long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src, u32 offset, u64 flags) * Description * Read *len* bytes from *src* into *dst*, starting from *offset* * into *src*. @@ -5237,7 +5321,7 @@ union bpf_attr { * of *src*'s data, -EINVAL if *src* is an invalid dynptr or if * *flags* is not 0. * - * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags) + * long bpf_dynptr_write(const struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags) * Description * Write *len* bytes from *src* into *dst*, starting from *offset* * into *dst*. @@ -5247,7 +5331,7 @@ union bpf_attr { * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* * is a read-only dynptr or if *flags* is not 0. * - * void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len) + * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len) * Description * Get a pointer to the underlying dynptr data. * @@ -5331,224 +5415,321 @@ union bpf_attr { * **-EACCES** if the SYN cookie is not valid. * * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin. + * + * u64 bpf_ktime_get_tai_ns(void) + * Description + * A nonsettable system-wide clock derived from wall-clock time but + * ignoring leap seconds. 
This clock does not experience + * discontinuities and backwards jumps caused by NTP inserting leap + * seconds as CLOCK_REALTIME does. + * + * See: **clock_gettime**\ (**CLOCK_TAI**) + * Return + * Current *ktime*. + * + * long bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void *ctx, u64 flags) + * Description + * Drain samples from the specified user ring buffer, and invoke + * the provided callback for each such sample: + * + * long (\*callback_fn)(const struct bpf_dynptr \*dynptr, void \*ctx); + * + * If **callback_fn** returns 0, the helper will continue to try + * and drain the next sample, up to a maximum of + * BPF_MAX_USER_RINGBUF_SAMPLES samples. If the return value is 1, + * the helper will skip the rest of the samples and return. Other + * return values are not used now, and will be rejected by the + * verifier. + * Return + * The number of drained samples if no error was encountered while + * draining samples, or 0 if no samples were present in the ring + * buffer. If a user-space producer was epoll-waiting on this map, + * and at least one sample was drained, they will receive an event + * notification notifying them of available space in the ring + * buffer. If the BPF_RB_NO_WAKEUP flag is passed to this + * function, no wakeup notification will be sent. If the + * BPF_RB_FORCE_WAKEUP flag is passed, a wakeup notification will + * be sent even if no sample was drained. + * + * On failure, the returned value is one of the following: + * + * **-EBUSY** if the ring buffer is contended, and another calling + * context was concurrently draining the ring buffer. + * + * **-EINVAL** if user-space is not properly tracking the ring + * buffer due to the producer position not being aligned to 8 + * bytes, a sample not being aligned to 8 bytes, or the producer + * position not matching the advertised length of a sample. 
+ * + * **-E2BIG** if user-space has tried to publish a sample which is + * larger than the size of the ring buffer, or which cannot fit + * within a struct bpf_dynptr. + * + * void *bpf_cgrp_storage_get(struct bpf_map *map, struct cgroup *cgroup, void *value, u64 flags) + * Description + * Get a bpf_local_storage from the *cgroup*. + * + * Logically, it could be thought of as getting the value from + * a *map* with *cgroup* as the **key**. From this + * perspective, the usage is not much different from + * **bpf_map_lookup_elem**\ (*map*, **&**\ *cgroup*) except this + * helper enforces the key must be a cgroup struct and the map must also + * be a **BPF_MAP_TYPE_CGRP_STORAGE**. + * + * In reality, the local-storage value is embedded directly inside of the + * *cgroup* object itself, rather than being located in the + * **BPF_MAP_TYPE_CGRP_STORAGE** map. When the local-storage value is + * queried for some *map* on a *cgroup* object, the kernel will perform an + * O(n) iteration over all of the live local-storage values for that + * *cgroup* object until the local-storage value for the *map* is found. + * + * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be + * used such that a new bpf_local_storage will be + * created if one does not exist. *value* can be used + * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify + * the initial value of a bpf_local_storage. If *value* is + * **NULL**, the new bpf_local_storage will be zero initialized. + * Return + * A bpf_local_storage pointer is returned on success. + * + * **NULL** if not found or there was an error in adding + * a new bpf_local_storage. + * + * long bpf_cgrp_storage_delete(struct bpf_map *map, struct cgroup *cgroup) + * Description + * Delete a bpf_local_storage from a *cgroup*. + * Return + * 0 on success. + * + * **-ENOENT** if the bpf_local_storage cannot be found. 
*/ -#define __BPF_FUNC_MAPPER(FN) \ - FN(unspec), \ - FN(map_lookup_elem), \ - FN(map_update_elem), \ - FN(map_delete_elem), \ - FN(probe_read), \ - FN(ktime_get_ns), \ - FN(trace_printk), \ - FN(get_prandom_u32), \ - FN(get_smp_processor_id), \ - FN(skb_store_bytes), \ - FN(l3_csum_replace), \ - FN(l4_csum_replace), \ - FN(tail_call), \ - FN(clone_redirect), \ - FN(get_current_pid_tgid), \ - FN(get_current_uid_gid), \ - FN(get_current_comm), \ - FN(get_cgroup_classid), \ - FN(skb_vlan_push), \ - FN(skb_vlan_pop), \ - FN(skb_get_tunnel_key), \ - FN(skb_set_tunnel_key), \ - FN(perf_event_read), \ - FN(redirect), \ - FN(get_route_realm), \ - FN(perf_event_output), \ - FN(skb_load_bytes), \ - FN(get_stackid), \ - FN(csum_diff), \ - FN(skb_get_tunnel_opt), \ - FN(skb_set_tunnel_opt), \ - FN(skb_change_proto), \ - FN(skb_change_type), \ - FN(skb_under_cgroup), \ - FN(get_hash_recalc), \ - FN(get_current_task), \ - FN(probe_write_user), \ - FN(current_task_under_cgroup), \ - FN(skb_change_tail), \ - FN(skb_pull_data), \ - FN(csum_update), \ - FN(set_hash_invalid), \ - FN(get_numa_node_id), \ - FN(skb_change_head), \ - FN(xdp_adjust_head), \ - FN(probe_read_str), \ - FN(get_socket_cookie), \ - FN(get_socket_uid), \ - FN(set_hash), \ - FN(setsockopt), \ - FN(skb_adjust_room), \ - FN(redirect_map), \ - FN(sk_redirect_map), \ - FN(sock_map_update), \ - FN(xdp_adjust_meta), \ - FN(perf_event_read_value), \ - FN(perf_prog_read_value), \ - FN(getsockopt), \ - FN(override_return), \ - FN(sock_ops_cb_flags_set), \ - FN(msg_redirect_map), \ - FN(msg_apply_bytes), \ - FN(msg_cork_bytes), \ - FN(msg_pull_data), \ - FN(bind), \ - FN(xdp_adjust_tail), \ - FN(skb_get_xfrm_state), \ - FN(get_stack), \ - FN(skb_load_bytes_relative), \ - FN(fib_lookup), \ - FN(sock_hash_update), \ - FN(msg_redirect_hash), \ - FN(sk_redirect_hash), \ - FN(lwt_push_encap), \ - FN(lwt_seg6_store_bytes), \ - FN(lwt_seg6_adjust_srh), \ - FN(lwt_seg6_action), \ - FN(rc_repeat), \ - FN(rc_keydown), \ - 
FN(skb_cgroup_id), \ - FN(get_current_cgroup_id), \ - FN(get_local_storage), \ - FN(sk_select_reuseport), \ - FN(skb_ancestor_cgroup_id), \ - FN(sk_lookup_tcp), \ - FN(sk_lookup_udp), \ - FN(sk_release), \ - FN(map_push_elem), \ - FN(map_pop_elem), \ - FN(map_peek_elem), \ - FN(msg_push_data), \ - FN(msg_pop_data), \ - FN(rc_pointer_rel), \ - FN(spin_lock), \ - FN(spin_unlock), \ - FN(sk_fullsock), \ - FN(tcp_sock), \ - FN(skb_ecn_set_ce), \ - FN(get_listener_sock), \ - FN(skc_lookup_tcp), \ - FN(tcp_check_syncookie), \ - FN(sysctl_get_name), \ - FN(sysctl_get_current_value), \ - FN(sysctl_get_new_value), \ - FN(sysctl_set_new_value), \ - FN(strtol), \ - FN(strtoul), \ - FN(sk_storage_get), \ - FN(sk_storage_delete), \ - FN(send_signal), \ - FN(tcp_gen_syncookie), \ - FN(skb_output), \ - FN(probe_read_user), \ - FN(probe_read_kernel), \ - FN(probe_read_user_str), \ - FN(probe_read_kernel_str), \ - FN(tcp_send_ack), \ - FN(send_signal_thread), \ - FN(jiffies64), \ - FN(read_branch_records), \ - FN(get_ns_current_pid_tgid), \ - FN(xdp_output), \ - FN(get_netns_cookie), \ - FN(get_current_ancestor_cgroup_id), \ - FN(sk_assign), \ - FN(ktime_get_boot_ns), \ - FN(seq_printf), \ - FN(seq_write), \ - FN(sk_cgroup_id), \ - FN(sk_ancestor_cgroup_id), \ - FN(ringbuf_output), \ - FN(ringbuf_reserve), \ - FN(ringbuf_submit), \ - FN(ringbuf_discard), \ - FN(ringbuf_query), \ - FN(csum_level), \ - FN(skc_to_tcp6_sock), \ - FN(skc_to_tcp_sock), \ - FN(skc_to_tcp_timewait_sock), \ - FN(skc_to_tcp_request_sock), \ - FN(skc_to_udp6_sock), \ - FN(get_task_stack), \ - FN(load_hdr_opt), \ - FN(store_hdr_opt), \ - FN(reserve_hdr_opt), \ - FN(inode_storage_get), \ - FN(inode_storage_delete), \ - FN(d_path), \ - FN(copy_from_user), \ - FN(snprintf_btf), \ - FN(seq_printf_btf), \ - FN(skb_cgroup_classid), \ - FN(redirect_neigh), \ - FN(per_cpu_ptr), \ - FN(this_cpu_ptr), \ - FN(redirect_peer), \ - FN(task_storage_get), \ - FN(task_storage_delete), \ - FN(get_current_task_btf), \ - 
FN(bprm_opts_set), \ - FN(ktime_get_coarse_ns), \ - FN(ima_inode_hash), \ - FN(sock_from_file), \ - FN(check_mtu), \ - FN(for_each_map_elem), \ - FN(snprintf), \ - FN(sys_bpf), \ - FN(btf_find_by_name_kind), \ - FN(sys_close), \ - FN(timer_init), \ - FN(timer_set_callback), \ - FN(timer_start), \ - FN(timer_cancel), \ - FN(get_func_ip), \ - FN(get_attach_cookie), \ - FN(task_pt_regs), \ - FN(get_branch_snapshot), \ - FN(trace_vprintk), \ - FN(skc_to_unix_sock), \ - FN(kallsyms_lookup_name), \ - FN(find_vma), \ - FN(loop), \ - FN(strncmp), \ - FN(get_func_arg), \ - FN(get_func_ret), \ - FN(get_func_arg_cnt), \ - FN(get_retval), \ - FN(set_retval), \ - FN(xdp_get_buff_len), \ - FN(xdp_load_bytes), \ - FN(xdp_store_bytes), \ - FN(copy_from_user_task), \ - FN(skb_set_tstamp), \ - FN(ima_file_hash), \ - FN(kptr_xchg), \ - FN(map_lookup_percpu_elem), \ - FN(skc_to_mptcp_sock), \ - FN(dynptr_from_mem), \ - FN(ringbuf_reserve_dynptr), \ - FN(ringbuf_submit_dynptr), \ - FN(ringbuf_discard_dynptr), \ - FN(dynptr_read), \ - FN(dynptr_write), \ - FN(dynptr_data), \ - FN(tcp_raw_gen_syncookie_ipv4), \ - FN(tcp_raw_gen_syncookie_ipv6), \ - FN(tcp_raw_check_syncookie_ipv4), \ - FN(tcp_raw_check_syncookie_ipv6), \ +#define ___BPF_FUNC_MAPPER(FN, ctx...) 
\ + FN(unspec, 0, ##ctx) \ + FN(map_lookup_elem, 1, ##ctx) \ + FN(map_update_elem, 2, ##ctx) \ + FN(map_delete_elem, 3, ##ctx) \ + FN(probe_read, 4, ##ctx) \ + FN(ktime_get_ns, 5, ##ctx) \ + FN(trace_printk, 6, ##ctx) \ + FN(get_prandom_u32, 7, ##ctx) \ + FN(get_smp_processor_id, 8, ##ctx) \ + FN(skb_store_bytes, 9, ##ctx) \ + FN(l3_csum_replace, 10, ##ctx) \ + FN(l4_csum_replace, 11, ##ctx) \ + FN(tail_call, 12, ##ctx) \ + FN(clone_redirect, 13, ##ctx) \ + FN(get_current_pid_tgid, 14, ##ctx) \ + FN(get_current_uid_gid, 15, ##ctx) \ + FN(get_current_comm, 16, ##ctx) \ + FN(get_cgroup_classid, 17, ##ctx) \ + FN(skb_vlan_push, 18, ##ctx) \ + FN(skb_vlan_pop, 19, ##ctx) \ + FN(skb_get_tunnel_key, 20, ##ctx) \ + FN(skb_set_tunnel_key, 21, ##ctx) \ + FN(perf_event_read, 22, ##ctx) \ + FN(redirect, 23, ##ctx) \ + FN(get_route_realm, 24, ##ctx) \ + FN(perf_event_output, 25, ##ctx) \ + FN(skb_load_bytes, 26, ##ctx) \ + FN(get_stackid, 27, ##ctx) \ + FN(csum_diff, 28, ##ctx) \ + FN(skb_get_tunnel_opt, 29, ##ctx) \ + FN(skb_set_tunnel_opt, 30, ##ctx) \ + FN(skb_change_proto, 31, ##ctx) \ + FN(skb_change_type, 32, ##ctx) \ + FN(skb_under_cgroup, 33, ##ctx) \ + FN(get_hash_recalc, 34, ##ctx) \ + FN(get_current_task, 35, ##ctx) \ + FN(probe_write_user, 36, ##ctx) \ + FN(current_task_under_cgroup, 37, ##ctx) \ + FN(skb_change_tail, 38, ##ctx) \ + FN(skb_pull_data, 39, ##ctx) \ + FN(csum_update, 40, ##ctx) \ + FN(set_hash_invalid, 41, ##ctx) \ + FN(get_numa_node_id, 42, ##ctx) \ + FN(skb_change_head, 43, ##ctx) \ + FN(xdp_adjust_head, 44, ##ctx) \ + FN(probe_read_str, 45, ##ctx) \ + FN(get_socket_cookie, 46, ##ctx) \ + FN(get_socket_uid, 47, ##ctx) \ + FN(set_hash, 48, ##ctx) \ + FN(setsockopt, 49, ##ctx) \ + FN(skb_adjust_room, 50, ##ctx) \ + FN(redirect_map, 51, ##ctx) \ + FN(sk_redirect_map, 52, ##ctx) \ + FN(sock_map_update, 53, ##ctx) \ + FN(xdp_adjust_meta, 54, ##ctx) \ + FN(perf_event_read_value, 55, ##ctx) \ + FN(perf_prog_read_value, 56, ##ctx) \ + FN(getsockopt, 57, 
##ctx) \ + FN(override_return, 58, ##ctx) \ + FN(sock_ops_cb_flags_set, 59, ##ctx) \ + FN(msg_redirect_map, 60, ##ctx) \ + FN(msg_apply_bytes, 61, ##ctx) \ + FN(msg_cork_bytes, 62, ##ctx) \ + FN(msg_pull_data, 63, ##ctx) \ + FN(bind, 64, ##ctx) \ + FN(xdp_adjust_tail, 65, ##ctx) \ + FN(skb_get_xfrm_state, 66, ##ctx) \ + FN(get_stack, 67, ##ctx) \ + FN(skb_load_bytes_relative, 68, ##ctx) \ + FN(fib_lookup, 69, ##ctx) \ + FN(sock_hash_update, 70, ##ctx) \ + FN(msg_redirect_hash, 71, ##ctx) \ + FN(sk_redirect_hash, 72, ##ctx) \ + FN(lwt_push_encap, 73, ##ctx) \ + FN(lwt_seg6_store_bytes, 74, ##ctx) \ + FN(lwt_seg6_adjust_srh, 75, ##ctx) \ + FN(lwt_seg6_action, 76, ##ctx) \ + FN(rc_repeat, 77, ##ctx) \ + FN(rc_keydown, 78, ##ctx) \ + FN(skb_cgroup_id, 79, ##ctx) \ + FN(get_current_cgroup_id, 80, ##ctx) \ + FN(get_local_storage, 81, ##ctx) \ + FN(sk_select_reuseport, 82, ##ctx) \ + FN(skb_ancestor_cgroup_id, 83, ##ctx) \ + FN(sk_lookup_tcp, 84, ##ctx) \ + FN(sk_lookup_udp, 85, ##ctx) \ + FN(sk_release, 86, ##ctx) \ + FN(map_push_elem, 87, ##ctx) \ + FN(map_pop_elem, 88, ##ctx) \ + FN(map_peek_elem, 89, ##ctx) \ + FN(msg_push_data, 90, ##ctx) \ + FN(msg_pop_data, 91, ##ctx) \ + FN(rc_pointer_rel, 92, ##ctx) \ + FN(spin_lock, 93, ##ctx) \ + FN(spin_unlock, 94, ##ctx) \ + FN(sk_fullsock, 95, ##ctx) \ + FN(tcp_sock, 96, ##ctx) \ + FN(skb_ecn_set_ce, 97, ##ctx) \ + FN(get_listener_sock, 98, ##ctx) \ + FN(skc_lookup_tcp, 99, ##ctx) \ + FN(tcp_check_syncookie, 100, ##ctx) \ + FN(sysctl_get_name, 101, ##ctx) \ + FN(sysctl_get_current_value, 102, ##ctx) \ + FN(sysctl_get_new_value, 103, ##ctx) \ + FN(sysctl_set_new_value, 104, ##ctx) \ + FN(strtol, 105, ##ctx) \ + FN(strtoul, 106, ##ctx) \ + FN(sk_storage_get, 107, ##ctx) \ + FN(sk_storage_delete, 108, ##ctx) \ + FN(send_signal, 109, ##ctx) \ + FN(tcp_gen_syncookie, 110, ##ctx) \ + FN(skb_output, 111, ##ctx) \ + FN(probe_read_user, 112, ##ctx) \ + FN(probe_read_kernel, 113, ##ctx) \ + FN(probe_read_user_str, 114, ##ctx) \ + 
FN(probe_read_kernel_str, 115, ##ctx) \ + FN(tcp_send_ack, 116, ##ctx) \ + FN(send_signal_thread, 117, ##ctx) \ + FN(jiffies64, 118, ##ctx) \ + FN(read_branch_records, 119, ##ctx) \ + FN(get_ns_current_pid_tgid, 120, ##ctx) \ + FN(xdp_output, 121, ##ctx) \ + FN(get_netns_cookie, 122, ##ctx) \ + FN(get_current_ancestor_cgroup_id, 123, ##ctx) \ + FN(sk_assign, 124, ##ctx) \ + FN(ktime_get_boot_ns, 125, ##ctx) \ + FN(seq_printf, 126, ##ctx) \ + FN(seq_write, 127, ##ctx) \ + FN(sk_cgroup_id, 128, ##ctx) \ + FN(sk_ancestor_cgroup_id, 129, ##ctx) \ + FN(ringbuf_output, 130, ##ctx) \ + FN(ringbuf_reserve, 131, ##ctx) \ + FN(ringbuf_submit, 132, ##ctx) \ + FN(ringbuf_discard, 133, ##ctx) \ + FN(ringbuf_query, 134, ##ctx) \ + FN(csum_level, 135, ##ctx) \ + FN(skc_to_tcp6_sock, 136, ##ctx) \ + FN(skc_to_tcp_sock, 137, ##ctx) \ + FN(skc_to_tcp_timewait_sock, 138, ##ctx) \ + FN(skc_to_tcp_request_sock, 139, ##ctx) \ + FN(skc_to_udp6_sock, 140, ##ctx) \ + FN(get_task_stack, 141, ##ctx) \ + FN(load_hdr_opt, 142, ##ctx) \ + FN(store_hdr_opt, 143, ##ctx) \ + FN(reserve_hdr_opt, 144, ##ctx) \ + FN(inode_storage_get, 145, ##ctx) \ + FN(inode_storage_delete, 146, ##ctx) \ + FN(d_path, 147, ##ctx) \ + FN(copy_from_user, 148, ##ctx) \ + FN(snprintf_btf, 149, ##ctx) \ + FN(seq_printf_btf, 150, ##ctx) \ + FN(skb_cgroup_classid, 151, ##ctx) \ + FN(redirect_neigh, 152, ##ctx) \ + FN(per_cpu_ptr, 153, ##ctx) \ + FN(this_cpu_ptr, 154, ##ctx) \ + FN(redirect_peer, 155, ##ctx) \ + FN(task_storage_get, 156, ##ctx) \ + FN(task_storage_delete, 157, ##ctx) \ + FN(get_current_task_btf, 158, ##ctx) \ + FN(bprm_opts_set, 159, ##ctx) \ + FN(ktime_get_coarse_ns, 160, ##ctx) \ + FN(ima_inode_hash, 161, ##ctx) \ + FN(sock_from_file, 162, ##ctx) \ + FN(check_mtu, 163, ##ctx) \ + FN(for_each_map_elem, 164, ##ctx) \ + FN(snprintf, 165, ##ctx) \ + FN(sys_bpf, 166, ##ctx) \ + FN(btf_find_by_name_kind, 167, ##ctx) \ + FN(sys_close, 168, ##ctx) \ + FN(timer_init, 169, ##ctx) \ + FN(timer_set_callback, 170, 
##ctx) \ + FN(timer_start, 171, ##ctx) \ + FN(timer_cancel, 172, ##ctx) \ + FN(get_func_ip, 173, ##ctx) \ + FN(get_attach_cookie, 174, ##ctx) \ + FN(task_pt_regs, 175, ##ctx) \ + FN(get_branch_snapshot, 176, ##ctx) \ + FN(trace_vprintk, 177, ##ctx) \ + FN(skc_to_unix_sock, 178, ##ctx) \ + FN(kallsyms_lookup_name, 179, ##ctx) \ + FN(find_vma, 180, ##ctx) \ + FN(loop, 181, ##ctx) \ + FN(strncmp, 182, ##ctx) \ + FN(get_func_arg, 183, ##ctx) \ + FN(get_func_ret, 184, ##ctx) \ + FN(get_func_arg_cnt, 185, ##ctx) \ + FN(get_retval, 186, ##ctx) \ + FN(set_retval, 187, ##ctx) \ + FN(xdp_get_buff_len, 188, ##ctx) \ + FN(xdp_load_bytes, 189, ##ctx) \ + FN(xdp_store_bytes, 190, ##ctx) \ + FN(copy_from_user_task, 191, ##ctx) \ + FN(skb_set_tstamp, 192, ##ctx) \ + FN(ima_file_hash, 193, ##ctx) \ + FN(kptr_xchg, 194, ##ctx) \ + FN(map_lookup_percpu_elem, 195, ##ctx) \ + FN(skc_to_mptcp_sock, 196, ##ctx) \ + FN(dynptr_from_mem, 197, ##ctx) \ + FN(ringbuf_reserve_dynptr, 198, ##ctx) \ + FN(ringbuf_submit_dynptr, 199, ##ctx) \ + FN(ringbuf_discard_dynptr, 200, ##ctx) \ + FN(dynptr_read, 201, ##ctx) \ + FN(dynptr_write, 202, ##ctx) \ + FN(dynptr_data, 203, ##ctx) \ + FN(tcp_raw_gen_syncookie_ipv4, 204, ##ctx) \ + FN(tcp_raw_gen_syncookie_ipv6, 205, ##ctx) \ + FN(tcp_raw_check_syncookie_ipv4, 206, ##ctx) \ + FN(tcp_raw_check_syncookie_ipv6, 207, ##ctx) \ + FN(ktime_get_tai_ns, 208, ##ctx) \ + FN(user_ringbuf_drain, 209, ##ctx) \ + FN(cgrp_storage_get, 210, ##ctx) \ + FN(cgrp_storage_delete, 211, ##ctx) \ /* */ +/* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't + * know or care about integer value that is now passed as second argument + */ +#define __BPF_FUNC_MAPPER_APPLY(name, value, FN) FN(name), +#define __BPF_FUNC_MAPPER(FN) ___BPF_FUNC_MAPPER(__BPF_FUNC_MAPPER_APPLY, FN) + /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call */ -#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x +#define 
__BPF_ENUM_FN(x, y) BPF_FUNC_ ## x = y, enum bpf_func_id { - __BPF_FUNC_MAPPER(__BPF_ENUM_FN) + ___BPF_FUNC_MAPPER(__BPF_ENUM_FN) __BPF_FUNC_MAX_ID, }; #undef __BPF_ENUM_FN @@ -5601,6 +5782,12 @@ enum { BPF_F_ZERO_CSUM_TX = (1ULL << 1), BPF_F_DONT_FRAGMENT = (1ULL << 2), BPF_F_SEQ_NUMBER = (1ULL << 3), + BPF_F_NO_TUNNEL_KEY = (1ULL << 4), +}; + +/* BPF_FUNC_skb_get_tunnel_key flags. */ +enum { + BPF_F_TUNINFO_FLAGS = (1ULL << 4), }; /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and @@ -5635,6 +5822,8 @@ enum { BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5), BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6), + BPF_F_ADJ_ROOM_DECAP_L3_IPV4 = (1ULL << 7), + BPF_F_ADJ_ROOM_DECAP_L3_IPV6 = (1ULL << 8), }; enum { @@ -5792,7 +5981,10 @@ struct bpf_tunnel_key { }; __u8 tunnel_tos; __u8 tunnel_ttl; - __u16 tunnel_ext; /* Padding, future use. */ + union { + __u16 tunnel_ext; /* compat */ + __be16 tunnel_flags; + }; __u32 tunnel_label; union { __u32 local_ipv4; @@ -5836,6 +6028,11 @@ enum bpf_ret_code { * represented by BPF_REDIRECT above). */ BPF_LWT_REROUTE = 128, + /* BPF_FLOW_DISSECTOR_CONTINUE: used by BPF_PROG_TYPE_FLOW_DISSECTOR + * to indicate that no custom dissection was performed, and + * fallback to standard dissector is requested. + */ + BPF_FLOW_DISSECTOR_CONTINUE = 129, }; struct bpf_sock { @@ -6134,11 +6331,26 @@ struct bpf_link_info { struct { __aligned_u64 target_name; /* in/out: target_name buffer ptr */ __u32 target_name_len; /* in/out: target_name buffer len */ + + /* If the iter specific field is 32 bits, it can be put + * in the first or second union. Otherwise it should be + * put in the second union. + */ union { struct { __u32 map_id; } map; }; + union { + struct { + __u64 cgroup_id; + __u32 order; + } cgroup; + struct { + __u32 tid; + __u32 pid; + } task; + }; } iter; struct { __u32 netns_ino; @@ -6257,6 +6469,7 @@ struct bpf_sock_ops { * the outgoing header has not * been written yet. 
*/ + __u64 skb_hwtstamp; }; /* Definitions for bpf_sock_ops_cb_flags */ @@ -6542,6 +6755,7 @@ struct bpf_raw_tracepoint_args { enum { BPF_FIB_LOOKUP_DIRECT = (1U << 0), BPF_FIB_LOOKUP_OUTPUT = (1U << 1), + BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), }; enum { @@ -6699,6 +6913,27 @@ struct bpf_dynptr { __u64 :64; } __attribute__((aligned(8))); +struct bpf_list_head { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + +struct bpf_list_node { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + +struct bpf_rb_root { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + +struct bpf_rb_node { + __u64 :64; + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. From fa9f4b45c01ee1ea288f16640a77e70be05039c3 Mon Sep 17 00:00:00 2001 From: Jalal Mostafa Date: Thu, 3 Apr 2025 18:20:07 +0200 Subject: [PATCH 2/6] libxdp: add device binding support Kernel 6.3 added support for some NIC offloads for XDP programs. In XDP, this feature is known as XDP hints. XDP hints are only supported if the XDP program is bound to the NIC device using the BPF_F_XDP_DEV_BOUND_ONLY binding flag. The device binding flag is represented through `XDP_ATTACH_DEVBIND`, a new attach flag for `xdp_program__attach`. Device binding is propagated to the dispatcher. Any subsequent program attachments are rejected if they are different from the already running dispatcher on a network interface. The flag is recorded using a new variable in `struct xdp_dispatcher_config`. 
Signed-off-by: Jalal Mostafa --- .gitignore | 2 + headers/xdp/libxdp.h | 4 + headers/xdp/prog_dispatcher.h | 3 +- lib/libxdp/libxdp.c | 199 ++++++++++++++++++++++++++++------ lib/libxdp/libxdp.map | 4 + 5 files changed, 178 insertions(+), 34 deletions(-) diff --git a/.gitignore b/.gitignore index bbe48fce..87e6d71a 100644 --- a/.gitignore +++ b/.gitignore @@ -66,3 +66,5 @@ compile_commands.json # BPF skeleton files *.skel.h + +.vscode diff --git a/headers/xdp/libxdp.h b/headers/xdp/libxdp.h index c1a60669..801e826d 100644 --- a/headers/xdp/libxdp.h +++ b/headers/xdp/libxdp.h @@ -23,6 +23,9 @@ extern "C" { #define XDP_BPFFS_MOUNT_ENVVAR "LIBXDP_BPFFS_AUTOMOUNT" #define XDP_OBJECT_ENVVAR "LIBXDP_OBJECT_PATH" +#define XDP_ATTACH_DEVBIND (1 << 0) +#define XDP_ATTACH_FLAGS (XDP_ATTACH_DEVBIND) + enum xdp_attach_mode { XDP_MODE_UNSPEC = 0, XDP_MODE_NATIVE, @@ -116,6 +119,7 @@ struct xdp_program *xdp_multiprog__hw_prog(const struct xdp_multiprog *mp); bool xdp_multiprog__is_legacy(const struct xdp_multiprog *mp); int xdp_multiprog__program_count(const struct xdp_multiprog *mp); bool xdp_multiprog__xdp_frags_support(const struct xdp_multiprog *mp); +bool xdp_multiprog__xdp_dev_bound(const struct xdp_multiprog *mp); /* Only following members can be set at once: * diff --git a/headers/xdp/prog_dispatcher.h b/headers/xdp/prog_dispatcher.h index e7ead85f..09f00b32 100644 --- a/headers/xdp/prog_dispatcher.h +++ b/headers/xdp/prog_dispatcher.h @@ -6,7 +6,7 @@ #include #define XDP_METADATA_SECTION "xdp_metadata" -#define XDP_DISPATCHER_VERSION 2 +#define XDP_DISPATCHER_VERSION 3 /* magic byte is 'X' + 'D' + 'P' (88+68+80=236) */ #define XDP_DISPATCHER_MAGIC 236 @@ -29,6 +29,7 @@ struct xdp_dispatcher_config { __u32 chain_call_actions[MAX_DISPATCHER_ACTIONS]; __u32 run_prios[MAX_DISPATCHER_ACTIONS]; __u32 program_flags[MAX_DISPATCHER_ACTIONS]; + __u8 is_xdp_devbound; /* Whether this dispatcher is bound to a device */ }; #endif diff --git a/lib/libxdp/libxdp.c 
b/lib/libxdp/libxdp.c index 7ef86d74..b0674c33 100644 --- a/lib/libxdp/libxdp.c +++ b/lib/libxdp/libxdp.c @@ -90,6 +90,7 @@ struct xdp_multiprog { bool is_loaded; bool is_legacy; bool kernel_frags_support; + bool kernel_devbound_support; bool checked_compat; enum xdp_attach_mode attach_mode; int ifindex; @@ -102,6 +103,17 @@ struct xdp_dispatcher_config_v1 { __u32 run_prios[MAX_DISPATCHER_ACTIONS]; }; +#define XDP_DISPATCHER_VERSION_V2 2 +struct xdp_dispatcher_config_v2 { + __u8 magic; /* Set to XDP_DISPATCHER_MAGIC */ + __u8 dispatcher_version; /* Set to XDP_DISPATCHER_VERSION */ + __u8 num_progs_enabled; /* Number of active program slots */ + __u8 is_xdp_frags; /* Whether this dispatcher is loaded with XDP frags support */ + __u32 chain_call_actions[MAX_DISPATCHER_ACTIONS]; + __u32 run_prios[MAX_DISPATCHER_ACTIONS]; + __u32 program_flags[MAX_DISPATCHER_ACTIONS]; +}; + static const char *xdp_action_names[] = { [XDP_ABORTED] = "XDP_ABORTED", [XDP_DROP] = "XDP_DROP", @@ -227,11 +239,11 @@ static struct xdp_multiprog *xdp_multiprog__generate(struct xdp_program **progs, size_t num_progs, int ifindex, struct xdp_multiprog *old_mp, - bool remove_progs); + bool remove_progs, + unsigned int flags); static int xdp_multiprog__pin(struct xdp_multiprog *mp); static int xdp_multiprog__unpin(struct xdp_multiprog *mp); - /* On NULL, libxdp always sets errno to 0 for old APIs, so that their * compatibility is maintained wrt old libxdp_get_error that called the older * version of libbpf_get_error which did PTR_ERR_OR_ZERO, but newer versions @@ -557,7 +569,7 @@ int xdp_lock_release(int lock_fd) static int do_xdp_attach(int ifindex, int prog_fd, int old_fd, __u32 xdp_flags) { #ifdef HAVE_LIBBPF_BPF_XDP_ATTACH - LIBBPF_OPTS(bpf_xdp_attach_opts, opts, + LIBBPF_OPTS(bpf_xdp_attach_opts, opts, .old_prog_fd = old_fd); return bpf_xdp_attach(ifindex, prog_fd, xdp_flags, &opts); #else @@ -1613,9 +1625,13 @@ static int xdp_program__load(struct xdp_program *prog) prog->is_frags = 
xdp_program__xdp_frags_support(prog); #ifdef HAVE_LIBBPF_BPF_PROGRAM__FLAGS - if (bpf_program__type(prog->bpf_prog) == BPF_PROG_TYPE_EXT) - bpf_program__set_flags(prog->bpf_prog, - bpf_program__flags(prog->bpf_prog) & ~BPF_F_XDP_HAS_FRAGS); + if (bpf_program__type(prog->bpf_prog) == BPF_PROG_TYPE_EXT) { + bpf_program__set_flags( + prog->bpf_prog, + bpf_program__flags(prog->bpf_prog) & + ~(BPF_F_XDP_HAS_FRAGS | + BPF_F_XDP_DEV_BOUND_ONLY)); + } #endif err = bpf_object__load(prog->bpf_obj); @@ -1647,42 +1663,103 @@ struct xdp_program *xdp_program__clone(struct xdp_program *prog, unsigned int fl prog->prog_name, true); } +static const char *get_bpf_flag_name(__u32 flag) +{ + switch (flag) { + case BPF_F_XDP_DEV_BOUND_ONLY: + return "BPF_F_XDP_DEV_BOUND_ONLY"; + case BPF_F_XDP_HAS_FRAGS: + return "BPF_F_XDP_HAS_FRAGS"; + default: + return NULL; + } +} + #ifndef HAVE_LIBBPF_BPF_PROGRAM__FLAGS static bool kernel_has_frags_support(void) { pr_debug("Can't support frags with old version of libbpf that doesn't support setting program flags.\n"); return false; } + +static bool kernel_has_dev_bound(void) +{ + pr_debug("Can't bind to device with old version of libbpf that doesn't support setting program flags.\n"); + return false; +} #else -static bool kernel_has_frags_support(void) +static bool kernel_has_bpf_flag(__u32 flag) { struct xdp_program *test_prog; bool ret = false; int err; pr_debug("Checking for kernel frags support\n"); - test_prog = __xdp_program__find_file("xdp-dispatcher.o", NULL, "xdp_pass", NULL); + test_prog = __xdp_program__find_file("xdp-dispatcher.o", NULL, + "xdp_pass", NULL); if (IS_ERR(test_prog)) { err = PTR_ERR(test_prog); pr_warn("Couldn't open BPF file xdp-dispatcher.o\n"); return false; } - bpf_program__set_flags(test_prog->bpf_prog, BPF_F_XDP_HAS_FRAGS); + bpf_program__set_flags(test_prog->bpf_prog, flag); err = xdp_program__load(test_prog); if (!err) { - pr_debug("Kernel supports XDP programs with frags\n"); + pr_debug("Kernel supports XDP 
programs with flag: %s\n", + get_bpf_flag_name(flag)); ret = true; } else { - pr_debug("Kernel DOES NOT support XDP programs with frags\n"); + pr_debug("Kernel DOES NOT support XDP programs with flag: %s\n", + get_bpf_flag_name(flag)); } xdp_program__close(test_prog); return ret; } + +static bool kernel_has_frags_support(void) +{ + return kernel_has_bpf_flag(BPF_F_XDP_HAS_FRAGS); +} + +static bool kernel_has_dev_bound(void) +{ + return kernel_has_bpf_flag(BPF_F_XDP_DEV_BOUND_ONLY); +} #endif // HAVE_LIBBPF_BPF_PROGRAM__FLAGS +static int xdp_program__set_xdp_dev_bound(struct xdp_program *prog, + unsigned int ifindex) +{ + __u32 prog_flags; + int ret; + + if (IS_ERR_OR_NULL(prog) || !prog->bpf_prog || prog->prog_fd >= 0) + return libxdp_err(-EINVAL); + + if (!kernel_has_dev_bound()) { + pr_warn("Current kernel version does not support XDP device binding."); + return libxdp_err(-ENOTSUP); + } + + prog_flags = bpf_program__flags(prog->bpf_prog); + + if (ifindex > 0) + prog_flags |= BPF_F_XDP_DEV_BOUND_ONLY; + else + prog_flags &= ~BPF_F_XDP_DEV_BOUND_ONLY; + + ret = bpf_program__set_flags(prog->bpf_prog, prog_flags); + bpf_program__set_ifindex(prog->bpf_prog, ifindex); + if (!ret) + return libxdp_err(ret); + + return 0; +} + static int xdp_program__attach_single(struct xdp_program *prog, int ifindex, - enum xdp_attach_mode mode) + enum xdp_attach_mode mode, + unsigned int flags) { int err; @@ -1691,6 +1768,13 @@ static int xdp_program__attach_single(struct xdp_program *prog, int ifindex, xdp_program__set_xdp_frags_support(prog, false); bpf_program__set_type(prog->bpf_prog, BPF_PROG_TYPE_XDP); + + if (flags & XDP_ATTACH_DEVBIND) { + err = xdp_program__set_xdp_dev_bound(prog, ifindex); + if (err) + return err; + } + err = xdp_program__load(prog); if (err) return err; @@ -1702,7 +1786,6 @@ static int xdp_program__attach_single(struct xdp_program *prog, int ifindex, return xdp_attach_fd(xdp_program__fd(prog), -1, ifindex, mode); } - static int xdp_multiprog__main_fd(struct 
xdp_multiprog *mp) { if (IS_ERR_OR_NULL(mp)) @@ -1750,7 +1833,7 @@ static int xdp_program__attach_hw(struct xdp_program *prog, int ifindex) bpf_map__set_ifindex(map, ifindex); } - return xdp_program__attach_single(prog, ifindex, XDP_MODE_HW); + return xdp_program__attach_single(prog, ifindex, XDP_MODE_HW, 0); } static int xdp_multiprog__detach_hw(struct xdp_multiprog *old_mp) @@ -1782,7 +1865,7 @@ int xdp_program__attach_multi(struct xdp_program **progs, size_t num_progs, struct xdp_multiprog *old_mp = NULL, *mp; int err = 0, retry_counter = 0; - if (!progs || !num_progs || flags) + if (!progs || !num_progs || flags & ~XDP_ATTACH_FLAGS) return libxdp_err(-EINVAL); retry: @@ -1813,11 +1896,12 @@ int xdp_program__attach_multi(struct xdp_program **progs, size_t num_progs, envval = secure_getenv(XDP_SKIP_ENVVAR); if (envval && envval[0] == '1' && envval[1] == '\0') { pr_debug("Skipping dispatcher due to environment setting\n"); - return libxdp_err(xdp_program__attach_single(progs[0], ifindex, mode)); + return libxdp_err(xdp_program__attach_single(progs[0], ifindex, mode, flags)); } } - mp = xdp_multiprog__generate(progs, num_progs, ifindex, old_mp, false); + mp = xdp_multiprog__generate(progs, num_progs, ifindex, old_mp, false, + flags); if (IS_ERR(mp)) { err = PTR_ERR(mp); mp = NULL; @@ -1825,10 +1909,12 @@ int xdp_program__attach_multi(struct xdp_program **progs, size_t num_progs, if (num_progs == 1) { pr_info("Falling back to loading single prog " "without dispatcher\n"); - return libxdp_err(xdp_program__attach_single(progs[0], ifindex, mode)); + return libxdp_err(xdp_program__attach_single( + progs[0], ifindex, mode, flags)); } else { pr_warn("Can't fall back to legacy load with %zu " - "programs\n%s\n", num_progs, dispatcher_feature_err); + "programs\n%s\n", + num_progs, dispatcher_feature_err); } } goto out; @@ -1901,7 +1987,7 @@ int xdp_program__detach_multi(struct xdp_program **progs, size_t num_progs, if (flags || !num_progs || !progs) return 
libxdp_err(-EINVAL); - retry: +retry: new_mp = NULL; mp = xdp_multiprog__get_from_ifindex(ifindex); if (IS_ERR_OR_NULL(mp)) { @@ -1984,7 +2070,8 @@ int xdp_program__detach_multi(struct xdp_program **progs, size_t num_progs, if (err) goto out; } else { - new_mp = xdp_multiprog__generate(progs, num_progs, ifindex, mp, true); + new_mp = xdp_multiprog__generate(progs, num_progs, ifindex, mp, + true, flags); if (IS_ERR(new_mp)) { err = PTR_ERR(new_mp); if (err == -EOPNOTSUPP) { @@ -2043,7 +2130,8 @@ int xdp_program__detach(struct xdp_program *prog, int ifindex, return libxdp_err(xdp_program__detach_multi(&prog, 1, ifindex, mode, flags)); } -int xdp_program__test_run(struct xdp_program *prog, struct bpf_test_run_opts *opts, unsigned int flags) +int xdp_program__test_run(struct xdp_program *prog, + struct bpf_test_run_opts *opts, unsigned int flags) { struct xdp_multiprog *mp = NULL; int err, prog_fd; @@ -2058,7 +2146,7 @@ int xdp_program__test_run(struct xdp_program *prog, struct bpf_test_run_opts *op } if (prog->prog_type == BPF_PROG_TYPE_EXT) { - mp = xdp_multiprog__generate(&prog, 1, 0, NULL, false); + mp = xdp_multiprog__generate(&prog, 1, 0, NULL, false, flags); if (IS_ERR(mp)) { err = PTR_ERR(mp); if (err == -EOPNOTSUPP) @@ -2124,13 +2212,18 @@ static int xdp_multiprog__load(struct xdp_multiprog *mp) if (IS_ERR_OR_NULL(mp) || !mp->main_prog || mp->is_loaded || xdp_multiprog__is_legacy(mp)) return -EINVAL; - pr_debug("Loading multiprog dispatcher for %d programs %s frags support\n", - mp->config.num_progs_enabled, - mp->config.is_xdp_frags ? "with" : "without"); + pr_debug("Loading multiprog dispatcher. Progs: %d frags: %s device-bound: %s/%d\n", + mp->config.num_progs_enabled, + mp->config.is_xdp_frags ? "yes" : "no", + mp->config.is_xdp_devbound ? "yes" : "no", + mp->config.is_xdp_devbound ? 
0 : mp->ifindex); if (mp->config.is_xdp_frags) xdp_program__set_xdp_frags_support(mp->main_prog, true); + if (mp->config.is_xdp_devbound) + xdp_program__set_xdp_dev_bound(mp->main_prog, mp->ifindex); + err = xdp_program__load(mp->main_prog); if (err) { pr_info("Failed to load dispatcher: %s\n", @@ -2230,11 +2323,27 @@ static int check_dispatcher_version(struct xdp_multiprog *mp, { struct xdp_dispatcher_config_v1 *config = (void *)buf; + for (i = 0; i < MAX_DISPATCHER_ACTIONS; i++) { + mp->config.chain_call_actions[i] = + config->chain_call_actions[i]; + mp->config.run_prios[i] = config->run_prios[i]; + } + mp->config.num_progs_enabled = config->num_progs_enabled; + break; + } + case XDP_DISPATCHER_VERSION_V2: + { + struct xdp_dispatcher_config_v2 *config = (void *)buf; + for (i = 0; i < MAX_DISPATCHER_ACTIONS; i++) { mp->config.chain_call_actions[i] = config->chain_call_actions[i]; mp->config.run_prios[i] = config->run_prios[i]; + mp->config.program_flags[i] = config->program_flags[i]; } mp->config.num_progs_enabled = config->num_progs_enabled; + mp->config.is_xdp_frags = config->is_xdp_frags; + mp->config.dispatcher_version = config->dispatcher_version; + mp->config.magic = config->magic; break; } case XDP_DISPATCHER_VERSION: @@ -2461,7 +2570,6 @@ static struct xdp_multiprog *xdp_multiprog__from_fd(int fd, int hw_fd, return ERR_PTR(err); } - static struct xdp_multiprog *xdp_multiprog__from_id(__u32 id, __u32 hw_id, int ifindex) { @@ -2605,7 +2713,7 @@ struct xdp_multiprog *xdp_multiprog__get_from_ifindex(int ifindex) } mp = libxdp_err_ptr(err, false); - } else + } else mp = libxdp_err_ptr(0, true); return mp; } @@ -2665,7 +2773,7 @@ int libxdp_check_kern_compat(void) char buf[100] = {}; libxdp_strerror(err, buf, sizeof(buf)); pr_debug("Failed to load program %s: %s\n", - xdp_program__name(test_prog), buf); + xdp_program__name(test_prog), buf); goto out; } @@ -2778,7 +2886,6 @@ static int xdp_multiprog__link_prog(struct xdp_multiprog *mp, if (err) goto err; - if 
(mp->config.num_progs_enabled == 1) attach_func = "xdp_dispatcher"; else @@ -2854,7 +2961,7 @@ static int xdp_multiprog__link_prog(struct xdp_multiprog *mp, if (err == -EPERM) { pr_debug("Got 'permission denied' error while " "attaching program to dispatcher.\n%s\n", - dispatcher_feature_err); + dispatcher_feature_err); err = -EOPNOTSUPP; } else { pr_warn("Failed to attach program %s to dispatcher: %s\n", @@ -2913,7 +3020,8 @@ static struct xdp_multiprog *xdp_multiprog__generate(struct xdp_program **progs, size_t num_progs, int ifindex, struct xdp_multiprog *old_mp, - bool remove_progs) + bool remove_progs, + unsigned int flags) { size_t num_new_progs = old_mp ? old_mp->num_links : 0; struct xdp_program **new_progs = NULL; @@ -2933,6 +3041,19 @@ static struct xdp_multiprog *xdp_multiprog__generate(struct xdp_program **progs, return ERR_PTR(-E2BIG); } + if (!remove_progs && old_mp) { + if (old_mp->config.is_xdp_devbound) { + if (!(flags & XDP_ATTACH_DEVBIND)) { + pr_warn("Dispatcher is already bound to ifindex %d. You did not specify XDP_ATTACH_DEVBIND in the attach flags of the new program\n", + old_mp->ifindex); + return ERR_PTR(-EINVAL); + } + } else if (flags & XDP_ATTACH_DEVBIND) { + pr_warn("Dispatcher was not bound to a device. 
Cannot rebind it, some old programs may require access to multiple interfaces\n"); + return ERR_PTR(-EINVAL); + } + } + pr_debug("Generating multi-prog dispatcher for %zu programs\n", num_new_progs); @@ -2941,6 +3062,7 @@ static struct xdp_multiprog *xdp_multiprog__generate(struct xdp_program **progs, return mp; mp->kernel_frags_support = kernel_has_frags_support(); + mp->kernel_devbound_support = kernel_has_dev_bound(); if (old_mp) { struct xdp_program *prog; @@ -3025,6 +3147,9 @@ static struct xdp_multiprog *xdp_multiprog__generate(struct xdp_program **progs, mp->config.dispatcher_version = mp->version; mp->config.num_progs_enabled = num_new_progs; mp->config.is_xdp_frags = mp->kernel_frags_support; + mp->config.is_xdp_devbound = !!old_mp ? old_mp->config.is_xdp_devbound : + (flags & XDP_ATTACH_DEVBIND); + for (i = 0; i < num_new_progs; i++) { mp->config.chain_call_actions[i] = (new_progs[i]->chain_call_actions | @@ -3032,9 +3157,12 @@ static struct xdp_multiprog *xdp_multiprog__generate(struct xdp_program **progs, mp->config.run_prios[i] = new_progs[i]->run_prio; if (xdp_program__xdp_frags_support(new_progs[i])) - mp->config.program_flags[i] = BPF_F_XDP_HAS_FRAGS; + mp->config.program_flags[i] |= BPF_F_XDP_HAS_FRAGS; else mp->config.is_xdp_frags = false; + + if (mp->config.is_xdp_devbound) + mp->config.program_flags[i] |= BPF_F_XDP_DEV_BOUND_ONLY; } if (mp->kernel_frags_support) { @@ -3404,6 +3532,11 @@ bool xdp_multiprog__xdp_frags_support(const struct xdp_multiprog *mp) return !xdp_multiprog__is_legacy(mp) && mp->config.is_xdp_frags; } +bool xdp_multiprog__xdp_dev_bound(const struct xdp_multiprog *mp) +{ + return !xdp_multiprog__is_legacy(mp) && mp->config.is_xdp_devbound; +} + static int remove_pin_dir(const char *subdir) { char prog_path[PATH_MAX], pin_path[PATH_MAX]; diff --git a/lib/libxdp/libxdp.map b/lib/libxdp/libxdp.map index e1af82fa..26d3ac5d 100644 --- a/lib/libxdp/libxdp.map +++ b/lib/libxdp/libxdp.map @@ -84,3 +84,7 @@ LIBXDP_1.5.0 { 
xsk_umem__create_opts; xsk_socket__create_opts; } LIBXDP_1.4.0; + +LIBXDP_1.6.0 { + xdp_multiprog__xdp_dev_bound; +} LIBXDP_1.5.0; From 80bf6db27b6686c6c1241dc09fbee0d1e8e1d9f7 Mon Sep 17 00:00:00 2001 From: Jalal Mostafa Date: Thu, 3 Apr 2025 18:29:19 +0200 Subject: [PATCH 3/6] libxdp: Add selftest for device binding Add a selftest program for libxdp device binding, testing the different permutations of loading a program with and without the flag. Signed-off-by: Jalal Mostafa --- lib/libxdp/tests/.gitignore | 1 + lib/libxdp/tests/Makefile | 2 +- lib/libxdp/tests/test-libxdp.sh | 14 +- lib/libxdp/tests/test_xdp_devbound.c | 405 +++++++++++++++++++++++++++ 4 files changed, 420 insertions(+), 2 deletions(-) create mode 100644 lib/libxdp/tests/test_xdp_devbound.c diff --git a/lib/libxdp/tests/.gitignore b/lib/libxdp/tests/.gitignore index c822c396..543526e6 100644 --- a/lib/libxdp/tests/.gitignore +++ b/lib/libxdp/tests/.gitignore @@ -5,3 +5,4 @@ test_dispatcher_versions test_xsk_non_privileged test_link_detach test_xsk_umem_flags +test_xdp_devbound diff --git a/lib/libxdp/tests/Makefile b/lib/libxdp/tests/Makefile index b16d7baa..36fccb55 100644 --- a/lib/libxdp/tests/Makefile +++ b/lib/libxdp/tests/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) -USER_TARGETS := test_xsk_refcnt check_kern_compat test_xdp_frags test_dispatcher_versions test_link_detach test_xsk_umem_flags +USER_TARGETS := test_xsk_refcnt check_kern_compat test_xdp_devbound test_xdp_frags test_dispatcher_versions test_link_detach test_xsk_umem_flags BPF_TARGETS := xdp_dispatcher_v1 xdp_pass USER_LIBS := -lpthread diff --git a/lib/libxdp/tests/test-libxdp.sh b/lib/libxdp/tests/test-libxdp.sh index 89091989..e70b3098 100644 --- a/lib/libxdp/tests/test-libxdp.sh +++ b/lib/libxdp/tests/test-libxdp.sh @@ -1,6 +1,6 @@ # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) -ALL_TESTS="test_link_so test_link_a test_old_dispatcher test_xdp_frags test_xsk_prog_refcnt_bpffs 
test_xsk_prog_refcnt_legacy test_xsk_non_privileged test_link_detach test_xsk_umem_flags" +ALL_TESTS="test_link_so test_link_a test_old_dispatcher test_xdp_devbound test_xdp_frags test_xsk_prog_refcnt_bpffs test_xsk_prog_refcnt_legacy test_xsk_non_privileged test_link_detach test_xsk_umem_flags" TESTS_DIR=$(dirname "${BASH_SOURCE[0]}") @@ -81,6 +81,18 @@ test_xdp_frags() ip link delete xdp_veth_small0 } +test_xdp_devbound() +{ + check_mount_bpffs || return 1 + skip_if_missing_libxdp_compat + + ip link add xdp_veth0 type veth peer name xdp_veth1 + ip link add xdp_veth2 type veth peer name xdp_veth3 + check_run $TESTS_DIR/test_xdp_devbound xdp_veth1 xdp_veth3 2>&1 + ip link delete xdp_veth0 + ip link delete xdp_veth2 +} + test_old_dispatcher() { check_mount_bpffs || return 1 diff --git a/lib/libxdp/tests/test_xdp_devbound.c b/lib/libxdp/tests/test_xdp_devbound.c new file mode 100644 index 00000000..673f395f --- /dev/null +++ b/lib/libxdp/tests/test_xdp_devbound.c @@ -0,0 +1,405 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "test_utils.h" + +#include +#include + +#define ARRAY_SIZE(_x) (sizeof(_x) / sizeof((_x)[0])) + +static bool kern_compat; + +static struct xdp_program *load_prog(void) +{ + DECLARE_LIBXDP_OPTS(xdp_program_opts, opts, .prog_name = "xdp_pass", + .find_filename = "xdp-dispatcher.o", ); + return xdp_program__create(&opts); +} + +static int check_attached_progs(int ifindex, int count, bool devbound) +{ + struct xdp_multiprog *mp; + int ret; + + /* If the kernel does not support device binding, we always expect + * device binding support to be disabled on a returned dispatcher + */ + if (!kern_compat) + devbound = false; + + mp = xdp_multiprog__get_from_ifindex(ifindex); + ret = libxdp_get_error(mp); + if (ret) { + fprintf(stderr, "Couldn't get multiprog on ifindex %d: %s\n", + ifindex, strerror(-ret)); + return ret; + } + + ret = 
-EINVAL; + + if (xdp_multiprog__is_legacy(mp)) { + fprintf(stderr, "Found legacy prog on ifindex %d\n", ifindex); + goto out; + } + + if (xdp_multiprog__program_count(mp) != count) { + fprintf(stderr, + "Expected %d programs loaded on ifindex %d, found %d\n", + count, ifindex, xdp_multiprog__program_count(mp)); + goto out; + } + + if (xdp_multiprog__xdp_dev_bound(mp) != devbound) { + fprintf(stderr, + "Multiprog on ifindex %d %s device binding, expected %s\n", + ifindex, + xdp_multiprog__xdp_dev_bound(mp) ? "supports" : + "does not support", + devbound ? "support" : "no support"); + goto out; + } + + ret = 0; + +out: + xdp_multiprog__close(mp); + return ret; +} + +static void print_test_result(const char *func, int ret) +{ + fflush(stderr); + fprintf(stderr, "%s:\t%s\n", func, ret ? "FAILED" : "PASSED"); + fflush(stdout); +} + +static int load_attach_prog(struct xdp_program **prog, int ifindex, + bool devbound) +{ + int ret; + + *prog = load_prog(); + if (!*prog) { + ret = -errno; + fprintf(stderr, "Couldn't load program: %s\n", strerror(-ret)); + return ret; + } + + return xdp_program__attach(*prog, ifindex, XDP_MODE_NATIVE, + devbound ? 
XDP_ATTACH_DEVBIND : 0); +} + +static int _check_load(int ifindex, bool devbound, bool should_succeed) +{ + struct xdp_program *prog = NULL; + bool attached; + int ret; + + ret = load_attach_prog(&prog, ifindex, devbound); + attached = !ret; + + if (attached != should_succeed) { + ret = -EINVAL; + goto out; + } + + if (should_succeed) + ret = check_attached_progs(ifindex, 1, devbound); + else + ret = 0; + +out: + if (attached) + xdp_program__detach(prog, ifindex, XDP_MODE_NATIVE, 0); + xdp_program__close(prog); + return ret; +} + +static int check_load_devbound(int ifindex) +{ + int ret = _check_load(ifindex, true, true); + print_test_result(__func__, ret); + return ret; +} + +static int check_load_nodevbound_success(int ifindex) +{ + int ret = _check_load(ifindex, false, true); + print_test_result(__func__, ret); + return ret; +} + +static int check_load_devbound_multi(int ifindex) +{ + struct xdp_program *prog1 = NULL, *prog2 = NULL; + int ret; + + ret = load_attach_prog(&prog1, ifindex, true); + if (ret) + goto out; + + ret = load_attach_prog(&prog2, ifindex, true); + if (ret) + goto out_prog1; + + ret = check_attached_progs(ifindex, 2, true); + + xdp_program__detach(prog2, ifindex, XDP_MODE_NATIVE, 0); +out_prog1: + xdp_program__detach(prog1, ifindex, XDP_MODE_NATIVE, 0); +out: + xdp_program__close(prog2); + xdp_program__close(prog1); + print_test_result(__func__, ret); + return ret; +} + +static int _check_load_mix(int ifindex, bool devbound1, bool devbound2) +{ + struct xdp_program *prog1 = NULL, *prog2 = NULL; + int ret; + + ret = load_attach_prog(&prog1, ifindex, devbound1); + if (ret) + goto out; + + /* First program attached, dispatcher supports device binding */ + ret = check_attached_progs(ifindex, 1, devbound1); + if (ret) + goto out; + + ret = load_attach_prog(&prog2, ifindex, devbound2); + if (!ret) { + xdp_program__detach(prog2, ifindex, XDP_MODE_NATIVE, 0); + ret = -EINVAL; + goto out_prog1; + } + + /* Still only a single program loaded, with 
device binding */ + ret = check_attached_progs(ifindex, 1, devbound1); + +out_prog1: + xdp_program__detach(prog1, ifindex, XDP_MODE_NATIVE, 0); + +out: + xdp_program__close(prog2); + xdp_program__close(prog1); + return ret; +} + +static int check_load_mix_devbound_nodevbound(int ifindex) +{ + int ret = _check_load_mix(ifindex, true, false); + print_test_result(__func__, ret); + return ret; +} + +static int check_load_mix_nodevbound_devbound(int ifindex) +{ + int ret = _check_load_mix(ifindex, false, true); + print_test_result(__func__, ret); + return ret; +} + +static int check_load_devbound_multiple_ifindex(int ifindex1, int ifindex2) +{ + struct xdp_program *prog = NULL; + int ret; + + prog = load_prog(); + + ret = xdp_program__attach(prog, ifindex1, XDP_MODE_NATIVE, + XDP_ATTACH_DEVBIND); + if (ret) { + ret = -EINVAL; + goto out; + } + + /* Still only a single program loaded, with device binding */ + ret = check_attached_progs(ifindex1, 1, true); + if (ret) + goto out; + + ret = xdp_program__attach(prog, ifindex2, XDP_MODE_NATIVE, + XDP_ATTACH_DEVBIND); + if (!ret) { + xdp_program__detach(prog, ifindex2, XDP_MODE_NATIVE, 0); + ret = -EINVAL; + goto out; + } + +out: + xdp_program__detach(prog, ifindex1, XDP_MODE_NATIVE, 0); + xdp_program__close(prog); + print_test_result(__func__, !ret); + return !ret; +} + +static int check_load_mixed_multiple_ifindex(int ifindex1, int ifindex2) +{ + struct xdp_program *prog = NULL; + int ret; + + prog = load_prog(); + + ret = xdp_program__attach(prog, ifindex1, XDP_MODE_NATIVE, + XDP_ATTACH_DEVBIND); + if (ret) + goto out; + + /* Still only a single program loaded, with device binding */ + ret = check_attached_progs(ifindex1, 1, true); + if (ret) + goto out_prog1; + + ret = xdp_program__attach(prog, ifindex2, XDP_MODE_NATIVE, 0); + if (!ret) { + xdp_program__detach(prog, ifindex2, XDP_MODE_NATIVE, 0); + ret = -EINVAL; + } + +out_prog1: + xdp_program__detach(prog, ifindex1, XDP_MODE_NATIVE, 0); +out: + xdp_program__close(prog); 
+ print_test_result(__func__, !ret); + return !ret; +} + +static int check_load2_mixed_multiple_ifindex(int ifindex1, int ifindex2) +{ + struct xdp_program *prog1 = NULL, *prog2 = NULL; + int ret; + + ret = load_attach_prog(&prog1, ifindex1, true); + if (ret) + goto out; + + /* First program attached, dispatcher supports device binding */ + ret = check_attached_progs(ifindex1, 1, true); + if (ret) + goto out_prog1; + + ret = load_attach_prog(&prog2, ifindex2, false); + if (ret) + goto out_prog1; + + /* Still only a single program loaded, with device binding */ + ret = check_attached_progs(ifindex2, 1, false); + +out_prog1: + xdp_program__detach(prog1, ifindex1, XDP_MODE_NATIVE, 0); + +out: + xdp_program__detach(prog2, ifindex2, XDP_MODE_NATIVE, 0); + xdp_program__close(prog2); + xdp_program__close(prog1); + print_test_result(__func__, ret); + + return ret; +} + +static bool check_devbound_compat(void) +{ + struct xdp_program *test_prog; + struct bpf_program *prog; + struct bpf_object *obj; + bool ret = false; + int err; + + test_prog = load_prog(); + if (!test_prog) + return false; + + obj = xdp_program__bpf_obj(test_prog); + if (!obj) + goto out; + + prog = bpf_object__find_program_by_name(obj, "xdp_pass"); + if (!prog) + goto out; + + bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY); + err = bpf_object__load(obj); + if (!err) { + printf("Kernel supports XDP programs with device binding\n"); + ret = true; + } else { + printf("Kernel DOES NOT support XDP programs with device binding\n"); + } + fflush(stdout); + +out: + xdp_program__close(test_prog); + return ret; +} + +static void usage(char *progname) +{ + fprintf(stderr, "Usage: %s \n", progname); + exit(EXIT_FAILURE); +} + +int main(int argc, char **argv) +{ + struct rlimit r = { RLIM_INFINITY, RLIM_INFINITY }; + int ifindex1, ifindex2, ret = 0; + + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + fprintf(stderr, "ERROR: setrlimit(RLIMIT_MEMLOCK) \"%s\"\n", + strerror(errno)); + exit(EXIT_FAILURE); + } + + char 
*envval; + + envval = secure_getenv("VERBOSE_TESTS"); + + silence_libbpf_logging(); + if (envval && envval[0] == '1') + verbose_libxdp_logging(); + else + silence_libxdp_logging(); + + if (argc != 3) + usage(argv[0]); + + ifindex1 = if_nametoindex(argv[1]); + if (!ifindex1) { + fprintf(stderr, "Interface '%s' not found.\n", argv[1]); + usage(argv[0]); + } + + ifindex2 = if_nametoindex(argv[2]); + if (!ifindex2) { + fprintf(stderr, "Interface '%s' not found.\n", argv[1]); + usage(argv[0]); + } + + kern_compat = check_devbound_compat(); + + ret = check_load_devbound(kern_compat ? ifindex1 : 0); + ret = check_load_nodevbound_success(ifindex1) || ret; + if (kern_compat) { + ret = check_load_devbound_multi(ifindex1) || ret; + ret = check_load_mix_devbound_nodevbound(ifindex1) || ret; + ret = check_load_mix_nodevbound_devbound(ifindex1) || ret; + ret = check_load_devbound_multiple_ifindex(ifindex1, ifindex2) || ret; + ret = check_load_mixed_multiple_ifindex(ifindex1, ifindex2) || ret; + ret = check_load2_mixed_multiple_ifindex(ifindex1, ifindex2) || ret; + } + + return ret; +} From f5fd7148cc740e90feafcbde4d020c3c96ebae3d Mon Sep 17 00:00:00 2001 From: Jalal Mostafa Date: Fri, 12 Sep 2025 15:29:14 +0200 Subject: [PATCH 4/6] libxdp: indent test_dispatcher_versions selftest using tabs Signed-off-by: Jalal Mostafa --- lib/libxdp/tests/test_dispatcher_versions.c | 216 ++++++++++---------- 1 file changed, 108 insertions(+), 108 deletions(-) diff --git a/lib/libxdp/tests/test_dispatcher_versions.c b/lib/libxdp/tests/test_dispatcher_versions.c index 14a8ba83..561bb071 100644 --- a/lib/libxdp/tests/test_dispatcher_versions.c +++ b/lib/libxdp/tests/test_dispatcher_versions.c @@ -34,69 +34,69 @@ int get_prog_id(int prog_fd) { struct bpf_prog_info info = {}; __u32 len = sizeof(info); - int err; + int err; err = bpf_obj_get_info_by_fd(prog_fd, &info, &len); if (err) - return -errno; + return -errno; - return info.id; + return info.id; } int load_dispatcher_v1(int ifindex) { struct 
xdp_dispatcher_config_v1 dispatcher_config = {}; - struct bpf_object *obj_dispatcher, *obj_prog = NULL; + struct bpf_object *obj_dispatcher, *obj_prog = NULL; DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts); - struct bpf_program *dispatcher_prog, *xdp_prog; - int ret, btf_id, lfd = -1, dispatcher_id; + struct bpf_program *dispatcher_prog, *xdp_prog; + int ret, btf_id, lfd = -1, dispatcher_id; char pin_path[PATH_MAX], buf[PATH_MAX]; - const char *attach_func = "prog0"; - struct bpf_map *map; + const char *attach_func = "prog0"; + struct bpf_map *map; - if (!ifindex) - return -ENOENT; + if (!ifindex) + return -ENOENT; obj_dispatcher = bpf_object__open("xdp_dispatcher_v1.o"); - if (!obj_dispatcher) - return -errno; + if (!obj_dispatcher) + return -errno; btf_id = btf__find_by_name_kind(bpf_object__btf(obj_dispatcher), - attach_func, BTF_KIND_FUNC); + attach_func, BTF_KIND_FUNC); if (btf_id <= 0) { ret = -ENOENT; goto out; } opts.target_btf_id = btf_id; - map = bpf_object__next_map(obj_dispatcher, NULL); + map = bpf_object__next_map(obj_dispatcher, NULL); if (!map) { ret = -ENOENT; goto out; } - dispatcher_prog = bpf_object__find_program_by_name(obj_dispatcher, - "xdp_dispatcher"); + dispatcher_prog = bpf_object__find_program_by_name(obj_dispatcher, + "xdp_dispatcher"); if (!dispatcher_prog) { ret = -errno; goto out; } - dispatcher_config.num_progs_enabled = 1; - dispatcher_config.chain_call_actions[0] = PROG_CHAIN_CALL_ACTIONS; - dispatcher_config.run_prios[0] = PROG_RUN_PRIO; + dispatcher_config.num_progs_enabled = 1; + dispatcher_config.chain_call_actions[0] = PROG_CHAIN_CALL_ACTIONS; + dispatcher_config.run_prios[0] = PROG_RUN_PRIO; - ret = bpf_map__set_initial_value(map, &dispatcher_config, - sizeof(dispatcher_config)); - if (ret) - goto out; + ret = bpf_map__set_initial_value(map, &dispatcher_config, + sizeof(dispatcher_config)); + if (ret) + goto out; - ret = bpf_object__load(obj_dispatcher); - if (ret) - goto out; + ret = bpf_object__load(obj_dispatcher); + if 
(ret) + goto out; - dispatcher_id = get_prog_id(bpf_program__fd(dispatcher_prog)); + dispatcher_id = get_prog_id(bpf_program__fd(dispatcher_prog)); if (dispatcher_id < 0) { ret = dispatcher_id; goto out; @@ -108,27 +108,27 @@ int load_dispatcher_v1(int ifindex) goto out; } - xdp_prog = bpf_object__find_program_by_name(obj_prog, "xdp_pass"); + xdp_prog = bpf_object__find_program_by_name(obj_prog, "xdp_pass"); if (!xdp_prog) { ret = -errno; goto out; } ret = bpf_program__set_attach_target(xdp_prog, - bpf_program__fd(dispatcher_prog), - attach_func); - if (ret) - goto out; + bpf_program__fd(dispatcher_prog), + attach_func); + if (ret) + goto out; - bpf_program__set_type(xdp_prog, BPF_PROG_TYPE_EXT); - bpf_program__set_expected_attach_type(xdp_prog, 0); + bpf_program__set_type(xdp_prog, BPF_PROG_TYPE_EXT); + bpf_program__set_expected_attach_type(xdp_prog, 0); - ret = bpf_object__load(obj_prog); - if (ret) - goto out; + ret = bpf_object__load(obj_prog); + if (ret) + goto out; - lfd = bpf_link_create(bpf_program__fd(xdp_prog), - bpf_program__fd(dispatcher_prog), 0, &opts); + lfd = bpf_link_create(bpf_program__fd(xdp_prog), + bpf_program__fd(dispatcher_prog), 0, &opts); if (lfd < 0) { ret = -errno; goto out; @@ -142,84 +142,84 @@ int load_dispatcher_v1(int ifindex) ret = mkdir(BPFFS_DIR, S_IRWXU); if (ret && errno != EEXIST) { ret = -errno; - printf("mkdir err (%s): %s\n", BPFFS_DIR, strerror(-ret)); + printf("mkdir err (%s): %s\n", BPFFS_DIR, strerror(-ret)); goto out; } ret = mkdir(pin_path, S_IRWXU); if (ret) { ret = -errno; - printf("mkdir err (%s): %s\n", pin_path, strerror(-ret)); + printf("mkdir err (%s): %s\n", pin_path, strerror(-ret)); goto out; } - ret = try_snprintf(buf, sizeof(buf), "%s/prog0-link", pin_path); - if (ret) - goto err_unpin; + ret = try_snprintf(buf, sizeof(buf), "%s/prog0-link", pin_path); + if (ret) + goto err_unpin; - ret = bpf_obj_pin(lfd, buf); - if (ret) - goto err_unpin; + ret = bpf_obj_pin(lfd, buf); + if (ret) + goto err_unpin; - ret = 
try_snprintf(buf, sizeof(buf), "%s/prog0-prog", pin_path); - if (ret) - goto err_unpin; + ret = try_snprintf(buf, sizeof(buf), "%s/prog0-prog", pin_path); + if (ret) + goto err_unpin; - ret = bpf_obj_pin(bpf_program__fd(xdp_prog), buf); - if (ret) - goto err_unpin; + ret = bpf_obj_pin(bpf_program__fd(xdp_prog), buf); + if (ret) + goto err_unpin; - ret = xdp_attach_fd(bpf_program__fd(dispatcher_prog), -1, ifindex, - XDP_MODE_NATIVE); - if (ret) - goto err_unpin; + ret = xdp_attach_fd(bpf_program__fd(dispatcher_prog), -1, ifindex, + XDP_MODE_NATIVE); + if (ret) + goto err_unpin; out: - if (lfd >= 0) - close(lfd); - bpf_object__close(obj_dispatcher); - bpf_object__close(obj_prog); - return ret; + if (lfd >= 0) + close(lfd); + bpf_object__close(obj_dispatcher); + bpf_object__close(obj_prog); + return ret; err_unpin: - if (!try_snprintf(buf, sizeof(buf), "%s/prog0-link", pin_path)) - unlink(buf); - if (!try_snprintf(buf, sizeof(buf), "%s/prog0-prog", pin_path)) - unlink(buf); - rmdir(pin_path); - goto out; + if (!try_snprintf(buf, sizeof(buf), "%s/prog0-link", pin_path)) + unlink(buf); + if (!try_snprintf(buf, sizeof(buf), "%s/prog0-prog", pin_path)) + unlink(buf); + rmdir(pin_path); + goto out; } int check_old_dispatcher(int ifindex) { - struct xdp_multiprog *mp = NULL; - struct xdp_program *xdp_prog; - char buf[100]; - int ret; + struct xdp_multiprog *mp = NULL; + struct xdp_program *xdp_prog; + char buf[100]; + int ret; - ret = load_dispatcher_v1(ifindex); - if (ret) - goto out; + ret = load_dispatcher_v1(ifindex); + if (ret) + goto out; - mp = xdp_multiprog__get_from_ifindex(ifindex); - ret = libxdp_get_error(mp); + mp = xdp_multiprog__get_from_ifindex(ifindex); + ret = libxdp_get_error(mp); if (ret) goto out; if (xdp_multiprog__is_legacy(mp)) { printf("Got unexpected legacy multiprog\n"); - ret = -EINVAL; - goto out; + ret = -EINVAL; + goto out; } if (xdp_multiprog__program_count(mp) != 1) { printf("Expected 1 attached program, got %d\n", - 
xdp_multiprog__program_count(mp)); - ret = -EINVAL; - goto out; + xdp_multiprog__program_count(mp)); + ret = -EINVAL; + goto out; } - xdp_prog = xdp_multiprog__next_prog(NULL, mp); + xdp_prog = xdp_multiprog__next_prog(NULL, mp); if (!xdp_prog) { ret = -errno; goto out; @@ -239,9 +239,9 @@ int check_old_dispatcher(int ifindex) goto out; } - ret = xdp_program__print_chain_call_actions(xdp_prog, buf, sizeof(buf)); - if (ret) - goto out; + ret = xdp_program__print_chain_call_actions(xdp_prog, buf, sizeof(buf)); + if (ret) + goto out; if (strcmp(buf, "XDP_DROP")) { printf("Expected actions XDP_PASS, got %s\n", buf); @@ -249,25 +249,25 @@ int check_old_dispatcher(int ifindex) goto out; } - xdp_prog = xdp_program__open_file("xdp_pass.o", "xdp", NULL); - ret = libxdp_get_error(xdp_prog); - if (ret) - goto out; + xdp_prog = xdp_program__open_file("xdp_pass.o", "xdp", NULL); + ret = libxdp_get_error(xdp_prog); + if (ret) + goto out; - ret = xdp_program__attach(xdp_prog, ifindex, XDP_MODE_NATIVE, 0); - xdp_program__close(xdp_prog); + ret = xdp_program__attach(xdp_prog, ifindex, XDP_MODE_NATIVE, 0); + xdp_program__close(xdp_prog); if (!ret) { printf("Shouldn't have been able to attach a new program to ifindex!\n"); ret = -EINVAL; goto out; } - ret = 0; + ret = 0; out: - if (mp) - xdp_multiprog__detach(mp); - xdp_multiprog__close(mp); - return ret; + if (mp) + xdp_multiprog__detach(mp); + xdp_multiprog__close(mp); + return ret; } static void usage(char *progname) @@ -278,23 +278,23 @@ static void usage(char *progname) int main(int argc, char **argv) { - int ifindex, ret; - char *envval; + int ifindex, ret; + char *envval; - envval = secure_getenv("VERBOSE_TESTS"); + envval = secure_getenv("VERBOSE_TESTS"); silence_libbpf_logging(); - if (envval && envval[0] == '1') - verbose_libxdp_logging(); - else - silence_libxdp_logging(); + if (envval && envval[0] == '1') + verbose_libxdp_logging(); + else + silence_libxdp_logging(); if (argc != 2) - usage(argv[0]); + usage(argv[0]); 
ifindex = if_nametoindex(argv[1]); - ret = check_old_dispatcher(ifindex); + ret = check_old_dispatcher(ifindex); - return ret; -} + return ret; +} \ No newline at end of file From a97a40c04336d22dc7ea9564f338462cc35f7aa4 Mon Sep 17 00:00:00 2001 From: Jalal Mostafa Date: Fri, 12 Sep 2025 15:30:15 +0200 Subject: [PATCH 5/6] libxdp: add selftest for dispatcher v2 compatibility Signed-off-by: Jalal Mostafa --- lib/libxdp/tests/Makefile | 4 +- lib/libxdp/tests/test_dispatcher_versions.c | 91 +++++++++++++++++---- lib/libxdp/tests/xdp_dispatcher.h | 31 +++++++ lib/libxdp/tests/xdp_dispatcher_v1.c | 3 +- lib/libxdp/tests/xdp_dispatcher_v1.h | 16 ---- lib/libxdp/tests/xdp_dispatcher_v2.c | 42 ++++++++++ 6 files changed, 150 insertions(+), 37 deletions(-) create mode 100644 lib/libxdp/tests/xdp_dispatcher.h delete mode 100644 lib/libxdp/tests/xdp_dispatcher_v1.h create mode 100644 lib/libxdp/tests/xdp_dispatcher_v2.c diff --git a/lib/libxdp/tests/Makefile b/lib/libxdp/tests/Makefile index 36fccb55..26faa9e5 100644 --- a/lib/libxdp/tests/Makefile +++ b/lib/libxdp/tests/Makefile @@ -1,10 +1,10 @@ # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) USER_TARGETS := test_xsk_refcnt check_kern_compat test_xdp_devbound test_xdp_frags test_dispatcher_versions test_link_detach test_xsk_umem_flags -BPF_TARGETS := xdp_dispatcher_v1 xdp_pass +BPF_TARGETS := xdp_dispatcher_v1 xdp_dispatcher_v2 xdp_pass USER_LIBS := -lpthread -EXTRA_DEPS += xdp_dispatcher_v1.h +EXTRA_DEPS += xdp_dispatcher.h EXTRA_USER_DEPS += test_utils.h TEST_FILE := ./test-libxdp.sh diff --git a/lib/libxdp/tests/test_dispatcher_versions.c b/lib/libxdp/tests/test_dispatcher_versions.c index 561bb071..ec10d3d2 100644 --- a/lib/libxdp/tests/test_dispatcher_versions.c +++ b/lib/libxdp/tests/test_dispatcher_versions.c @@ -15,7 +15,7 @@ #include "test_utils.h" #include "../libxdp_internal.h" -#include "xdp_dispatcher_v1.h" +#include "xdp_dispatcher.h" #include #include @@ -29,6 +29,15 @@ #define PROG_RUN_PRIO 42 #define 
PROG_CHAIN_CALL_ACTIONS (1 << XDP_DROP) +#define DISPATCHER_V1_FILE "xdp_dispatcher_v1.o" +#define DISPATCHER_V2_FILE "xdp_dispatcher_v2.o" + +static void print_test_result(const char *func, int ret) +{ + fflush(stderr); + fprintf(stderr, "%s:\t%s\n", func, ret ? "FAILED" : "PASSED"); + fflush(stdout); +} int get_prog_id(int prog_fd) { @@ -43,21 +52,38 @@ int get_prog_id(int prog_fd) return info.id; } -int load_dispatcher_v1(int ifindex) +static char* get_dispatcher_file(unsigned int dispatcher_version) +{ + switch (dispatcher_version) { + case XDP_DISPATCHER_VERSION_V1: + return DISPATCHER_V1_FILE; + + case XDP_DISPATCHER_VERSION_V2: + return DISPATCHER_V2_FILE; + + default: + break; + } + return NULL; +} + +int load_dispatcher(int ifindex, unsigned int dispatcher_version) { - struct xdp_dispatcher_config_v1 dispatcher_config = {}; + struct xdp_dispatcher_config_v1 dispatcher_config_v1 = {}; + struct xdp_dispatcher_config_v2 dispatcher_config_v2 = {}; + char *dispatcher_file = get_dispatcher_file(dispatcher_version); struct bpf_object *obj_dispatcher, *obj_prog = NULL; DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts); struct bpf_program *dispatcher_prog, *xdp_prog; - int ret, btf_id, lfd = -1, dispatcher_id; + int ret = 0, btf_id, lfd = -1, dispatcher_id; char pin_path[PATH_MAX], buf[PATH_MAX]; const char *attach_func = "prog0"; struct bpf_map *map; - if (!ifindex) + if (!ifindex || !dispatcher_file) return -ENOENT; - obj_dispatcher = bpf_object__open("xdp_dispatcher_v1.o"); + obj_dispatcher = bpf_object__open(dispatcher_file); if (!obj_dispatcher) return -errno; @@ -76,22 +102,38 @@ int load_dispatcher_v1(int ifindex) } dispatcher_prog = bpf_object__find_program_by_name(obj_dispatcher, - "xdp_dispatcher"); + "xdp_dispatcher"); if (!dispatcher_prog) { ret = -errno; goto out; } - dispatcher_config.num_progs_enabled = 1; - dispatcher_config.chain_call_actions[0] = PROG_CHAIN_CALL_ACTIONS; - dispatcher_config.run_prios[0] = PROG_RUN_PRIO; + switch (dispatcher_version) 
{ + case XDP_DISPATCHER_VERSION_V1: + dispatcher_config_v1.num_progs_enabled = 1; + dispatcher_config_v1.chain_call_actions[0] = PROG_CHAIN_CALL_ACTIONS; + dispatcher_config_v1.run_prios[0] = PROG_RUN_PRIO; + + ret = bpf_map__set_initial_value(map, &dispatcher_config_v1, + sizeof(dispatcher_config_v1)); + break; + + case XDP_DISPATCHER_VERSION_V2: + dispatcher_config_v2.magic = XDP_DISPATCHER_MAGIC; + dispatcher_config_v2.num_progs_enabled = 1; + dispatcher_config_v2.chain_call_actions[0] = PROG_CHAIN_CALL_ACTIONS; + dispatcher_config_v2.run_prios[0] = PROG_RUN_PRIO; + dispatcher_config_v2.is_xdp_frags = 0; + dispatcher_config_v2.program_flags[0] = 0; + dispatcher_config_v2.dispatcher_version = XDP_DISPATCHER_VERSION_V2; + + ret = bpf_map__set_initial_value(map, &dispatcher_config_v2, + sizeof(dispatcher_config_v2)); + } - ret = bpf_map__set_initial_value(map, &dispatcher_config, - sizeof(dispatcher_config)); if (ret) goto out; - ret = bpf_object__load(obj_dispatcher); if (ret) goto out; @@ -190,14 +232,14 @@ int load_dispatcher_v1(int ifindex) goto out; } -int check_old_dispatcher(int ifindex) +int check_old_dispatcher(int ifindex, unsigned int dispatcher_version) { struct xdp_multiprog *mp = NULL; struct xdp_program *xdp_prog; char buf[100]; int ret; - ret = load_dispatcher_v1(ifindex); + ret = load_dispatcher(ifindex, dispatcher_version); if (ret) goto out; @@ -276,6 +318,20 @@ static void usage(char *progname) exit(EXIT_FAILURE); } +int check_old_dispatcher_v1(int ifindex) +{ + int ret = check_old_dispatcher(ifindex, XDP_DISPATCHER_VERSION_V1); + print_test_result(__func__, ret); + return ret; +} + +int check_old_dispatcher_v2(int ifindex) +{ + int ret = check_old_dispatcher(ifindex, XDP_DISPATCHER_VERSION_V2); + print_test_result(__func__, ret); + return ret; +} + int main(int argc, char **argv) { int ifindex, ret; @@ -294,7 +350,8 @@ int main(int argc, char **argv) ifindex = if_nametoindex(argv[1]); - ret = check_old_dispatcher(ifindex); + ret = 
check_old_dispatcher_v1(ifindex); + ret = check_old_dispatcher_v2(ifindex) || ret; return ret; -} \ No newline at end of file +} diff --git a/lib/libxdp/tests/xdp_dispatcher.h b/lib/libxdp/tests/xdp_dispatcher.h new file mode 100644 index 00000000..adbe94e9 --- /dev/null +++ b/lib/libxdp/tests/xdp_dispatcher.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __XDP_DISPATCHER_H +#define __XDP_DISPATCHER_H + +#ifndef MAX_DISPATCHER_ACTIONS +#define MAX_DISPATCHER_ACTIONS 10 +#endif + +struct xdp_dispatcher_config_v1 { + __u8 num_progs_enabled; + __u32 chain_call_actions[MAX_DISPATCHER_ACTIONS]; + __u32 run_prios[MAX_DISPATCHER_ACTIONS]; +}; + +#define XDP_DISPATCHER_VERSION_V1 1 + +struct xdp_dispatcher_config_v2 { + __u8 magic; /* Set to XDP_DISPATCHER_MAGIC */ + __u8 dispatcher_version; /* Set to XDP_DISPATCHER_VERSION */ + __u8 num_progs_enabled; /* Number of active program slots */ + __u8 is_xdp_frags; /* Whether this dispatcher is loaded with XDP frags support */ + __u32 chain_call_actions[MAX_DISPATCHER_ACTIONS]; + __u32 run_prios[MAX_DISPATCHER_ACTIONS]; + __u32 program_flags[MAX_DISPATCHER_ACTIONS]; +}; + +#define XDP_DISPATCHER_MAGIC 236 +#define XDP_DISPATCHER_VERSION_V2 2 + +#endif diff --git a/lib/libxdp/tests/xdp_dispatcher_v1.c b/lib/libxdp/tests/xdp_dispatcher_v1.c index 00bb426e..c46b0734 100644 --- a/lib/libxdp/tests/xdp_dispatcher_v1.c +++ b/lib/libxdp/tests/xdp_dispatcher_v1.c @@ -4,10 +4,9 @@ #include #include -#include "xdp_dispatcher_v1.h" +#include "xdp_dispatcher.h" #define XDP_METADATA_SECTION "xdp_metadata" -#define XDP_DISPATCHER_VERSION_V1 1 #define XDP_DISPATCHER_RETVAL 31 diff --git a/lib/libxdp/tests/xdp_dispatcher_v1.h b/lib/libxdp/tests/xdp_dispatcher_v1.h deleted file mode 100644 index 55dac376..00000000 --- a/lib/libxdp/tests/xdp_dispatcher_v1.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#ifndef __XDP_DISPATCHER_V1_H -#define __XDP_DISPATCHER_V1_H - -#ifndef MAX_DISPATCHER_ACTIONS 
-#define MAX_DISPATCHER_ACTIONS 10 -#endif - -struct xdp_dispatcher_config_v1 { - __u8 num_progs_enabled; - __u32 chain_call_actions[MAX_DISPATCHER_ACTIONS]; - __u32 run_prios[MAX_DISPATCHER_ACTIONS]; -}; - -#endif diff --git a/lib/libxdp/tests/xdp_dispatcher_v2.c b/lib/libxdp/tests/xdp_dispatcher_v2.c new file mode 100644 index 00000000..dbab72d1 --- /dev/null +++ b/lib/libxdp/tests/xdp_dispatcher_v2.c @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include + +#include "xdp_dispatcher.h" + +#define XDP_METADATA_SECTION "xdp_metadata" +#define XDP_DISPATCHER_RETVAL 31 + + +static volatile const struct xdp_dispatcher_config_v2 conf = {}; + +__attribute__ ((noinline)) +int prog0(struct xdp_md *ctx) { + volatile int ret = XDP_DISPATCHER_RETVAL; + + if (!ctx) + return XDP_ABORTED; + return ret; +} +__attribute__ ((noinline)) + +SEC("xdp") +int xdp_dispatcher(struct xdp_md *ctx) +{ + __u8 num_progs_enabled = conf.num_progs_enabled; + int ret; + + if (num_progs_enabled < 1) + goto out; + ret = prog0(ctx); + if (!((1U << ret) & conf.chain_call_actions[0])) + return ret; + +out: + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; +__uint(dispatcher_version, XDP_DISPATCHER_VERSION_V2) SEC(XDP_METADATA_SECTION); From e2a7d7699171bb92fec4f3f8bec5002ab3b6e451 Mon Sep 17 00:00:00 2001 From: Jalal Mostafa Date: Tue, 16 Sep 2025 14:13:40 +0200 Subject: [PATCH 6/6] libxdp: skip devbind selftests if no kernel support If no kernel support for `BPF_F_XDP_DEV_BOUND_ONLY`, skip micro-selftests that has this flag. 
Signed-off-by: Jalal Mostafa --- lib/libxdp/tests/test_xdp_devbound.c | 69 ++++++++++++++++++---------- 1 file changed, 46 insertions(+), 23 deletions(-) diff --git a/lib/libxdp/tests/test_xdp_devbound.c b/lib/libxdp/tests/test_xdp_devbound.c index 673f395f..eb3fc89f 100644 --- a/lib/libxdp/tests/test_xdp_devbound.c +++ b/lib/libxdp/tests/test_xdp_devbound.c @@ -18,6 +18,7 @@ #include #define ARRAY_SIZE(_x) (sizeof(_x) / sizeof((_x)[0])) +#define EXIT_SKIPPED 249 static bool kern_compat; @@ -33,12 +34,6 @@ static int check_attached_progs(int ifindex, int count, bool devbound) struct xdp_multiprog *mp; int ret; - /* If the kernel does not support device binding, we always expect - * device binding support to be disabled on a returned dispatcher - */ - if (!kern_compat) - devbound = false; - mp = xdp_multiprog__get_from_ifindex(ifindex); ret = libxdp_get_error(mp); if (ret) { @@ -81,7 +76,8 @@ static int check_attached_progs(int ifindex, int count, bool devbound) static void print_test_result(const char *func, int ret) { fflush(stderr); - fprintf(stderr, "%s:\t%s\n", func, ret ? "FAILED" : "PASSED"); + fprintf(stderr, "%s:\t%s\n", func, + ret ? (ret == EXIT_SKIPPED ? 
"SKIPPED" : "FAILED") : "PASSED"); fflush(stdout); } @@ -104,9 +100,14 @@ static int load_attach_prog(struct xdp_program **prog, int ifindex, static int _check_load(int ifindex, bool devbound, bool should_succeed) { struct xdp_program *prog = NULL; - bool attached; + bool attached = false; int ret; + if (!kern_compat && devbound) { + ret = EXIT_SKIPPED; + goto out; + } + ret = load_attach_prog(&prog, ifindex, devbound); attached = !ret; @@ -146,6 +147,11 @@ static int check_load_devbound_multi(int ifindex) struct xdp_program *prog1 = NULL, *prog2 = NULL; int ret; + if (!kern_compat) { + ret = EXIT_SKIPPED; + goto out; + } + ret = load_attach_prog(&prog1, ifindex, true); if (ret) goto out; @@ -171,6 +177,11 @@ static int _check_load_mix(int ifindex, bool devbound1, bool devbound2) struct xdp_program *prog1 = NULL, *prog2 = NULL; int ret; + if (!kern_compat && (devbound1 || devbound2)) { + ret = EXIT_SKIPPED; + goto out; + } + ret = load_attach_prog(&prog1, ifindex, devbound1); if (ret) goto out; @@ -218,6 +229,11 @@ static int check_load_devbound_multiple_ifindex(int ifindex1, int ifindex2) struct xdp_program *prog = NULL; int ret; + if (!kern_compat) { + ret = EXIT_SKIPPED; + goto out; + } + prog = load_prog(); ret = xdp_program__attach(prog, ifindex1, XDP_MODE_NATIVE, @@ -243,7 +259,7 @@ static int check_load_devbound_multiple_ifindex(int ifindex1, int ifindex2) out: xdp_program__detach(prog, ifindex1, XDP_MODE_NATIVE, 0); xdp_program__close(prog); - print_test_result(__func__, !ret); + print_test_result(__func__, ret == EXIT_SKIPPED ? 
ret : !ret); return !ret; } @@ -252,6 +268,11 @@ static int check_load_mixed_multiple_ifindex(int ifindex1, int ifindex2) struct xdp_program *prog = NULL; int ret; + if (!kern_compat) { + ret = EXIT_SKIPPED; + goto out; + } + prog = load_prog(); ret = xdp_program__attach(prog, ifindex1, XDP_MODE_NATIVE, @@ -274,7 +295,7 @@ static int check_load_mixed_multiple_ifindex(int ifindex1, int ifindex2) xdp_program__detach(prog, ifindex1, XDP_MODE_NATIVE, 0); out: xdp_program__close(prog); - print_test_result(__func__, !ret); + print_test_result(__func__, ret == EXIT_SKIPPED ? ret : !ret); return !ret; } @@ -283,6 +304,11 @@ static int check_load2_mixed_multiple_ifindex(int ifindex1, int ifindex2) struct xdp_program *prog1 = NULL, *prog2 = NULL; int ret; + if (!kern_compat) { + ret = EXIT_SKIPPED; + goto out; + } + ret = load_attach_prog(&prog1, ifindex1, true); if (ret) goto out; @@ -389,17 +415,14 @@ int main(int argc, char **argv) } kern_compat = check_devbound_compat(); - - ret = check_load_devbound(kern_compat ? ifindex1 : 0); - ret = check_load_nodevbound_success(ifindex1) || ret; - if (kern_compat) { - ret = check_load_devbound_multi(ifindex1) || ret; - ret = check_load_mix_devbound_nodevbound(ifindex1) || ret; - ret = check_load_mix_nodevbound_devbound(ifindex1) || ret; - ret = check_load_devbound_multiple_ifindex(ifindex1, ifindex2) || ret; - ret = check_load_mixed_multiple_ifindex(ifindex1, ifindex2) || ret; - ret = check_load2_mixed_multiple_ifindex(ifindex1, ifindex2) || ret; - } - - return ret; + ret = check_load_devbound(ifindex1); + ret |= check_load_nodevbound_success(ifindex1); + ret |= check_load_devbound_multi(ifindex1); + ret |= check_load_mix_devbound_nodevbound(ifindex1); + ret |= check_load_mix_nodevbound_devbound(ifindex1); + ret |= check_load_devbound_multiple_ifindex(ifindex1, ifindex2); + ret |= check_load_mixed_multiple_ifindex(ifindex1, ifindex2); + ret |= check_load2_mixed_multiple_ifindex(ifindex1, ifindex2); + + return ret == EXIT_SKIPPED ? 
0 : ret; }