diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/bpf/bpf_dummy_struct_ops.c | 14 | ||||
-rw-r--r-- | net/bpf/test_run.c | 34 | ||||
-rw-r--r-- | net/core/bpf_sk_storage.c | 24 | ||||
-rw-r--r-- | net/core/filter.c | 29 | ||||
-rw-r--r-- | net/core/sock_map.c | 8 | ||||
-rw-r--r-- | net/core/xdp.c | 19 | ||||
-rw-r--r-- | net/ipv4/Makefile | 2 | ||||
-rw-r--r-- | net/ipv4/bpf_tcp_ca.c | 23 | ||||
-rw-r--r-- | net/ipv4/fou_bpf.c | 119 | ||||
-rw-r--r-- | net/ipv4/fou_core.c | 5 | ||||
-rw-r--r-- | net/ipv4/ip_tunnel.c | 22 | ||||
-rw-r--r-- | net/ipv4/ipip.c | 1 | ||||
-rw-r--r-- | net/ipv4/tcp_cong.c | 66 | ||||
-rw-r--r-- | net/ipv6/sit.c | 2 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_bpf.c | 5 | ||||
-rw-r--r-- | net/xdp/xsk.c | 9 | ||||
-rw-r--r-- | net/xdp/xsk_queue.h | 19 | ||||
-rw-r--r-- | net/xdp/xskmap.c | 8 |
18 files changed, 299 insertions, 110 deletions
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c index ff4f89a2b02a..5918d1b32e19 100644 --- a/net/bpf/bpf_dummy_struct_ops.c +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -173,14 +173,11 @@ static int bpf_dummy_ops_check_member(const struct btf_type *t, static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, - int off, int size, enum bpf_access_type atype, - u32 *next_btf_id, - enum bpf_type_flag *flag) + int off, int size) { const struct btf_type *state; const struct btf_type *t; s32 type_id; - int err; type_id = btf_find_by_name_kind(reg->btf, "bpf_dummy_ops_state", BTF_KIND_STRUCT); @@ -194,11 +191,12 @@ static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log, return -EACCES; } - err = btf_struct_access(log, reg, off, size, atype, next_btf_id, flag); - if (err < 0) - return err; + if (off + size > sizeof(struct bpf_dummy_ops_state)) { + bpf_log(log, "write access at off %d with size %d\n", off, size); + return -EACCES; + } - return atype == BPF_READ ? err : NOT_INIT; + return NOT_INIT; } static const struct bpf_verifier_ops bpf_dummy_verifier_ops = { diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index d350f31c7a3d..0b9bd9b39990 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -215,6 +215,16 @@ static void xdp_test_run_teardown(struct xdp_test_data *xdp) kfree(xdp->skbs); } +static bool frame_was_changed(const struct xdp_page_head *head) +{ + /* xdp_scrub_frame() zeroes the data pointer, flags is the last field, + * i.e. has the highest chances to be overwritten. If those two are + * untouched, it's most likely safe to skip the context reset. + */ + return head->frame->data != head->orig_ctx.data || + head->frame->flags != head->orig_ctx.flags; +} + static bool ctx_was_changed(struct xdp_page_head *head) { return head->orig_ctx.data != head->ctx.data || @@ -224,7 +234,7 @@ static bool ctx_was_changed(struct xdp_page_head *head) static void reset_ctx(struct xdp_page_head *head) { - if (likely(!ctx_was_changed(head))) + if (likely(!frame_was_changed(head) && !ctx_was_changed(head))) return; head->ctx.data = head->orig_ctx.data; @@ -538,6 +548,11 @@ int noinline bpf_fentry_test8(struct bpf_fentry_test_t *arg) return (long)arg->a; } +__bpf_kfunc u32 bpf_fentry_test9(u32 *a) +{ + return *a; +} + __bpf_kfunc int bpf_modify_return_test(int a, int *b) { *b += 1; @@ -567,6 +582,11 @@ long noinline bpf_kfunc_call_test4(signed char a, short b, int c, long d) return (long)a + (long)b + (long)c + d; } +int noinline bpf_fentry_shadow_test(int a) +{ + return a + 1; +} + struct prog_test_member1 { int a; }; @@ -598,6 +618,11 @@ bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) return &prog_test_struct; } +__bpf_kfunc void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p) +{ + WARN_ON_ONCE(1); +} + __bpf_kfunc struct prog_test_member * bpf_kfunc_call_memb_acquire(void) { @@ -607,9 +632,6 @@ bpf_kfunc_call_memb_acquire(void) __bpf_kfunc void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) { - if (!p) - return; - refcount_dec(&p->cnt); } @@ -795,6 +817,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2) BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU) BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE) BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset) BTF_SET8_END(test_sk_check_kfunc_ids) static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size, @@ -844,7 +867,8 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog, bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 || bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111 || bpf_fentry_test7((struct bpf_fentry_test_t *)0) != 0 || - bpf_fentry_test8(&arg) != 0) + bpf_fentry_test8(&arg) != 0 || + bpf_fentry_test9(&retval) != 0) goto out; break; case BPF_MODIFY_RETURN: diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 7a36353dbc22..d4172534dfa8 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -40,7 +40,7 @@ static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map) if (!sdata) return -ENOENT; - bpf_selem_unlink(SELEM(sdata), true); + bpf_selem_unlink(SELEM(sdata), false); return 0; } @@ -49,7 +49,6 @@ static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map) void bpf_sk_storage_free(struct sock *sk) { struct bpf_local_storage *sk_storage; - bool free_sk_storage = false; rcu_read_lock(); sk_storage = rcu_dereference(sk->sk_bpf_storage); @@ -58,13 +57,8 @@ void bpf_sk_storage_free(struct sock *sk) return; } - raw_spin_lock_bh(&sk_storage->lock); - free_sk_storage = bpf_local_storage_unlink_nolock(sk_storage); - raw_spin_unlock_bh(&sk_storage->lock); + bpf_local_storage_destroy(sk_storage); rcu_read_unlock(); - - if (free_sk_storage) - kfree_rcu(sk_storage, rcu); } static void bpf_sk_storage_map_free(struct bpf_map *map) @@ -74,7 +68,7 @@ static void bpf_sk_storage_map_free(struct bpf_map *map) static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr) { - return bpf_local_storage_map_alloc(attr, &sk_cache); + return bpf_local_storage_map_alloc(attr, &sk_cache, false); } static int notsupp_get_next_key(struct bpf_map *map, void *key, @@ -100,8 +94,8 @@ static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key) return ERR_PTR(err); } -static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags) +static long bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) { struct bpf_local_storage_data *sdata; struct socket *sock; @@ -120,7 +114,7 @@ static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key, return err; } -static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key) +static long bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key) { struct socket *sock; int fd, err; @@ -203,7 +197,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) } else { ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC); if (ret) { - kfree(copy_selem); + bpf_selem_free(copy_selem, smap, true); atomic_sub(smap->elem_size, &newsk->sk_omem_alloc); bpf_map_put(map); @@ -418,7 +412,7 @@ const struct bpf_func_proto bpf_sk_storage_get_tracing_proto = { .gpl_only = false, .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON], .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, .arg4_type = ARG_ANYTHING, @@ -430,7 +424,7 @@ const struct bpf_func_proto bpf_sk_storage_delete_tracing_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_BTF_ID, + .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, .arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON], .allowed = bpf_sk_storage_tracing_allowed, }; diff --git a/net/core/filter.c b/net/core/filter.c index a8c8fd96c822..df0df59814ae 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5002,7 +5002,7 @@ const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto = { .func = bpf_get_socket_ptr_cookie, .gpl_only = false, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON | PTR_MAYBE_NULL, }; BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx) @@ -8746,23 +8746,18 @@ EXPORT_SYMBOL_GPL(nf_conn_btf_access_lock); int (*nfct_btf_struct_access)(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, - int off, int size, enum bpf_access_type atype, - u32 *next_btf_id, enum bpf_type_flag *flag); + int off, int size); EXPORT_SYMBOL_GPL(nfct_btf_struct_access); static int tc_cls_act_btf_struct_access(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, - int off, int size, enum bpf_access_type atype, - u32 *next_btf_id, enum bpf_type_flag *flag) + int off, int size) { int ret = -EACCES; - if (atype == BPF_READ) - return btf_struct_access(log, reg, off, size, atype, next_btf_id, flag); - mutex_lock(&nf_conn_btf_access_lock); if (nfct_btf_struct_access) - ret = nfct_btf_struct_access(log, reg, off, size, atype, next_btf_id, flag); + ret = nfct_btf_struct_access(log, reg, off, size); mutex_unlock(&nf_conn_btf_access_lock); return ret; @@ -8829,17 +8824,13 @@ EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); static int xdp_btf_struct_access(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, - int off, int size, enum bpf_access_type atype, - u32 *next_btf_id, enum bpf_type_flag *flag) + int off, int size) { int ret = -EACCES; - if (atype == BPF_READ) - return btf_struct_access(log, reg, off, size, atype, next_btf_id, flag); - mutex_lock(&nf_conn_btf_access_lock); if (nfct_btf_struct_access) - ret = nfct_btf_struct_access(log, reg, off, size, atype, next_btf_id, flag); + ret = nfct_btf_struct_access(log, reg, off, size); mutex_unlock(&nf_conn_btf_access_lock); return ret; @@ -9189,7 +9180,7 @@ static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si, __u8 tmp_reg = BPF_REG_AX; *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, - PKT_VLAN_PRESENT_OFFSET); + SKB_BF_MONO_TC_OFFSET); *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, SKB_MONO_DELIVERY_TIME_MASK, 2); *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_UNSPEC); @@ -9236,7 +9227,7 @@ static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog, /* AX is needed because src_reg and dst_reg could be the same */ __u8 tmp_reg = BPF_REG_AX; - *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, PKT_VLAN_PRESENT_OFFSET); + *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET); *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK); *insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg, @@ -9271,14 +9262,14 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog, if (!prog->tstamp_type_access) { __u8 tmp_reg = BPF_REG_AX; - *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, PKT_VLAN_PRESENT_OFFSET); + *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET); /* Writing __sk_buff->tstamp as ingress, goto <clear> */ *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1); /* goto <store> */ *insn++ = BPF_JMP_A(2); /* <clear>: mono_delivery_time */ *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_MONO_DELIVERY_TIME_MASK); - *insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, PKT_VLAN_PRESENT_OFFSET); + *insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, SKB_BF_MONO_TC_OFFSET); } #endif diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 9b854e236d23..7c189c2e2fbf 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -437,7 +437,7 @@ static void sock_map_delete_from_link(struct bpf_map *map, struct sock *sk, __sock_map_delete(stab, sk, link_raw); } -static int sock_map_delete_elem(struct bpf_map *map, void *key) +static long sock_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_stab *stab = container_of(map, struct bpf_stab, map); u32 i = *(u32 *)key; @@ -587,8 +587,8 @@ out: return ret; } -static int sock_map_update_elem(struct bpf_map *map, void *key, - void *value, u64 flags) +static long sock_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 flags) { struct sock *sk = (struct sock *)value; int ret; @@ -925,7 +925,7 @@ static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk, raw_spin_unlock_bh(&bucket->lock); } -static int sock_hash_delete_elem(struct bpf_map *map, void *key) +static long sock_hash_delete_elem(struct bpf_map *map, void *key) { struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map); u32 hash, key_size = map->key_size; diff --git a/net/core/xdp.c b/net/core/xdp.c index fb85aca81961..41e5ca8643ec 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -531,21 +531,6 @@ out: } EXPORT_SYMBOL_GPL(xdp_return_buff); -/* Only called for MEM_TYPE_PAGE_POOL see xdp.h */ -void __xdp_release_frame(void *data, struct xdp_mem_info *mem) -{ - struct xdp_mem_allocator *xa; - struct page *page; - - rcu_read_lock(); - xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); - page = virt_to_head_page(data); - if (xa) - page_pool_release_page(xa->page_pool, page); - rcu_read_unlock(); -} -EXPORT_SYMBOL_GPL(__xdp_release_frame); - void xdp_attachment_setup(struct xdp_attachment_info *info, struct netdev_bpf *bpf) { @@ -658,8 +643,8 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf, * - RX ring dev queue index (skb_record_rx_queue) */ - /* Until page_pool get SKB return path, release DMA here */ - xdp_release_frame(xdpf); + if (xdpf->mem.type == MEM_TYPE_PAGE_POOL) + skb_mark_for_recycle(skb); /* Allow SKB to reuse area used by xdp_frame */ xdp_scrub_frame(xdpf); diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 880277c9fd07..b18ba8ef93ad 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -26,7 +26,7 @@ obj-$(CONFIG_IP_MROUTE) += ipmr.o obj-$(CONFIG_IP_MROUTE_COMMON) += ipmr_base.o obj-$(CONFIG_NET_IPIP) += ipip.o gre-y := gre_demux.o -fou-y := fou_core.o fou_nl.o +fou-y := fou_core.o fou_nl.o fou_bpf.o obj-$(CONFIG_NET_FOU) += fou.o obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o obj-$(CONFIG_NET_IPGRE) += ip_gre.o diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 13fc0c185cd9..4406d796cc2f 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -72,15 +72,11 @@ static bool bpf_tcp_ca_is_valid_access(int off, int size, static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, - int off, int size, enum bpf_access_type atype, - u32 *next_btf_id, enum bpf_type_flag *flag) + int off, int size) { const struct btf_type *t; size_t end; - if (atype == BPF_READ) - return btf_struct_access(log, reg, off, size, atype, next_btf_id, flag); - t = btf_type_by_id(reg->btf, reg->btf_id); if (t != tcp_sock_type) { bpf_log(log, "only read is supported\n"); @@ -113,6 +109,9 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log, case offsetof(struct tcp_sock, ecn_flags): end = offsetofend(struct tcp_sock, ecn_flags); break; + case offsetof(struct tcp_sock, app_limited): + end = offsetofend(struct tcp_sock, app_limited); + break; default: bpf_log(log, "no write support to tcp_sock at off %d\n", off); return -EACCES; @@ -239,8 +238,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t, if (bpf_obj_name_cpy(tcp_ca->name, utcp_ca->name, sizeof(tcp_ca->name)) <= 0) return -EINVAL; - if (tcp_ca_find(utcp_ca->name)) - return -EEXIST; return 1; } @@ -266,13 +263,25 @@ static void bpf_tcp_ca_unreg(void *kdata) tcp_unregister_congestion_control(kdata); } +static int bpf_tcp_ca_update(void *kdata, void *old_kdata) +{ + return tcp_update_congestion_control(kdata, old_kdata); +} + +static int bpf_tcp_ca_validate(void *kdata) +{ + return tcp_validate_congestion_control(kdata); +} + struct bpf_struct_ops bpf_tcp_congestion_ops = { .verifier_ops = &bpf_tcp_ca_verifier_ops, .reg = bpf_tcp_ca_reg, .unreg = bpf_tcp_ca_unreg, + .update = bpf_tcp_ca_update, .check_member = bpf_tcp_ca_check_member, .init_member = bpf_tcp_ca_init_member, .init = bpf_tcp_ca_init, + .validate = bpf_tcp_ca_validate, .name = "tcp_congestion_ops", }; diff --git a/net/ipv4/fou_bpf.c b/net/ipv4/fou_bpf.c new file mode 100644 index 000000000000..3760a14b6b57 --- /dev/null +++ b/net/ipv4/fou_bpf.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Unstable Fou Helpers for TC-BPF hook + * + * These are called from SCHED_CLS BPF programs. Note that it is + * allowed to break compatibility for these functions since the interface they + * are exposed through to BPF programs is explicitly unstable. + */ + +#include <linux/bpf.h> +#include <linux/btf_ids.h> + +#include <net/dst_metadata.h> +#include <net/fou.h> + +struct bpf_fou_encap { + __be16 sport; + __be16 dport; +}; + +enum bpf_fou_encap_type { + FOU_BPF_ENCAP_FOU, + FOU_BPF_ENCAP_GUE, +}; + +__diag_push(); +__diag_ignore_all("-Wmissing-prototypes", + "Global functions as their definitions will be in BTF"); + +/* bpf_skb_set_fou_encap - Set FOU encap parameters + * + * This function allows for using GUE or FOU encapsulation together with an + * ipip device in collect-metadata mode. + * + * It is meant to be used in BPF tc-hooks and after a call to the + * bpf_skb_set_tunnel_key helper, responsible for setting IP addresses. + * + * Parameters: + * @skb_ctx Pointer to ctx (__sk_buff) in TC program. Cannot be NULL + * @encap Pointer to a `struct bpf_fou_encap` storing UDP src and + * dst ports. If sport is set to 0 the kernel will auto-assign a + * port. This is similar to using `encap-sport auto`. + * Cannot be NULL + * @type Encapsulation type for the packet. Their definitions are + * specified in `enum bpf_fou_encap_type` + */ +__bpf_kfunc int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx, + struct bpf_fou_encap *encap, int type) +{ + struct sk_buff *skb = (struct sk_buff *)skb_ctx; + struct ip_tunnel_info *info = skb_tunnel_info(skb); + + if (unlikely(!encap)) + return -EINVAL; + + if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) + return -EINVAL; + + switch (type) { + case FOU_BPF_ENCAP_FOU: + info->encap.type = TUNNEL_ENCAP_FOU; + break; + case FOU_BPF_ENCAP_GUE: + info->encap.type = TUNNEL_ENCAP_GUE; + break; + default: + info->encap.type = TUNNEL_ENCAP_NONE; + } + + if (info->key.tun_flags & TUNNEL_CSUM) + info->encap.flags |= TUNNEL_ENCAP_FLAG_CSUM; + + info->encap.sport = encap->sport; + info->encap.dport = encap->dport; + + return 0; +} + +/* bpf_skb_get_fou_encap - Get FOU encap parameters + * + * This function allows for reading encap metadata from a packet received + * on an ipip device in collect-metadata mode. + * + * Parameters: + * @skb_ctx Pointer to ctx (__sk_buff) in TC program. Cannot be NULL + * @encap Pointer to a struct bpf_fou_encap storing UDP source and + * destination port. Cannot be NULL + */ +__bpf_kfunc int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx, + struct bpf_fou_encap *encap) +{ + struct sk_buff *skb = (struct sk_buff *)skb_ctx; + struct ip_tunnel_info *info = skb_tunnel_info(skb); + + if (unlikely(!info)) + return -EINVAL; + + encap->sport = info->encap.sport; + encap->dport = info->encap.dport; + + return 0; +} + +__diag_pop() + +BTF_SET8_START(fou_kfunc_set) +BTF_ID_FLAGS(func, bpf_skb_set_fou_encap) +BTF_ID_FLAGS(func, bpf_skb_get_fou_encap) +BTF_SET8_END(fou_kfunc_set) + +static const struct btf_kfunc_id_set fou_bpf_kfunc_set = { + .owner = THIS_MODULE, + .set = &fou_kfunc_set, +}; + +int register_fou_bpf(void) +{ + return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, + &fou_bpf_kfunc_set); +} diff --git a/net/ipv4/fou_core.c b/net/ipv4/fou_core.c index cafec9b4eee0..0c41076e31ed 100644 --- a/net/ipv4/fou_core.c +++ b/net/ipv4/fou_core.c @@ -1236,10 +1236,15 @@ static int __init fou_init(void) if (ret < 0) goto unregister; + ret = register_fou_bpf(); + if (ret < 0) + goto kfunc_failed; + ret = ip_tunnel_encap_add_fou_ops(); if (ret == 0) return 0; +kfunc_failed: genl_unregister_family(&fou_nl_family); unregister: unregister_pernet_device(&fou_net_ops); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 2541083d49ad..beeae624c412 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -359,6 +359,20 @@ err_dev_set_mtu: return ERR_PTR(err); } +void ip_tunnel_md_udp_encap(struct sk_buff *skb, struct ip_tunnel_info *info) +{ + const struct iphdr *iph = ip_hdr(skb); + const struct udphdr *udph; + + if (iph->protocol != IPPROTO_UDP) + return; + + udph = (struct udphdr *)((__u8 *)iph + (iph->ihl << 2)); + info->encap.sport = udph->source; + info->encap.dport = udph->dest; +} +EXPORT_SYMBOL(ip_tunnel_md_udp_encap); + int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, bool log_ecn_error) @@ -572,7 +586,11 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, tunnel_id_to_key32(key->tun_id), RT_TOS(tos), dev_net(dev), 0, skb->mark, skb_get_hash(skb), key->flow_flags); - if (tunnel->encap.type != TUNNEL_ENCAP_NONE) + + if (!tunnel_hlen) + tunnel_hlen = ip_encap_hlen(&tun_info->encap); + + if (ip_tunnel_encap(skb, &tun_info->encap, &proto, &fl4) < 0) goto tx_error; use_cache = ip_tunnel_dst_cache_usable(skb, tun_info); @@ -732,7 +750,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, dev_net(dev), tunnel->parms.link, tunnel->fwmark, skb_get_hash(skb), 0); - if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) + if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0) goto tx_error; if (connected && md) { diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index abea77759b7e..27b8f83c6ea2 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -241,6 +241,7 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto) tun_dst = ip_tun_rx_dst(skb, 0, 0, 0); if (!tun_dst) return 0; + ip_tunnel_md_udp_encap(skb, &tun_dst->u.tun_info); } skb_reset_mac_header(skb); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index db8b4b488c31..1b34050a7538 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -75,14 +75,8 @@ struct tcp_congestion_ops *tcp_ca_find_key(u32 key) return NULL; } -/* - * Attach new congestion control algorithm to the list - * of available options. - */ -int tcp_register_congestion_control(struct tcp_congestion_ops *ca) +int tcp_validate_congestion_control(struct tcp_congestion_ops *ca) { - int ret = 0; - /* all algorithms must implement these */ if (!ca->ssthresh || !ca->undo_cwnd || !(ca->cong_avoid || ca->cong_control)) { @@ -90,6 +84,20 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) return -EINVAL; } + return 0; +} + +/* Attach new congestion control algorithm to the list + * of available options. + */ +int tcp_register_congestion_control(struct tcp_congestion_ops *ca) +{ + int ret; + + ret = tcp_validate_congestion_control(ca); + if (ret) + return ret; + ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name)); spin_lock(&tcp_cong_list_lock); @@ -130,6 +138,50 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca) } EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); +/* Replace a registered old ca with a new one. + * + * The new ca must have the same name as the old one, that has been + * registered. + */ +int tcp_update_congestion_control(struct tcp_congestion_ops *ca, struct tcp_congestion_ops *old_ca) +{ + struct tcp_congestion_ops *existing; + int ret; + + ret = tcp_validate_congestion_control(ca); + if (ret) + return ret; + + ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name)); + + spin_lock(&tcp_cong_list_lock); + existing = tcp_ca_find_key(old_ca->key); + if (ca->key == TCP_CA_UNSPEC || !existing || strcmp(existing->name, ca->name)) { + pr_notice("%s not registered or non-unique key\n", + ca->name); + ret = -EINVAL; + } else if (existing != old_ca) { + pr_notice("invalid old congestion control algorithm to replace\n"); + ret = -EINVAL; + } else { + /* Add the new one before removing the old one to keep + * one implementation available all the time. + */ + list_add_tail_rcu(&ca->list, &tcp_cong_list); + list_del_rcu(&existing->list); + pr_debug("%s updated\n", ca->name); + } + spin_unlock(&tcp_cong_list_lock); + + /* Wait for outstanding readers to complete before the + * module or struct_ops gets removed entirely. + */ + if (!ret) + synchronize_rcu(); + + return ret; +} + u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca) { const struct tcp_congestion_ops *ca; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 70d81bba5093..063560e2cb1a 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1024,7 +1024,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, ttl = iph6->hop_limit; tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); - if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) { + if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0) { ip_rt_put(rt); goto tx_error; } diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index cd99e6dc1f35..3f821b7ba646 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -192,8 +192,7 @@ BTF_ID(struct, nf_conn___init) /* Check writes into `struct nf_conn` */ static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log, const struct bpf_reg_state *reg, - int off, int size, enum bpf_access_type atype, - u32 *next_btf_id, enum bpf_type_flag *flag) + int off, int size) { const struct btf_type *ncit, *nct, *t; size_t end; @@ -401,8 +400,6 @@ __bpf_kfunc struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i) */ __bpf_kfunc void bpf_ct_release(struct nf_conn *nfct) { - if (!nfct) - return; nf_ct_put(nfct); } diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 2ac58b282b5e..cc1e7f15fa73 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -1301,9 +1301,10 @@ static int xsk_mmap(struct file *file, struct socket *sock, loff_t offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT; unsigned long size = vma->vm_end - vma->vm_start; struct xdp_sock *xs = xdp_sk(sock->sk); + int state = READ_ONCE(xs->state); struct xsk_queue *q = NULL; - if (READ_ONCE(xs->state) != XSK_READY) + if (state != XSK_READY && state != XSK_BOUND) return -EBUSY; if (offset == XDP_PGOFF_RX_RING) { @@ -1314,9 +1315,11 @@ static int xsk_mmap(struct file *file, struct socket *sock, /* Matches the smp_wmb() in XDP_UMEM_REG */ smp_rmb(); if (offset == XDP_UMEM_PGOFF_FILL_RING) - q = READ_ONCE(xs->fq_tmp); + q = state == XSK_READY ? READ_ONCE(xs->fq_tmp) : + READ_ONCE(xs->pool->fq); else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING) - q = READ_ONCE(xs->cq_tmp); + q = state == XSK_READY ? READ_ONCE(xs->cq_tmp) : + READ_ONCE(xs->pool->cq); } if (!q) diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index bfb2a7e50c26..6d40a77fccbe 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -133,16 +133,12 @@ static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr) static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc) { - u64 chunk, chunk_end; + u64 offset = desc->addr & (pool->chunk_size - 1); - chunk = xp_aligned_extract_addr(pool, desc->addr); - if (likely(desc->len)) { - chunk_end = xp_aligned_extract_addr(pool, desc->addr + desc->len - 1); - if (chunk != chunk_end) - return false; - } + if (offset + desc->len > pool->chunk_size) + return false; - if (chunk >= pool->addrs_cnt) + if (desc->addr >= pool->addrs_cnt) return false; if (desc->options) @@ -153,15 +149,12 @@ static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc) { - u64 addr, base_addr; - - base_addr = xp_unaligned_extract_addr(desc->addr); - addr = xp_unaligned_add_offset_to_addr(desc->addr); + u64 addr = xp_unaligned_add_offset_to_addr(desc->addr); if (desc->len > pool->chunk_size) return false; - if (base_addr >= pool->addrs_cnt || addr >= pool->addrs_cnt || + if (addr >= pool->addrs_cnt || addr + desc->len > pool->addrs_cnt || xp_desc_crosses_non_contig_pg(pool, addr, desc->len)) return false; diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c index 0c38d7175922..2c1427074a3b 100644 --- a/net/xdp/xskmap.c +++ b/net/xdp/xskmap.c @@ -162,8 +162,8 @@ static void *xsk_map_lookup_elem_sys_only(struct bpf_map *map, void *key) return ERR_PTR(-EOPNOTSUPP); } -static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value, - u64 map_flags) +static long xsk_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) { struct xsk_map *m = container_of(map, struct xsk_map, map); struct xdp_sock __rcu **map_entry; @@ -223,7 +223,7 @@ out: return err; } -static int xsk_map_delete_elem(struct bpf_map *map, void *key) +static long xsk_map_delete_elem(struct bpf_map *map, void *key) { struct xsk_map *m = container_of(map, struct xsk_map, map); struct xdp_sock __rcu **map_entry; @@ -243,7 +243,7 @@ static int xsk_map_delete_elem(struct bpf_map *map, void *key) return 0; } -static int xsk_map_redirect(struct bpf_map *map, u64 index, u64 flags) +static long xsk_map_redirect(struct bpf_map *map, u64 index, u64 flags) { return __bpf_xdp_redirect_map(map, index, flags, 0, __xsk_map_lookup_elem); |